## Libraries

In [1]:
#Base Libraries
import os
import pandas as pd
from datetime import date
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import missingno as msno

In [3]:
# sklearn ML libraries

#Preprocessing
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder, LabelEncoder

# Supervised Learning Models
from sklearn.linear_model import LinearRegression, Ridge, Lasso, LogisticRegression
from sklearn.svm import SVC, SVR
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor, GradientBoostingClassifier, GradientBoostingRegressor, AdaBoostClassifier, AdaBoostRegressor, VotingClassifier, VotingRegressor
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.neural_network import MLPClassifier, MLPRegressor

# Unsupervised Learning Models
from sklearn.cluster import KMeans, AgglomerativeClustering, DBSCAN
from sklearn.decomposition import PCA, TruncatedSVD

# Model Selection and Evaluation
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, RandomizedSearchCV

#metrics
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

In [1]:
# Tensorflow libraries

import tensorflow as tf

# Keras (high-level neural networks API integrated with TensorFlow)
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten, LSTM, Embedding
from tensorflow.keras.optimizers import Adam, SGD, RMSprop
from tensorflow.keras.losses import SparseCategoricalCrossentropy, MeanSquaredError
from tensorflow.keras.metrics import Accuracy, MeanAbsoluteError
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Image Preprocessing
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Model Evaluation
from tensorflow.keras.utils import plot_model
from tensorflow.keras.models import load_model

# Other TensorFlow modules
from tensorflow.keras import Input, Model
from tensorflow.keras import layers
from tensorflow.keras import datasets

# TensorBoard for visualization
from tensorflow.keras.callbacks import TensorBoard




## Importing Data

In [2]:
# Load the dataset from a CSV file: uncomment the line below and set the path (every later cell requires `df`)
# df = pd.read_csv('your_dataset.csv')

## Exploratory Data Analysis

In [None]:
# Columns grouped by the dtype they should carry.
int_cols = ['col1', 'col2', 'col3']
float_cols = ['col4', 'col5', 'col6']

# Cast each group in turn (integers first, then floats), writing the
# converted columns back into the frame.
for _cols, _dtype in ((int_cols, int), (float_cols, float)):
    df[_cols] = df[_cols].astype(_dtype)

In [None]:
# Overview of the dataset: column labels, dtypes / non-null counts,
# summary statistics and overall shape.
print(df.columns)
# DataFrame.info() writes its report to stdout and returns None, so it is
# called directly; wrapping it in print() would add a stray "None" line.
df.info()
print(df.describe())
print(df.shape)

In [None]:
# Count of missing values in each column.
null_counts = df.isna().sum()
print(null_counts)

# Visual overview of where the missing values sit in the frame.
msno.matrix(df)
plt.show()

In [None]:
def print_uniques(df):
    """Print the distinct values of every column of ``df``.

    For each column label the output shows the array returned by
    ``Series.unique()`` followed by the number reported by
    ``Series.nunique()``. Columns are processed one at a time, so only a
    single column's unique values are held at any moment.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are inspected.

    Returns
    -------
    None
        Everything is written to standard output.
    """
    print('\nUnique Values:')
    for column in df.columns:
        series = df[column]
        print(f"{column}: \n {series.unique()} \n ({series.nunique()} unique values)")

## Data Preprocessing

In [None]:
# Columns that are not needed for the analysis (each label listed once).
drop_cols = [
    'location', 'crash_date_est_i', 'report_type', 'intersection_related_i',
    'hit_and_run_i', 'photos_taken_i', 'injuries_unknown',
    'private_property_i', 'statements_taken_i', 'dooring_i', 'work_zone_i',
    'work_zone_type', 'workers_present_i', 'the_geom', 'rd_no',
    'SECTOR', 'BEAT', 'BEAT_NUM',
]

# Drop the columns. Because `df` is rebound here, re-running the cell would
# otherwise raise KeyError (the labels are already gone); errors='ignore'
# makes the cell safe to re-run by skipping labels that are not present.
# NOTE(review): this also silences misspelled labels, so check the
# df.info() output below to confirm the expected columns were removed.
df = df.drop(columns=drop_cols, errors='ignore')

# Plot the matrix of missing data for the remaining columns.
msno.matrix(df)
plt.show()

# Summary of the remaining columns. DataFrame.info() prints its report
# itself and returns None, so it is not wrapped in print().
df.info()