In [39]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.impute import IterativeImputer
from sklearn.preprocessing import OrdinalEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Conv1D, MaxPooling1D
from hmmlearn import hmm

In [40]:
data = pd.read_csv("C:/GitHub/Data_Science_learnig/Machine_Learing/Mice_Algorithm/Static/travel-times.csv")
data.head()

Unnamed: 0,Date,StartTime,DayOfWeek,GoingTo,Distance,MaxSpeed,AvgSpeed,AvgMovingSpeed,FuelEconomy,TotalTime,MovingTime,Take407All,Comments
0,1/6/2012,16:37,Friday,Home,51.29,127.4,78.3,84.8,,39.3,36.3,No,
1,1/6/2012,08:20,Friday,GSK,51.63,130.3,81.8,88.9,,37.9,34.9,No,
2,1/4/2012,16:17,Wednesday,Home,51.27,127.4,82.0,85.8,,37.5,35.9,No,
3,1/4/2012,07:53,Wednesday,GSK,49.17,132.3,74.2,82.9,,39.8,35.6,No,
4,1/3/2012,18:57,Tuesday,Home,51.15,136.2,83.4,88.1,,36.8,34.8,No,


In [41]:
data = data.drop("Comments", axis=1)

In [42]:
data.shape

(205, 12)

In [43]:
categorical_columns = data.select_dtypes(include=['object']).columns.tolist()
print(categorical_columns)
# Replace '-' with NaN in numerical columns
numerical_cols = [col for col in data.columns if col not in ['Date', 'StartTime', 'Take407All', 'Comments']]
data[numerical_cols] = data[numerical_cols].replace('-', float('nan'))


['Date', 'StartTime', 'DayOfWeek', 'GoingTo', 'FuelEconomy', 'Take407All']


In [44]:
data.isna().sum()
data.isnull().sum()

Date               0
StartTime          0
DayOfWeek          0
GoingTo            0
Distance           0
MaxSpeed           0
AvgSpeed           0
AvgMovingSpeed     0
FuelEconomy       19
TotalTime          0
MovingTime         0
Take407All         0
dtype: int64

In [45]:
# Handle categorical data using factorization
data['DayOfWeek'] = pd.factorize(data['DayOfWeek'])[0]
data['GoingTo'] = pd.factorize(data['GoingTo'])[0]

In [46]:
# Split data into features (X) and target variable (y)
X = data.drop(['Date', 'StartTime', 'Take407All'], axis=1)
y = data['Take407All']

In [47]:
# Convert target variable to binary (0 and 1)
y_binary = y.map({'No': 0, 'Yes': 1})

# Check the data type and unique values of the target variable
print("Data type of y_binary:", y_binary.dtype)
print("Unique values of y_binary:", y_binary.unique())


Data type of y_binary: int64
Unique values of y_binary: [0 1]


In [48]:
X.isna().sum()

DayOfWeek          0
GoingTo            0
Distance           0
MaxSpeed           0
AvgSpeed           0
AvgMovingSpeed     0
FuelEconomy       19
TotalTime          0
MovingTime         0
dtype: int64

In [49]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y_binary, test_size=0.2, random_state=42)

# Apply MICE algorithm to impute missing values in numerical features
imputer = IterativeImputer(random_state=0)
X_train_imputed = imputer.fit_transform(X_train[numerical_cols])
X_test_imputed = imputer.transform(X_test[numerical_cols])


In [50]:

# RandomForestClassifier
clf = RandomForestClassifier()
clf.fit(X_train_imputed, y_train)

# Evaluate the model's performance
y_pred = clf.predict(X_test_imputed)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

Accuracy: 0.9024390243902439


In [51]:
# Logistic Regression
clf_lr = LogisticRegression()
clf_lr.fit(X_train_imputed, y_train)
y_pred_lr = clf_lr.predict(X_test_imputed)
accuracy_lr = accuracy_score(y_test, y_pred_lr)
print("Logistic Regression Accuracy:", accuracy_lr)


Logistic Regression Accuracy: 0.926829268292683


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [52]:
# Support Vector Machine
clf_svm = SVC()
clf_svm.fit(X_train_imputed, y_train)
y_pred_svm = clf_svm.predict(X_test_imputed)
accuracy_svm = accuracy_score(y_test, y_pred_svm)
print("SVM Accuracy:", accuracy_svm)

SVM Accuracy: 0.8780487804878049


In [55]:

# Deep Neural Network (DNN)
model_dnn = Sequential([
    Dense(64, activation='relu', input_shape=(X_train_imputed.shape[1],)),
    Dense(64, activation='relu'),
    Dense(1, activation='sigmoid')
])
model_dnn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model_dnn.fit(X_train_imputed, y_train, epochs=10, batch_size=32, verbose=0)
accuracy_dnn = model_dnn.evaluate(X_test_imputed, y_test, verbose=0)[1]
print("DNN Accuracy:", accuracy_dnn)


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: closure mismatch, requested ('self', 'step_function'), but source function had ()
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: closure mismatch, requested ('self', 'step_function'), but source function had ()
DNN Accuracy: 0.9268292784690857


In [56]:
# Convolutional Neural Network (CNN)
X_train_cnn = X_train_imputed.reshape((X_train_imputed.shape[0], X_train_imputed.shape[1], 1))
X_test_cnn = X_test_imputed.reshape((X_test_imputed.shape[0], X_test_imputed.shape[1], 1))
model_cnn = Sequential([
    Conv1D(64, 3, activation='relu', input_shape=(X_train_cnn.shape[1], X_train_cnn.shape[2])),
    MaxPooling1D(2),
    Flatten(),
    Dense(50, activation='relu'),
    Dense(1, activation='sigmoid')
])
model_cnn.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model_cnn.fit(X_train_cnn, y_train, epochs=10, batch_size=32, verbose=0)
accuracy_cnn = model_cnn.evaluate(X_test_cnn, y_test, verbose=0)[1]
print("CNN Accuracy:", accuracy_cnn)


Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: closure mismatch, requested ('self', 'step_function'), but source function had ()
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: closure mismatch, requested ('self', 'step_function'), but source function had ()
CNN Accuracy: 0.8780487775802612
