<a href="https://colab.research.google.com/github/erlanig/machine-learning/blob/main/NB_Traffic%20Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Import Library

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import model_selection
from sklearn import metrics
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
from sklearn.naive_bayes import GaussianNB

# Import Data

In [None]:
# Optional Colab-only step, left disabled: mount Google Drive at /content/drive.
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
# Read the traffic dataset from a CSV file located relative to the working directory.
csv_path = 'Traffic.csv'
data = pd.read_csv(csv_path)

# Preview the first 30 rows.
data.head(n=30)

Unnamed: 0,Time,Date,Day of the week,CarCount,BikeCount,BusCount,TruckCount,Total,Traffic Situation
0,12:00:00 AM,10,Tuesday,31,0,4,4,39,low
1,12:15:00 AM,10,Tuesday,49,0,3,3,55,low
2,12:30:00 AM,10,Tuesday,46,0,3,6,55,low
3,12:45:00 AM,10,Tuesday,51,0,2,5,58,low
4,1:00:00 AM,10,Tuesday,57,6,15,16,94,normal
5,1:15:00 AM,10,Tuesday,44,0,5,4,53,low
6,1:30:00 AM,10,Tuesday,37,0,1,4,42,low
7,1:45:00 AM,10,Tuesday,42,4,4,5,55,low
8,2:00:00 AM,10,Tuesday,51,0,9,7,67,low
9,2:15:00 AM,10,Tuesday,34,0,4,7,45,low


In [None]:
# Show the column labels of the loaded frame.
data.columns

Index(['Time', 'Date', 'Day of the week', 'CarCount', 'BikeCount', 'BusCount',
       'TruckCount', 'Total', 'Traffic Situation'],
      dtype='object')

In [None]:
# Show the (rows, columns) dimensions of the loaded frame.
data.shape

(2976, 9)

In [None]:
# Summarise dtypes and non-null counts before any preprocessing.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2976 entries, 0 to 2975
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Time               2976 non-null   object
 1   Date               2976 non-null   int64 
 2   Day of the week    2976 non-null   object
 3   CarCount           2976 non-null   int64 
 4   BikeCount          2976 non-null   int64 
 5   BusCount           2976 non-null   int64 
 6   TruckCount         2976 non-null   int64 
 7   Total              2976 non-null   int64 
 8   Traffic Situation  2976 non-null   object
dtypes: int64(6), object(3)
memory usage: 209.4+ KB


# Preprocessing

In [None]:
# Integer encodings for the two text columns of `data`.
# Weekdays are numbered Monday=1 through Sunday=7.
day_mapping = {
    day_name: day_number
    for day_number, day_name in enumerate(
        ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'],
        start=1,
    )
}
# Traffic situation labels are numbered low=0, normal=1, high=2, heavy=3.
traffic_situation_mapping = {
    level: code for code, level in enumerate(['low', 'normal', 'high', 'heavy'])
}

# Replace every label with its code; a label absent from its mapping becomes NaN.
for column, mapping in (
    ('Day of the week', day_mapping),
    ('Traffic Situation', traffic_situation_mapping),
):
    data[column] = data[column].map(mapping)


In [None]:
# Cast the weekday codes in `data` to integers.
weekday_codes = data['Day of the week']
data['Day of the week'] = weekday_codes.astype(int)

In [None]:
# Flag which half of the day each reading belongs to: 0 for AM, 1 for PM.
# The flag is taken from the two-character suffix of the Time text. Comparing
# that suffix against the integers 0 and 1 can never match a string, so the
# comparison is done against the 'PM' marker itself, for the whole column at once.
time_text = data['Time'].str.strip()
data['midday'] = (time_text.str[-2:].str.upper() == 'PM').astype(int)

# Remove the 'AM'/'PM' marker from the Time column without losing it:
# rewrite each value as 24-hour 'HH:MM:SS' text. Simply cutting the suffix off
# would make e.g. 1:00 AM and 1:00 PM indistinguishable once the column is
# converted to a number. A value not in 'H:MM:SS AM/PM' form raises here.
data['Time'] = pd.to_datetime(time_text, format='%I:%M:%S %p').dt.strftime('%H:%M:%S')

In [None]:
# Turn each Time string into a second count: hour * 3600 + minute * 60 + second.
# The column is parsed once and the result reused for all three components.
parsed_time = pd.to_datetime(data['Time'])
data['Time'] = (
    parsed_time.dt.hour * 3600
    + parsed_time.dt.minute * 60
    + parsed_time.dt.second
)

In [None]:
# Re-check dtypes and non-null counts after the preprocessing cells above.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2976 entries, 0 to 2975
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Time               2976 non-null   int64 
 1   Date               2976 non-null   int64 
 2   Day of the week    2976 non-null   int64 
 3   CarCount           2976 non-null   int64 
 4   BikeCount          2976 non-null   int64 
 5   BusCount           2976 non-null   int64 
 6   TruckCount         2976 non-null   int64 
 7   Total              2976 non-null   int64 
 8   Traffic Situation  2976 non-null   int64 
 9   midday             2976 non-null   object
dtypes: int64(9), object(1)
memory usage: 232.6+ KB


In [None]:
# Build the modelling frame by leaving out the 'midday' and 'Date' columns.
traffic_df = data.drop(columns=['midday', 'Date'])

In [None]:
# Confirm the columns and dtypes of the frame used for modelling.
traffic_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2976 entries, 0 to 2975
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype
---  ------             --------------  -----
 0   Time               2976 non-null   int64
 1   Day of the week    2976 non-null   int64
 2   CarCount           2976 non-null   int64
 3   BikeCount          2976 non-null   int64
 4   BusCount           2976 non-null   int64
 5   TruckCount         2976 non-null   int64
 6   Total              2976 non-null   int64
 7   Traffic Situation  2976 non-null   int64
dtypes: int64(8)
memory usage: 186.1 KB


In [None]:
# Split the frame's values into features (first seven columns) and the
# target (eighth column, 'Traffic Situation').
array = traffic_df.values
X, Y = array[:, :7], array[:, 7]

# Hold out 20% of the rows for validation, with a fixed seed for repeatability.
validation_size = 0.20
seed = 7

X_train, X_validation, Y_train, Y_validation = model_selection.train_test_split(
    X,
    Y,
    test_size=validation_size,
    random_state=seed,
)

In [None]:
# Test options and evaluation metric

# Seed shared by the cross-validation splitter.
seed = 7
# Name of the scoring metric; spelled as scikit-learn's 'accuracy' scorer
# (the misspelling 'acuracy' is not a valid scorer name).
scoring = 'accuracy'

In [None]:
# Spot-check three classifiers, each with its default settings.
models = [
    ('KNN', KNeighborsClassifier()),
    ('NB', GaussianNB()),
    ('SVM', SVC()),
]

# Score every candidate with shuffled 10-fold cross-validation on the
# training split and report mean accuracy with its standard deviation.
results = []
names = []
for name, model in models:
    kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)
    cv_results = model_selection.cross_val_score(
        model,
        X_train,
        Y_train,
        cv=kfold,
        scoring='accuracy',
    )
    names.append(name)
    results.append(cv_results)
    msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
    print(msg)


KNN: 0.802521 (0.024500)
NB: 0.821849 (0.017446)
SVM: 0.601681 (0.035043)


## KNN

In [None]:
# Create a k-nearest-neighbours classifier with default settings.
knn = KNeighborsClassifier()

# Fit it on the complete feature matrix and labels (not just the training split).
# scikit-learn's fit() returns the estimator itself, so both names refer to it.
knn.fit(X, Y)
data_training = knn

# Predict on the very same rows the classifier was fitted on.
Y_predict = data_training.predict(X)
print(Y_predict)

[1 1 1 ... 1 1 1]


In [None]:
# Fit a fresh KNN classifier on the training split and predict the validation split.
knn = KNeighborsClassifier()
knn.fit(X_train, Y_train)
predictions = knn.predict(X_validation)

# Compute the validation metrics first, then print them together.
accuracy = accuracy_score(Y_validation, predictions)
validation_matrix = confusion_matrix(Y_validation, predictions)
validation_report = classification_report(Y_validation, predictions)

print("Accuracy: ", accuracy)
print("\nConfusion Matrix: ")
print(validation_matrix)
print(validation_report)

Accuracy:  0.7768456375838926

Confusion Matrix: 
[[ 23  44   0   0]
 [ 17 290  12   8]
 [  0  20  34   2]
 [  0  18  12 116]]
              precision    recall  f1-score   support

           0       0.57      0.34      0.43        67
           1       0.78      0.89      0.83       327
           2       0.59      0.61      0.60        56
           3       0.92      0.79      0.85       146

    accuracy                           0.78       596
   macro avg       0.72      0.66      0.68       596
weighted avg       0.77      0.78      0.77       596



## NB

In [None]:
# Create a Gaussian Naive Bayes classifier.
nb = GaussianNB()

# Fit it on the complete feature matrix and labels (not just the training split).
# scikit-learn's fit() returns the estimator itself, so both names refer to it.
nb.fit(X, Y)
data_training = nb

# Predict on the very same rows the classifier was fitted on.
Y_predict = data_training.predict(X)
print(Y_predict)

[0 0 0 ... 1 1 1]


In [None]:
# Fit a fresh Gaussian Naive Bayes classifier on the training split and
# predict the validation split.
nb = GaussianNB()
nb.fit(X_train, Y_train)
predictions = nb.predict(X_validation)

# Compute the validation metrics first, then print them together.
accuracy = accuracy_score(Y_validation, predictions)
validation_matrix = confusion_matrix(Y_validation, predictions)
validation_report = classification_report(Y_validation, predictions)

print("Accuracy: ", accuracy)
print("\nConfusion Matrix: ")
print(validation_matrix)
print(validation_report)

Accuracy:  0.7869127516778524

Confusion Matrix: 
[[ 40  27   0   0]
 [  1 256  53  17]
 [  0  12  38   6]
 [  0   0  11 135]]
              precision    recall  f1-score   support

           0       0.98      0.60      0.74        67
           1       0.87      0.78      0.82       327
           2       0.37      0.68      0.48        56
           3       0.85      0.92      0.89       146

    accuracy                           0.79       596
   macro avg       0.77      0.75      0.73       596
weighted avg       0.83      0.79      0.80       596



In [None]:
# Predict the traffic situation for one hand-written observation.
# Uses `pd` from the import cell at the top of the notebook and the estimator
# currently bound to `data_training`.

# Clock time of the observation, in "H:MM:SS AM/PM" form.
time = "3:30:00 AM"

# Convert the clock time to seconds (hour * 3600 + minute * 60 + second),
# parsing the string once.
parsed_time = pd.to_datetime(time)
time_seconds = parsed_time.hour * 3600 + parsed_time.minute * 60 + parsed_time.second

# Weekday code and per-vehicle-type counts for the observation.
day = 3
carcount = 129
bikecount = 22
buscount = 42
truckcount = 1
count = (carcount, bikecount, buscount, truckcount)
total = sum(count)

# One row, in the same column order as the feature matrix used for fitting.
Data_Testing = [[time_seconds, day, carcount, bikecount, buscount, truckcount, total]]
print(Data_Testing)

y_pred = data_training.predict(Data_Testing)

# Translate the predicted class code into its label. An unexpected code maps to
# "Error"; the previous fallback used `==` instead of `=`, so it never assigned
# `hasil` and the print below would have failed with a NameError.
label_names = {0: "Low", 1: "Normal", 2: "High", 3: "Heavy"}
hasil = label_names.get(int(y_pred[0]), "Error")
print("Hasil Prediksi Traffic :", hasil)

[[12600, 3, 129, 22, 42, 1, 194]]
Hasil Prediksi Traffic : Heavy


In [None]:
import pickle

# Serialise the estimator currently bound to `data_training` to model.pkl.
# The `with` block closes the file handle even if pickling fails.
with open('model.pkl', 'wb') as model_file:
    pickle.dump(data_training, model_file)