# Import Libraries

In [18]:
import pandas as pd
import numpy as np
from random import sample
import time
from datetime import datetime

# Import Models

In [4]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler

# Training Set

In [5]:
data_train = pd.read_csv("Training_set.csv") # Read the training dataset into Jupyter note book

In [6]:
x_train = data_train.drop('Traffic_Type', axis =1) # Separate the features into a dataframe and assign it to a new variable (x_train)

In [7]:
y_train = data_train['Traffic_Type'] # Assign the label (Traffic_Type) to a new variable (y_train)

# Test Set

In [8]:
data_test = pd.read_csv("Test_set.csv") # Read the test dataset into Jupyter note book

In [9]:
x_test = data_test.drop('Traffic_Type', axis =1) # Separate the features into a dataframe and assign it to a new variable (x_test)

In [10]:
y_test = data_test['Traffic_Type'] # Assign the label (Traffic_Type) to a new variable (y_test)

# Standardize the features

In [11]:
scaler = StandardScaler() # Create an instance of a scaler function

In [12]:
x_train = scaler.fit_transform(x_train) # Apply the scaler to the training set.

In [13]:
x_test = scaler.transform(x_test) # Apply the scaler to the test set.

# Train the models

In [14]:
dt = DecisionTreeClassifier() # Create an instance of the decision tree model.
sv = SVC() # Create an instance of the support vector machine model.

In [15]:
dt.fit(x_train, y_train) # train/fit the decision tree model on the training data.

DecisionTreeClassifier()

In [16]:
sv.fit(x_train, y_train) # train/fit the support vector machine model on the training data.

SVC()

# Comparing the execution times of the DT and SVC models on predicting on the test set using 50 random samples of 4000 data points each from the test set.

In [19]:
arr_sv = [] # Array to hold the execution time for the SVM model
arr_dt = [] # Array to hold the execution time for the DT model


for i in range(50): # Generating a sample of 50 random data points of size 4000 each
    randomize = np.random.randint(22543, size=4000) # Declaring the random function
    sample = x_test[randomize,:] # Taking a sample of the test set   
    
    # Computing and recording the execution time of the DT model on predicting one sample of the test set 
    start_time = datetime.now()
    dt.predict(sample)
    end_time = datetime.now()
    arr_dt.append(end_time - start_time)
    
     # Computing and recording the execution time of the SVM model on predicting one sample of the test set 
    start_time = datetime.now()
    sv.predict(sample)
    end_time = datetime.now()
    arr_sv.append(end_time - start_time)

In [20]:
# Displaying the execution times for all 50 samples
print('Decision Tree','\t','Support Vector Machine', '\n')
for item in range(len(arr_dt)):
    print(arr_dt[item],'\t', arr_sv[item], '\n')

Decision Tree 	 Support Vector Machine 

0:00:00.002988 	 0:00:04.127124 

0:00:00.001203 	 0:00:03.986821 

0:00:00.001002 	 0:00:03.927674 

0:00:00.001986 	 0:00:03.912069 

0:00:00.000989 	 0:00:03.853239 

0:00:00.002156 	 0:00:03.985677 

0:00:00.001947 	 0:00:03.898094 

0:00:00.001010 	 0:00:03.891078 

0:00:00.001001 	 0:00:03.870097 

0:00:00.001986 	 0:00:03.895012 

0:00:00.001999 	 0:00:03.801914 

0:00:00.001999 	 0:00:03.829152 

0:00:00.001467 	 0:00:03.686159 

0:00:00.001000 	 0:00:03.837024 

0:00:00.001000 	 0:00:03.431525 

0:00:00.002002 	 0:00:03.834841 

0:00:00.001997 	 0:00:03.909461 

0:00:00.001002 	 0:00:03.735957 

0:00:00.001044 	 0:00:04.023923 

0:00:00.000984 	 0:00:03.732611 

0:00:00.000991 	 0:00:03.712148 

0:00:00 	 0:00:03.802663 

0:00:00.002005 	 0:00:03.885617 

0:00:00.001054 	 0:00:03.815638 

0:00:00.001968 	 0:00:03.991840 

0:00:00.001969 	 0:00:03.943626 

0:00:00.001997 	 0:00:03.872814 

0:00:00.001012 	 0:00:03.802520 

0:00:00.001163

In [30]:
# Splitting the execution time values in the DT array and extracting seconds portion of the time.
t_dt = pd.DataFrame(arr_dt)
seconds_dt = []
for j in range(len(t_dt)):
    seconds_dt.append(str(t_dt.iloc[j]).split()[3][6:])
print(seconds_dt)

['00.002988', '00.001203', '00.001002', '00.001986', '00.000989', '00.002156', '00.001947', '00.001010', '00.001001', '00.001986', '00.001999', '00.001999', '00.001467', '00.001000', '00.001000', '00.002002', '00.001997', '00.001002', '00.001044', '00.000984', '00.000991', '', '00.002005', '00.001054', '00.001968', '00.001969', '00.001997', '00.001012', '00.001163', '00.001002', '00.001998', '00.001000', '00.001551', '00.002079', '00.001007', '00.001319', '00.001989', '00.001999', '00.000999', '00.000995', '00.001994', '00.001002', '00.002000', '00.001201', '00.002024', '00.002003', '00.001000', '00.002004', '00.001000', '00.000995']


In [23]:
# Splitting the execution time values in the SVM array and extracting seconds portion of the time.
t_sv = pd.DataFrame(arr_sv)
seconds_sv = []
for k in range(len(t_sv)):
    seconds_sv.append(str(t_sv.iloc[k]).split()[3][6:])
print(seconds_sv)

['04.127124', '03.986821', '03.927674', '03.912069', '03.853239', '03.985677', '03.898094', '03.891078', '03.870097', '03.895012', '03.801914', '03.829152', '03.686159', '03.837024', '03.431525', '03.834841', '03.909461', '03.735957', '04.023923', '03.732611', '03.712148', '03.802663', '03.885617', '03.815638', '03.991840', '03.943626', '03.872814', '03.802520', '03.801725', '03.771184', '03.483491', '03.743792', '03.641835', '03.699370', '03.530895', '03.785271', '03.882440', '03.845122', '03.651377', '03.792795', '04.030673', '03.840621', '03.729567', '03.880949', '03.512459', '03.657523', '03.815451', '03.768251', '03.881758', '03.808729']


In [31]:
# Placing both sets of values in a dataframe and saving it as an excel worksheet.
Dec_Tree = pd.DataFrame(seconds_dt, columns=['Decision Tree'])
SVM = pd.DataFrame(seconds_sv, columns=['Support Vector Machine'])
frames = [Dec_Tree, SVM]
project = pd.concat(frames, axis=1)
project = project.apply(pd.to_numeric)
project.to_excel('Project2.xlsx', index=False)