In [13]:
import pandas as pd  
import numpy as np  
import matplotlib.pyplot as plt 

from sklearn.model_selection import train_test_split  
from sklearn.svm import SVC
from sklearn.metrics import classification_report, confusion_matrix  



# Loading the data file covering the last 50 meters before the traffic light


In [14]:
# Read the whitespace-separated simulation results into a DataFrame.
# Column labels are supplied explicitly through `names`: the first column is
# the class label, the remaining eleven are labelled 55 down to 5
# (presumably the distance in meters to the traffic light -- TODO confirm).
# NOTE(review): this is an absolute, machine-specific path; adjust it before
# running the notebook on another machine.
RESULTS_PATH = "C:/Users/Ivan/Desktop/LastSimulations/Trafficlight-simulations/results.txt"
badBehaviourData = pd.read_csv(
    RESULTS_PATH,
    sep=r"\s+",  # documented replacement for the deprecated delim_whitespace=True
    names=["class", 55, 50, 45, 40, 35, 30, 25, 20, 15, 10, 5],
)
# Show the first rows as a quick sanity check of the parsed columns.
badBehaviourData.head()

Unnamed: 0,class,55,50,45,40,35,30,25,20,15,10,5
0,1,37.25,37.11,36.89,36.7,36.56,36.37,36.16,35.96,35.84,35.6,35.37
1,1,37.04,36.93,36.72,36.51,36.31,36.18,35.98,35.75,35.56,35.43,35.24
2,1,37.15,36.93,36.79,36.58,36.36,36.19,36.05,35.82,35.62,35.41,35.2
3,1,35.44,35.42,35.44,35.44,35.42,35.43,35.43,35.43,35.43,35.37,35.19
4,1,34.28,34.27,34.26,34.26,34.27,34.27,34.27,34.26,34.26,34.27,34.27


# Splitting the data into features (x) and labels (y)
We need to separate the class column, which indicates whether a vehicle ran the red light (1), known as bad behaviour, or stopped correctly at the traffic light (0), known as right behaviour.

> bad behaviour **(1)**

> good behaviour **(0)**

In [15]:
# Labels: the 'class' column (1 = bad behaviour, 0 = good behaviour).
y = badBehaviourData["class"]
# Features: every column except the label.
x = badBehaviourData.drop(labels="class", axis="columns")

# Splitting the data into training, test and validation datasets
To do this, we first split all the data into training and test sets, and then split the training set again into training and validation sets.

In [16]:
# First split: hold out 20% of all samples as the test set.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    random_state=1,
    test_size=0.2,
)

# Second split: hold out 20% of the remaining training samples for validation.
# NOTE(review): x_val / y_val are not used by any cell visible in this notebook.
x_train, x_val, y_train, y_val = train_test_split(
    x_train,
    y_train,
    random_state=1,
    test_size=0.2,
)

# Training Phase
Now that we have separated the datasets, we can train our SVM on the training data. Scikit-Learn's svm module provides built-in classes for different SVM algorithms; here we use the SVC class. Its most important parameter is the kernel type. For a simple SVM this parameter is set to "linear", since a simple SVM can only classify linearly separable data; the first model below uses a polynomial kernel ("poly") instead.

The fit method of SVC class is called to train the algorithm on the training data, which is passed as a parameter to the fit method.

In [17]:
# Support vector classifier with a polynomial kernel (other settings default).
svclassifier = SVC(kernel="poly")
# Fit on the training split; the fitted estimator is the cell's displayed value.
svclassifier.fit(X=x_train, y=y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='poly',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

# Predictions 
Once our model has been trained on the data, we run the predictions so that we can later evaluate how well the system works.

In [20]:
# Predict the class of every sample in the held-out test set.
y_pred = svclassifier.predict(X=x_test)


# Result Evaluation 
We compute the confusion matrix and the classification report.
Confusion matrix, precision, recall, and F1 measures are the most commonly used metrics for classification tasks. Scikit-Learn metrics library contains the classification_report and confusion_matrix methods, which can be readily used to find out the values for these important metrics.

### Confusion matrix
In the confusion matrix, each row corresponds to the actual class of the samples and each column to the predicted class, so every cell counts the samples of a given actual class that received a given prediction.

### Classification report

##### Precision
Precision is the ability of a classifier not to label an instance positive that is actually negative. For each class it is defined as the ratio of true positives to the sum of true and false positives. Said another way, “for all instances classified positive, what percent was correct?”
##### Recall
Recall is the ability of a classifier to find all positive instances. For each class it is defined as the ratio of true positives to the sum of true positives and false negatives. Said another way, “for all instances that were actually positive, what percent was classified correctly?”
##### F1 score
The F1 score is a weighted harmonic mean of precision and recall such that the best score is 1.0 and the worst is 0.0. Generally speaking, F1 scores are lower than accuracy measures as they embed precision and recall into their computation. As a rule of thumb, the weighted average of F1 should be used to compare classifier models, not global accuracy.
##### Support
Support is the number of actual occurrences of the class in the specified dataset. Imbalanced support in the training data may indicate structural weaknesses in the reported scores of the classifier and could indicate the need for stratified sampling or rebalancing. Support doesn’t change between models but instead diagnoses the evaluation process.



In [30]:
# Display names for the report rows: class 0 first, then class 1.
target_names = ["good behaviour", "bad behaviour"]
# Print the confusion matrix followed by the per-class metrics.
print(
    confusion_matrix(y_test, y_pred),
    classification_report(y_test, y_pred, target_names=target_names),
    sep="\n",
)

[[1751   36]
 [   0 3957]]
                precision    recall  f1-score   support

good behaviour       1.00      0.98      0.99      1787
 bad behaviour       0.99      1.00      1.00      3957

   avg / total       0.99      0.99      0.99      5744



# System training using a linear kernel

In [31]:
# Replace the previous classifier with one that uses a linear kernel.
svclassifier = SVC(kernel="linear")
# Fit on the same training split; the fitted estimator is the cell's displayed value.
svclassifier.fit(X=x_train, y=y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='linear',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [32]:
# Predict on the test set with the linear-kernel classifier.
y_pred = svclassifier.predict(X=x_test)
# Print the confusion matrix followed by the per-class metrics.
print(
    confusion_matrix(y_test, y_pred),
    classification_report(y_test, y_pred, target_names=target_names),
    sep="\n",
)


[[1701   86]
 [  19 3938]]
                precision    recall  f1-score   support

good behaviour       0.99      0.95      0.97      1787
 bad behaviour       0.98      1.00      0.99      3957

   avg / total       0.98      0.98      0.98      5744



# System training using an RBF kernel

In [39]:
# Replace the previous classifier with one that uses an RBF kernel.
svclassifier = SVC(kernel="rbf")
# Fit on the same training split; the fitted estimator is the cell's displayed value.
svclassifier.fit(X=x_train, y=y_train)

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)

In [40]:
# Predict on the test set with the RBF-kernel classifier.
y_pred = svclassifier.predict(X=x_test)
# Print the confusion matrix followed by the per-class metrics.
print(
    confusion_matrix(y_test, y_pred),
    classification_report(y_test, y_pred, target_names=target_names),
    sep="\n",
)

[[1771   16]
 [   1 3956]]
                precision    recall  f1-score   support

good behaviour       1.00      0.99      1.00      1787
 bad behaviour       1.00      1.00      1.00      3957

   avg / total       1.00      1.00      1.00      5744

