## VEDRAR - Vehicle Event Data Recorder and Review System

### Importing supporting libraries

In [8]:
import pandas as pd
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

### Load Training DataSet 

In [5]:
data = pd.read_csv('sensor.csv')

In [6]:
data.head()

Unnamed: 0,speed,proximity,tilt,cat sp-pr,cat sp-tl
0,60,4,90,1,1
1,59,4,80,1,1
2,59,4,80,1,1
3,59,3,70,0,1
4,58,4,80,1,1


##### 1. Speed is measured in Kmh. proximity in meters and tilt in degree.
##### 2. 'cat sp-pr' column categorize the driver as good or bad based on proximity/distance maintained at a certain speed. 
##### 3. 'Cat sp-tl' column categorize the driver as good or bad based on the tilt angle while taking a turn at a certain speed.
##### 4. 1 - denotes a good driving practice & 0 - denotes a bad driving practice

### Pre-processing

In [10]:
from sklearn.preprocessing import MinMaxScaler

In [11]:
scaler = MinMaxScaler()

In [19]:
data.speed = scaler.fit_transform(data[['speed']])
data.proximity = scaler.fit_transform(data[['proximity']])
data.tilt = scaler.fit_transform(data[['tilt']])

In [20]:
data.head()

Unnamed: 0,speed,proximity,tilt,cat sp-pr,cat sp-tl
0,1.0,1.0,1.0,1,1
1,0.983333,1.0,0.875,1,1
2,0.983333,1.0,0.875,1,1
3,0.983333,0.666667,0.75,0,1
4,0.966667,1.0,0.875,1,1


In [29]:
X_sp_pr = data[['speed','proximity']]
X_sp_tl = data[['speed','tilt']]

### Training Speed vs Proximity

In [45]:
y_sp_pr = data['cat sp-pr']

In [46]:
xtrain,xtest,ytrain,ytest = train_test_split(X_sp_pr,y_sp_pr,train_size=0.7)

In [47]:
len(xtrain),len(ytrain)

(26, 26)

In [48]:
model = SVC()

In [49]:
model.fit(xtrain,ytrain)

SVC()

In [50]:
model.score(xtest,ytest)

1.0

#### High accuracy or overfitting is clearly due to the lack of data

### Speed vs Tilt

In [51]:
y_sp_tl = data['cat sp-tl']

In [56]:
xtrain1,xtest1,ytrain1,ytest1 = train_test_split(X_sp_tl,y_sp_tl,train_size=0.7)

In [60]:
model1 = SVC()

In [62]:
model1.fit(xtrain1,ytrain1)

SVC()

In [65]:
model.score(xtest1,ytest1)

0.9166666666666666

### Working with actual Data
#### Only testing Speed vs Proximity for demo purpose 

In [78]:
data_real = pd.read_csv('realdata.csv')

In [79]:
data_real.head()

Unnamed: 0,speed,proximity
0,29,1
1,0,1
2,53,2
3,52,2
4,51,2


In [80]:
data_real.speed = scaler.fit_transform(data[['speed']])
data_real.proximity = scaler.fit_transform(data[['proximity']])

In [81]:
data_real.head()

Unnamed: 0,speed,proximity
0,1.0,1.0
1,0.983333,1.0
2,0.983333,1.0
3,0.983333,0.666667
4,0.966667,1.0


In [82]:
X = data[['speed','proximity']]

In [85]:
sp_pr_data = model.predict(X)

In [86]:
sp_pr_data

array([1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0])

In [93]:
good = np.count_nonzero(sp_pr_data == 1)
bad = np.count_nonzero(sp_pr_data == 0)

### The ratio of number of times the driver followed good driving practice to bad driving practice in a given period of time helps in classifying the driver as good or bad

In [94]:
final_sp_pr_ratio = good/bad

In [95]:
final_sp_pr_ratio

0.9

#### Similarly the ratio between speed and tilt can be calculated for the same driving period.
#### For demo purpose we take the ratio as 0.8

In [96]:
final_sp_tl_ration = 0.8

## Final Classsification

#### A driver is classified as good if the ratio of speed vs proximity and speed vs tilt for a given period of time is greater than or equal to 0.75 i.e, the driver followed good driving practices for over 75% of the driving period.

In [98]:
if (final_sp_pr_ratio >= 0.75) and (final_sp_tl_ration >= 0.75):
    print("Passed")
else : print('Fail')

Passed
