# Data Visualisation
 ## Data Importation
 

In [5]:
from plotly.offline import download_plotlyjs, init_notebook_mode, iplot
from sklearn.model_selection import cross_val_predict
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import RandomForestRegressor 
from sklearn.metrics import confusion_matrix
from plotly.graph_objects import *
import plotly.figure_factory as ff
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
import numpy as np

# Enable plotly's offline notebook rendering (connected=True is passed as in the original setup).
init_notebook_mode(connected=True)


# Load the training features and the two target frames from local pickle files.
# NOTE(review): unpickling can execute arbitrary code - only load these files
# if they come from a trusted source.
X_train = pd.read_pickle("x_train.pkl")
y_train = pd.read_pickle("y_train.pkl")
y_train_time = pd.read_pickle("y_train_time.pkl")

## Target Time Occurrences

In [2]:
# Frequency table of the distinct values in the 'time' target column.
(
    y_train_time['time']
    .value_counts()
    .to_frame()
)

Unnamed: 0,time
4.0,2860
0.1,572
0.6,572
0.7,572
0.4,572
0.2,572
0.8,572
0.3,572
0.9,572
1.0,572


## Target Class Occurrences

In [2]:
# Human-readable names for the numeric class codes found in y_train['class'].
CLASS_NAMES = {0: 'Turn Left', 1: 'Keep Lane', 2: 'Turn Right'}

# Count once so that the bar labels and the bar heights are guaranteed to come
# from the same Series, in the same order.
class_counts = y_train['class'].value_counts()

# Unknown codes fall back to their string form instead of being dropped;
# dropping them would leave fewer labels than bars and misalign the chart.
x = [CLASS_NAMES.get(label, str(label)) for label in class_counts.index]
y = class_counts.tolist()

fig = go.Figure(data=[go.Bar(x=x, y=y, text=y)])

# Let plotly decide whether the count is drawn inside or outside each bar.
fig.update_traces(textposition='auto')

fig.update_layout(
    title='Target Class Occurrences',
    xaxis_title='Classes',
    yaxis_title='Occurrences',
    xaxis={'tickformat': ',d'},
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    uniformtext_minsize=5,
    uniformtext_mode='hide'
)

fig

## Features Correlation Matrix

In [4]:
# Pairwise correlation between feature columns. Axis labels are taken from the
# correlation frame itself so they always match the matrix that is plotted.
corr_df = X_train.corr()
corr_mtx = corr_df.values
feature_names = list(corr_df.columns)

# Diverging scale: blue (low) -> white (middle) -> red (high).
color_scale = [
    [0, "rgb(0,0,255)"], [0.1, "rgb(51,153,255)"], [0.2, "rgb(102,204,255)"],
    [0.3, "rgb(153,204,255)"], [0.4, "rgb(204,204,255)"], [0.5, "rgb(255,255,255)"],
    [0.6, "rgb(255,204,255)"], [0.7, "rgb(255,153,255)"], [0.8, "rgb(255,102,204)"],
    [0.9, "rgb(255,102,102)"], [1, "rgb(255,0,0)"],
]

# Use a range symmetric around zero so the white midpoint of the colour scale
# corresponds to "no correlation" and negative values are not clipped.
# nanmax ignores NaN entries (e.g. produced by constant columns).
z_limit = np.nanmax(np.abs(corr_mtx))

heatmap = go.Heatmap(
    uid="0f0d45",
    name="trace 0",
    x=feature_names,
    y=feature_names,
    z=corr_mtx,
    zmin=-z_limit,
    zmax=z_limit,
    colorscale=color_scale,
)

data = [heatmap]

# go.Figure is referenced explicitly rather than through the star import.
fig = go.Figure(data=data)

fig.update_layout(
    title='Features Correlation Matrix',
    xaxis={'tickformat': ',d'},
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(0,0,0,0)',
    uniformtext_minsize=5,
    uniformtext_mode='hide'
)


fig

## Classification
### Confusion Matrix
#### Best Features for Classification

In [6]:
# Feature subset used for the `fb` classification run below.
fb_best_features_classification = [
    'v_Vel', 'lateral_current_lane',
    'longit_pos_vehicle1', 'longit_pos_vehicle2', 'longit_pos_vehicle3',
    'lat_pos_vehicle2', 'iTTC_ref3',
    'longit_pos_preced1', 'longit_pos_follow1',
    'longit_pos_preced2', 'longit_pos_follow2',
    'iTTC_preced1', 'iTTC_follow1', 'iTTC_preced2', 'iTTC_follow2',
]

# Feature subset used for the `sb` classification run below.
sb_best_features_classification = [
    'lateral_current_lane', 'iTTC_follow1', 'longit_pos_follow1',
    'longit_pos_preced2', 'longit_pos_vehicle3', 'longit_pos_preced1',
    'v_Vel', 'iTTC_preced1', 'iTTC_preced2', 'lat_pos_vehicle1',
    'v_Vel_preced1', 'v_Vel_Ref3', 'lat_pos_vehicle2', 'longit_pos_vehicle2',
    'iTTC_follow2', 'v_Vel_preced2', 'longit_pos_vehicle1', 'iTTC_ref3',
    'longit_pos_follow2',
]

In [7]:
# One fresh random forest per feature subset; both use all available cores.
rf_clf_fb = RandomForestClassifier(n_jobs=-1)
rf_clf_sb = RandomForestClassifier(n_jobs=-1)

# Out-of-fold predictions from 10-fold cross-validation on each subset.
y_pred_fb = cross_val_predict(
    estimator=rf_clf_fb,
    X=X_train[fb_best_features_classification],
    y=y_train.values.ravel(),
    cv=10,
    n_jobs=-1,
)
y_pred_sb = cross_val_predict(
    estimator=rf_clf_sb,
    X=X_train[sb_best_features_classification],
    y=y_train.values.ravel(),
    cv=10,
    n_jobs=-1,
)

# Compare the true labels with the out-of-fold predictions.
conf_mat_fb = confusion_matrix(y_true=y_train.values.ravel(), y_pred=y_pred_fb)
conf_mat_sb = confusion_matrix(y_true=y_train.values.ravel(), y_pred=y_pred_sb)

In [8]:
def show_confusion_matrix(confusion_matrix, labels=None):
    """Display a confusion matrix as an annotated plotly heatmap.

    Args:
        confusion_matrix: 2-D sequence/array of counts; each row is drawn as
            one heatmap row and each cell is annotated with its value.
        labels: Optional sequence of class names used for both axes. Defaults
            to ``['Left', 'Keep', 'Right']``; its length must match the matrix
            dimensions.

    Returns:
        None. The figure is rendered with ``fig.show()``.
    """
    if labels is None:
        labels = ['Left', 'Keep', 'Right']

    # Separate copies for each axis so the figure never shares list objects.
    x_labels = list(labels)
    y_labels = list(labels)

    # Annotations must be strings, so stringify every cell of the matrix.
    cell_text = [[str(count) for count in row] for row in confusion_matrix]

    fig = ff.create_annotated_heatmap(
        confusion_matrix,
        x=x_labels,
        y=y_labels,
        annotation_text=cell_text,
        colorscale='Plotly3',
    )

    fig.update_layout(
        title_text='Confusion Matrix',
        # Extra left margin makes room for the rotated y-axis caption.
        margin=dict(t=50, l=200),
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
        uniformtext_minsize=5,
        uniformtext_mode='hide',
    )

    # Axis captions are added as paper-referenced annotations rather than as
    # axis titles, positioned below the plot and left of it respectively.
    fig.add_annotation(dict(font=dict(color="black", size=14),
                            x=0.5,
                            y=-0.15,
                            showarrow=False,
                            text="Predicted value",
                            xref="paper",
                            yref="paper"))

    fig.add_annotation(dict(font=dict(color="black", size=14),
                            x=-0.35,
                            y=0.5,
                            showarrow=False,
                            text="Actual value",
                            textangle=-90,
                            xref="paper",
                            yref="paper"))

    # Show the colour bar for the heatmap trace.
    fig['data'][0]['showscale'] = True

    fig.show()

In [9]:
# Confusion matrix for the cross-validated predictions on the `fb` feature subset.
show_confusion_matrix(conf_mat_fb)

In [10]:
# Confusion matrix for the cross-validated predictions on the `sb` feature subset.
show_confusion_matrix(conf_mat_sb)

### Original Set

In [11]:
# Baseline run: every column of X_train is used as a feature.
rf_clf = RandomForestClassifier(n_jobs=-1)
y_pred = cross_val_predict(
    estimator=rf_clf,
    X=X_train,
    y=y_train.values.ravel(),
    cv=10,
    n_jobs=-1,
)
conf_mat = confusion_matrix(y_true=y_train.values.ravel(), y_pred=y_pred)

In [12]:
# Confusion matrix for the run that uses all columns of X_train.
show_confusion_matrix(conf_mat)

### Best for regression + classification

In [13]:
# Feature subset shared by the regression and classification experiments.
best_features_reg_class = [
    'v_Vel', 'lateral_current_lane',
    'longit_pos_vehicle1', 'longit_pos_vehicle2', 'longit_pos_vehicle3',
    'lat_pos_vehicle1', 'lat_pos_vehicle2', 'iTTC_ref3',
    'v_Vel_preced1', 'v_Vel_preced2',
    'longit_pos_preced1', 'longit_pos_follow1',
    'longit_pos_preced2', 'longit_pos_follow2',
    'iTTC_preced1', 'iTTC_follow1', 'iTTC_preced2', 'iTTC_follow2',
]

# 10-fold out-of-fold predictions, then the resulting confusion matrix.
rf_clf = RandomForestClassifier(n_jobs=-1)
y_pred = cross_val_predict(
    estimator=rf_clf,
    X=X_train[best_features_reg_class],
    y=y_train.values.ravel(),
    cv=10,
    n_jobs=-1,
)
conf_mat = confusion_matrix(y_true=y_train.values.ravel(), y_pred=y_pred)

In [14]:
# Confusion matrix for the `best_features_reg_class` subset.
show_confusion_matrix(conf_mat)

### SFFS Classification

In [15]:
# Feature subset listed under the "SFFS Classification" heading.
best_features_SFFS_class = [
    'lateral_current_lane',
    'longit_pos_vehicle1',
    'lat_pos_vehicle1',
    'lat_pos_vehicle2',
    'iTTC_ref3',
    'v_Vel_follow1',
    'iTTC_preced2',
]

# 10-fold out-of-fold predictions, then the resulting confusion matrix.
rf_clf = RandomForestClassifier(n_jobs=-1)
y_pred = cross_val_predict(
    estimator=rf_clf,
    X=X_train[best_features_SFFS_class],
    y=y_train.values.ravel(),
    cv=10,
    n_jobs=-1,
)
conf_mat = confusion_matrix(y_true=y_train.values.ravel(), y_pred=y_pred)

In [16]:
# Confusion matrix for the `best_features_SFFS_class` subset.
show_confusion_matrix(conf_mat)

### SBFS Classification

In [17]:
# Feature subset listed under the "SBFS Classification" heading.
best_features_SBFS_class = [
    'lateral_current_lane',
    'longit_pos_vehicle1', 'longit_pos_vehicle2',
    'lat_pos_vehicle1', 'lat_pos_vehicle2',
    'iTTC_ref3', 'v_Vel_follow1',
    'longit_pos_follow1', 'longit_pos_preced2',
    'iTTC_preced2',
]

# 10-fold out-of-fold predictions, then the resulting confusion matrix.
rf_clf = RandomForestClassifier(n_jobs=-1)
y_pred = cross_val_predict(
    estimator=rf_clf,
    X=X_train[best_features_SBFS_class],
    y=y_train.values.ravel(),
    cv=10,
    n_jobs=-1,
)
conf_mat = confusion_matrix(y_true=y_train.values.ravel(), y_pred=y_pred)

In [18]:
# Confusion matrix for the `best_features_SBFS_class` subset.
show_confusion_matrix(conf_mat)

### Exhaustive Classification

In [19]:
# Feature subset listed under the "Exhaustive Classification" heading.
best_features_EFS_class = [
    'lateral_current_lane',
    'longit_pos_vehicle3',
    'lat_pos_vehicle1',
    'iTTC_ref3',
    'v_Vel_follow1',
    'longit_pos_follow1',
    'longit_pos_follow2',
    'iTTC_preced1',
]

# 10-fold out-of-fold predictions, then the resulting confusion matrix.
rf_clf = RandomForestClassifier(n_jobs=-1)
y_pred = cross_val_predict(
    estimator=rf_clf,
    X=X_train[best_features_EFS_class],
    y=y_train.values.ravel(),
    cv=10,
    n_jobs=-1,
)
conf_mat = confusion_matrix(y_true=y_train.values.ravel(), y_pred=y_pred)

In [20]:
# Confusion matrix for the `best_features_EFS_class` subset.
show_confusion_matrix(conf_mat)

## Regression

In [6]:
def plot_target_predicted_curve(X, y):
    """Plot cross-validated random-forest predictions against the true targets.

    A ``RandomForestRegressor`` is evaluated with 10-fold ``cross_val_predict``
    on ``X``/``y``. Samples are then sorted by their true target value and both
    the true and the predicted series are drawn as line traces.

    Args:
        X: Feature table (anything accepted by ``cross_val_predict``).
        y: Targets as a pandas object exposing ``.values``; it is flattened
            with ``.values.ravel()`` before fitting and plotting.

    Returns:
        None. The figure is rendered with ``fig.show()``.
    """
    # Use the targets that were passed in for BOTH fitting and plotting, so
    # the curve can never be drawn against a different, global target frame.
    y_true = y.values.ravel()

    rf_rgr = RandomForestRegressor(n_jobs=-1)
    y_pred = cross_val_predict(rf_rgr, X, y_true, cv=10, n_jobs=-1)

    # Sort by the true value so the target trace is monotonically increasing
    # and the predicted trace shows the error around it.
    df = pd.DataFrame({'Target': y_true, 'Predicted': y_pred},
                      columns=['Target', 'Predicted'])
    df = df.sort_values(by=['Target'])

    # The x axis is just the rank of each sample after sorting.
    x = list(range(len(df)))
    y_target = df['Target'].tolist()
    y_predicted = df['Predicted'].tolist()

    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=x,
        y=y_predicted,
        name="Predicted Time",
        connectgaps=True
    ))

    fig.add_trace(go.Scatter(
        x=x,
        y=y_target,
        name="Actual Time",
        connectgaps=True,
        line_width=2
    ))

    fig.update_layout(
        title='Actual vs. Predicted Time',
        yaxis_title='Time (s)',
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(0,0,0,0)',
        uniformtext_minsize=5,
        uniformtext_mode='hide'
    )

    # Tick labels on x are hidden because the sample rank carries no meaning.
    fig.update_xaxes(showline=True, linewidth=0.5, linecolor='black', showticklabels=False)
    fig.update_yaxes(showline=True, linewidth=0.5, linecolor='black', showgrid=True, gridwidth=0.5, gridcolor='grey')

    fig.show()

#### Original Set

In [22]:
# Baseline regression curve using every column of X_train.
plot_target_predicted_curve(X_train, y_train_time)

#### Best Features for Regression

In [23]:
# Feature subset used for the `fb` regression curve.
fb_bf_regression = [
    'v_Vel', 'lateral_current_lane', 'v_Vel_Ref3',
    'longit_pos_vehicle1', 'longit_pos_vehicle2', 'longit_pos_vehicle3',
    'lat_pos_vehicle1', 'lat_pos_vehicle2', 'iTTC_ref3',
    'v_Vel_preced1', 'v_Vel_preced2',
    'longit_pos_preced1', 'longit_pos_follow1',
    'longit_pos_preced2', 'longit_pos_follow2',
    'iTTC_preced1', 'iTTC_follow1', 'iTTC_preced2', 'iTTC_follow2',
]

# Feature subset used for the `sb` regression curve.
sb_bf_regression = [
    'lateral_current_lane', 'iTTC_follow1', 'longit_pos_follow1',
    'longit_pos_preced2', 'longit_pos_vehicle3', 'longit_pos_preced1',
    'v_Vel', 'iTTC_preced1', 'iTTC_preced2', 'lat_pos_vehicle1',
    'v_Vel_preced1', 'v_Vel_Ref3', 'lat_pos_vehicle2', 'longit_pos_vehicle2',
    'iTTC_follow2', 'v_Vel_preced2', 'longit_pos_vehicle1', 'iTTC_ref3',
    'longit_pos_follow2',
]

# Draw one curve per subset, `fb` first and `sb` second.
for feature_subset in (fb_bf_regression, sb_bf_regression):
    plot_target_predicted_curve(X_train[feature_subset], y_train_time)

#### Best feature set for Regression + Classification

In [24]:
# Feature subset shared by the regression and classification experiments.
best_features_reg_class = [
    'v_Vel', 'lateral_current_lane',
    'longit_pos_vehicle1', 'longit_pos_vehicle2', 'longit_pos_vehicle3',
    'lat_pos_vehicle1', 'lat_pos_vehicle2', 'iTTC_ref3',
    'v_Vel_preced1', 'v_Vel_preced2',
    'longit_pos_preced1', 'longit_pos_follow1',
    'longit_pos_preced2', 'longit_pos_follow2',
    'iTTC_preced1', 'iTTC_follow1', 'iTTC_preced2', 'iTTC_follow2',
]

plot_target_predicted_curve(
    X=X_train[best_features_reg_class],
    y=y_train_time,
)

#### SFFS Regression

In [25]:
# Feature subset listed under the "SFFS Regression" heading.
best_features_SFFS_rgr = [
    'lateral_current_lane',
    'lat_pos_vehicle1',
    'iTTC_ref3',
    'longit_pos_preced1',
    'longit_pos_preced2',
    'iTTC_preced2',
    'iTTC_follow2',
]

plot_target_predicted_curve(
    X=X_train[best_features_SFFS_rgr],
    y=y_train_time,
)

#### SBFS Regression

In [26]:
# Feature subset listed under the "SBFS Regression" heading.
best_features_SBFS_rgr = [
    'v_Vel', 'lateral_current_lane',
    'longit_pos_vehicle2',
    'lat_pos_vehicle1', 'lat_pos_vehicle2',
    'iTTC_ref3',
    'longit_pos_preced1', 'longit_pos_follow1',
    'longit_pos_preced2', 'longit_pos_follow2',
    'iTTC_preced2', 'iTTC_follow2',
]

plot_target_predicted_curve(
    X=X_train[best_features_SBFS_rgr],
    y=y_train_time,
)

#### Exhaustive Regression

In [10]:
# Feature subset listed under the "Exhaustive Regression" heading.
best_features_EFS_regr = [
    'lateral_current_lane',
    'longit_pos_follow1',
    'longit_pos_follow2',
    'iTTC_preced1',
    'iTTC_preced2',
]

plot_target_predicted_curve(
    X=X_train[best_features_EFS_regr],
    y=y_train_time,
)