# Rotation detection using Machine Learning and the databot2.0

## Imports

In [None]:
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

## Gather / Collect Data

## Execute rotational_data_collector.py script

We will need to collect rotational data for the following targets:

* stationary
* pendulum
* vertical
* horizontal

## Explore and Visualize Data

In [None]:
def _read_capture(path):
    """Load one capture file; every line of the file is a separate JSON record."""
    return pd.read_json(path, lines=True)


# One dataframe per recorded motion.
df_steady = _read_capture("./data/steady.txt")
df_pendulum = _read_capture("./data/pendulum.txt")
df_horizontal = _read_capture("./data/horizontal.txt")
df_vertical = _read_capture("./data/vertical.txt")


In [None]:
# (rows, columns) of the steady dataframe.
df_steady.shape

In [None]:
# (rows, columns) of the pendulum dataframe.
df_pendulum.shape

In [None]:
# (rows, columns) of the horizontal dataframe.
df_horizontal.shape

In [None]:
# (rows, columns) of the vertical dataframe.
df_vertical.shape

In [None]:
# Merge all of the separate dataframes into a single dataframe.
# ignore_index=True renumbers the merged rows 0..n-1. Without it each source
# frame keeps its own index, so the same labels repeat once per frame and
# label-based lookups or alignment on the merged frame become ambiguous.
df = pd.concat(
    [df_steady, df_pendulum, df_horizontal, df_vertical],
    ignore_index=True,
)


In [None]:
# (rows, columns) of the merged dataframe.
df.shape

In [None]:
# Print each column's name, non-null count and dtype.
df.info()

As we would expect, there are no missing values and no strings. Each column is made up of numbers.

In [None]:
# Show the first rows of the merged dataframe.
df.head()

In [None]:
# Show the last rows of the merged dataframe.
df.tail()

### Visualizations

In [None]:
# Text label for each numeric rotation class; the plots below colour by it.
rotation_labels = {0: 'steady', 1: 'pendulum', 2: 'horizontal', 3: 'vertical'}
df['rotation_name'] = df['rotation'].map(rotation_labels)

### Visualize the Acceleration X data


In [None]:
# linear_acceleration_x against time, one colour per rotation class.
sns.scatterplot(
    data=df,
    x='time',
    y='linear_acceleration_x',
    hue='rotation_name',
    alpha=0.2,
    palette=['green', 'orange', 'dodgerblue', 'red'],
)


### Visualize the Acceleration Y data


In [None]:
# linear_acceleration_y against time, one colour per rotation class.
sns.scatterplot(
    data=df,
    x='time',
    y='linear_acceleration_y',
    hue='rotation_name',
    alpha=0.2,
    palette=['green', 'orange', 'dodgerblue', 'red'],
)

### Visualize the Acceleration Z data


In [None]:
# linear_acceleration_z against time, one colour per rotation class.
sns.scatterplot(
    data=df,
    x='time',
    y='linear_acceleration_z',
    hue='rotation_name',
    alpha=0.2,
    palette=['green', 'orange', 'dodgerblue', 'red'],
)

### Visualize the Gyroscope X data

In [None]:
# gyro_x against time, one colour per rotation class.
sns.scatterplot(
    data=df,
    x='time',
    y='gyro_x',
    hue='rotation_name',
    alpha=0.2,
    palette=['green', 'orange', 'dodgerblue', 'red'],
)

### Visualize the Gyroscope Y data

In [None]:
# gyro_y against time, one colour per rotation class.
sns.scatterplot(
    data=df,
    x='time',
    y='gyro_y',
    hue='rotation_name',
    alpha=0.2,
    palette=['green', 'orange', 'dodgerblue', 'red'],
)

### Visualize the Gyroscope Z data

In [None]:
# gyro_z against time, one colour per rotation class.
sns.scatterplot(
    data=df,
    x='time',
    y='gyro_z',
    hue='rotation_name',
    alpha=0.2,
    palette=['green', 'orange', 'dodgerblue', 'red'],
)

### Absolute Linear Acceleration

In [None]:
# absolute_linear_acceleration against time, one colour per rotation class.
sns.scatterplot(
    data=df,
    x='time',
    y='absolute_linear_acceleration',
    hue='rotation_name',
    alpha=0.2,
    palette=['green', 'orange', 'dodgerblue', 'red'],
)

## Prepare Data

Some machine learning models will need us to scale the values to the same range.  

RandomForestClassifier does not have that restriction

Because we have no missing data values and all of the values are numeric, there really is no significant data preparation that needs to be done.

Keep in mind... if we had picked a model other than a tree-based model, then we would have to scale the input values.

In [None]:
# Target: the numeric rotation class.
y = df['rotation']

# Features: every remaining column except the target, its text label and
# the two time columns.
non_feature_columns = ['rotation', 'time', 'timestamp', 'rotation_name']
X = df.drop(columns=non_feature_columns)

In [None]:
# (rows, feature columns) of the feature matrix.
X.shape

In [None]:
# Number of target labels; should equal the row count of X.
y.shape

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows for testing. stratify=y keeps the class proportions
# the same in both parts, and random_state pins the shuffle so the split is
# identical on every run.
# NOTE(review): rows are shuffled individually, so neighbouring samples from the
# same recording can end up on both sides of the split; the test score may be
# optimistic compared with a split made per recording or per time block.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, shuffle=True, random_state=42
)

## Train a Machine Learning Model

In [None]:
# Random forest with scikit-learn's default hyperparameters. random_state is
# fixed so repeated runs on the same training data build the same forest.
model = RandomForestClassifier(random_state=42)

In [None]:
# Fit the forest on the training portion only.
model.fit(X_train, y_train)

In [None]:
# Predict the rotation class for the rows the model was trained on.
y_pred_train = model.predict(X_train)


In [None]:
# Fraction of training rows labelled correctly; the model has already seen
# these rows, so this is not a measure of how well it generalises.
accuracy_score(y_train, y_pred_train)


## Test Machine Learning Model

In [None]:
# Predict the rotation class for the held-out rows.
y_pred_test = model.predict(X_test)

In [None]:
# Fraction of held-out rows labelled correctly.
accuracy_score(y_test, y_pred_test)

### What were the most important features to the RandomForestClassifier model?

In [None]:
# Pair every feature name with the importance the fitted forest assigned to it.
importance_list = list(zip(X_train.columns, model.feature_importances_))

# Highest importance first; the cell displays the first ten entries.
sorted_importance_list = sorted(
    importance_list, key=lambda pair: pair[1], reverse=True
)
sorted_importance_list[:10]

## Make Predictions on new data

Before we make predictions on new data, we would like to re-train the model on all of the collected data.

In [None]:
# Fresh, unfitted forest for the final fit on all collected data. random_state
# is fixed so the model that gets saved can be rebuilt from the same data.
model = RandomForestClassifier(random_state=42)

In [None]:
# Final fit on every collected row (training and test portions together).
model.fit(X,y)

In [None]:
# Save the fitted model to disk so a separate script can load it and
# make predictions on new data read from the databot.
import joblib

joblib.dump(model, "./rotation_detector_model.sav")


Let's go write a new script to read new data from the databot and use the model to make new predictions.