# Rotation detection using Machine Learning and the databot2.0

## Imports

In [2]:
import pandas as pd
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Gather / Collect Data

## Execute rotational_data_collector.py script

We will need to collect rotational data for the follow targets:

* stationary
* pendulum
* vertical
* horizontal

# Explore and Visualize Data

In [3]:
df_steady = pd.read_json("./data2/steady.txt", lines=True)
df_pendulum = pd.read_json("./data2/pendulum.txt", lines=True)
df_horizontal = pd.read_json("./data2/horizontal.txt", lines=True)
df_vertical = pd.read_json("./data2/vertical.txt", lines=True)


In [11]:
df_steady.shape

(1000, 7)

In [12]:
df_pendulum.shape

(1000, 7)

In [13]:
df_horizontal.shape

(1000, 7)

In [14]:
df_vertical.shape

(1000, 7)

In [15]:
# Merge all of the separate dataframes into a single dataframe
df = pd.concat([df_steady, df_pendulum, df_horizontal, df_vertical])


In [16]:
df.shape

(4000, 7)

In [17]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 4000 entries, 0 to 999
Data columns (total 7 columns):
 #   Column                        Non-Null Count  Dtype         
---  ------                        --------------  -----         
 0   time                          4000 non-null   float64       
 1   linear_acceleration_x         4000 non-null   float64       
 2   linear_acceleration_y         4000 non-null   float64       
 3   linear_acceleration_z         4000 non-null   float64       
 4   absolute_linear_acceleration  4000 non-null   float64       
 5   timestamp                     4000 non-null   datetime64[ns]
 6   rotation                      4000 non-null   int64         
dtypes: datetime64[ns](1), float64(5), int64(1)
memory usage: 250.0 KB


As we would expect, there are no missing values and no Strings.  Each column is made up of numbers

In [18]:
df.head()

Unnamed: 0,time,linear_acceleration_x,linear_acceleration_y,linear_acceleration_z,absolute_linear_acceleration,timestamp,rotation
0,0.2,0.26,-0.0,-0.07,0.27,2023-09-27 20:51:55.672769024,0
1,0.4,0.02,-0.13,-0.08,0.16,2023-09-27 20:51:55.874928128,0
2,0.6,-0.21,0.13,-0.17,0.31,2023-09-27 20:51:56.088963072,0
3,0.8,0.17,-0.13,-0.36,0.42,2023-09-27 20:51:56.280794112,0
4,1.0,0.06,0.11,-0.24,0.28,2023-09-27 20:51:56.471280128,0


In [19]:
df.tail()

Unnamed: 0,time,linear_acceleration_x,linear_acceleration_y,linear_acceleration_z,absolute_linear_acceleration,timestamp,rotation
995,199.34,89.76,-100.28,-27.97,137.45,2023-09-27 21:03:11.612211968,2
996,199.54,76.91,-81.77,-26.25,115.28,2023-09-27 21:03:11.815014912,2
997,199.74,135.82,-136.79,-55.84,200.69,2023-09-27 21:03:12.006514944,2
998,199.94,76.75,-85.45,-23.46,117.23,2023-09-27 21:03:12.208093952,2
999,200.14,90.41,-88.34,-26.01,129.06,2023-09-27 21:03:12.410193920,2


### Visualizations

In [21]:
# Add a name column to make labeling easier
df['rotation_name'] = df['rotation'].map({0:'steady', 1:'pendulum', 2:'horizontal', 3:'vertical'})

### Visualize the Acceleration X data


In [None]:
sns.scatterplot(data=df, x='time', y='linear_acceleration_x', hue='rotation_name', alpha=0.2, palette=['green', 'orange', 'dodgerblue', 'red'] )


### Visualize the Acceleration Y data


In [None]:
sns.scatterplot(data=df, x='time', y='linear_acceleration_y', hue='rotation_name', alpha=0.2, palette=['green', 'orange', 'dodgerblue', 'red'] )

### Visualize the Acceleration Z data


In [None]:
sns.scatterplot(data=df, x='time', y='linear_acceleration_z', hue='rotation_name', alpha=0.2, palette=['green', 'orange', 'dodgerblue', 'red'] )

### Visualize the Gyroscope X data

In [None]:
sns.scatterplot(data=df, x='time', y='gyro_x', hue='rotation_name', alpha=0.2, palette=['green', 'orange', 'dodgerblue', 'red'] )

### Visualize the Gyroscope Y data

In [None]:
sns.scatterplot(data=df, x='time', y='gyro_y', hue='rotation_name', alpha=0.2, palette=['green', 'orange', 'dodgerblue', 'red'] )

### Visualize the Gyroscope Z data

In [None]:
sns.scatterplot(data=df, x='time', y='gyro_z', hue='rotation_name', alpha=0.2, palette=['green', 'orange', 'dodgerblue', 'red'] )

### Absolute Linear Acceleration

In [None]:
sns.scatterplot(data=df, x='time', y='absolute_linear_acceleration', hue='rotation_name', alpha=0.2, palette=['green', 'orange', 'dodgerblue', 'red'] )

## Save DataFrame to CSV

In [None]:
df.to_csv("./data/all_data.csv", header=True, index=False)

In [None]:
new_df = pd.read_csv("./data/all_data.csv")

In [None]:
new_df.shape

In [None]:
new_df.head()

# Prepare Data

Some machine learning models will need us to scale the values to the same range.  

RandomForestClassifier does not have that restriction

Because we have no missing data values and all of the values are numeric, there really is no significant data preparation that needs to be done.

Keep in mind... if we had picked a model other an a Tree based model, then we would have to scale the input values

In [22]:
df.head()

Unnamed: 0,time,linear_acceleration_x,linear_acceleration_y,linear_acceleration_z,absolute_linear_acceleration,timestamp,rotation,rotation_name
0,0.2,0.26,-0.0,-0.07,0.27,2023-09-27 20:51:55.672769024,0,steady
1,0.4,0.02,-0.13,-0.08,0.16,2023-09-27 20:51:55.874928128,0,steady
2,0.6,-0.21,0.13,-0.17,0.31,2023-09-27 20:51:56.088963072,0,steady
3,0.8,0.17,-0.13,-0.36,0.42,2023-09-27 20:51:56.280794112,0,steady
4,1.0,0.06,0.11,-0.24,0.28,2023-09-27 20:51:56.471280128,0,steady


In [23]:
y = df['rotation']
X = df.drop(columns=['rotation', 'time', 'timestamp', 'rotation_name'])

In [24]:
X.shape

(4000, 4)

In [25]:
y.shape

(4000,)

In [26]:
from sklearn.model_selection import train_test_split

In [27]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, shuffle=True)

# Train a Machine Learning Model

In [28]:
model = RandomForestClassifier()

In [29]:
model.fit(X_train, y_train)

In [30]:
y_pred_train = model.predict(X_train)


In [31]:
accuracy_score(y_train, y_pred_train)


1.0

# Test Machine Learning Model

In [32]:
y_pred_test = model.predict(X_test)

In [33]:
accuracy_score(y_test, y_pred_test)

0.86375

### What were the most important features to the RandomForestClassifier model

In [34]:
importance_list = []

for tup in zip(X_train.columns, model.feature_importances_):
    
    importance_list.append(tup) 
sorted_importance_list = sorted(importance_list, key=lambda tup: tup[1], reverse = True)
sorted_importance_list[0:10]

[('absolute_linear_acceleration', 0.34514988612302),
 ('linear_acceleration_x', 0.23512207383251874),
 ('linear_acceleration_z', 0.229252667008803),
 ('linear_acceleration_y', 0.19047537303565823)]

# Make Predictions on new data

Before we make predictions on new data, we would like to re-train the model on all of the collected data.

In [35]:
model = RandomForestClassifier()

In [36]:
model.fit(X,y)

In [37]:
# Save the model so we can use it to autonmously drive the car
import joblib

joblib.dump(model, "./rotation_detector_model_no_gyro.sav")


['./rotation_detector_model_no_gyro.sav']

Let's go write a new script to read new data from the databot and use the model to make new predictions.

# Try to improve the performance of the model

In [4]:
df = pd.concat([df_steady, df_pendulum, df_vertical])


In [5]:
y = df['rotation']
X = df.drop(columns=['rotation', 'time', 'timestamp'])

In [6]:
model = RandomForestClassifier()

In [7]:
model.fit(X,y)

In [8]:
import joblib

joblib.dump(model, "./rotation_detector_model_no_horizontal.sav")


['./rotation_detector_model_no_horizontal.sav']