## Mount to Colab runtime

In [2]:
from google.colab import drive
import sys
if('google.colab' in sys.modules):
  print("Google drive detected, mounting...")
  drive.mount('/content/gdrive')
else:
  print("No Google drive found, ignoring...")
print("Done checking")

Google drive detected, mounting...
Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).
Done checking


## Installing dependencies

In [3]:
print("Installing program...")
if 'google.colab' in sys.modules:
  !pip install numpy pandas matplotlib seaborn scikit-learn tensorflow keras torch opencv-python labelme statsmodels scipy missingno
else:
  %pip install -r studio2.req.txt

Installing program...


## Import dependencies

In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy.signal import find_peaks
from scipy import interpolate

import os
import warnings

## Define Config

In [5]:
warnings.filterwarnings('ignore')

pd.options.display.max_columns = None
pd.options.display.max_rows = None
pd.options.display.float_format = '{:.7f}'.format

## 1. Data Collection

### Define columns, file to read and what class should be used for that file
- Since my student ID ends with 2, thus I will be doing Right Upper Arm (x,y,z) and Left Upper Arm (x,y,z)
- Boning dataset will have a class of '0'
- Slicing dataset will have a class of '1'
- There will also be a 'Frame' column as well

In [6]:
os.chdir("/content/gdrive/MyDrive") if 'google.colab' in sys.modules else None
BASE_PATH = os.getcwd() + "/Colab Notebooks/COS40007/Assignment 2/ampc2" if 'google.colab' in sys.modules else os.getcwd() + "/ampc2"
contents_to_read = {
    'boning': {
        'fName': BASE_PATH + '/Boning.csv',
        'class': 0
    },
    'slicing': {
        'fName': BASE_PATH + '/Slicing.csv',
        'class': 1
    }
}
columns_to_read = [f'Right Upper Arm {k}' for k in ['x', 'y', 'z']] + [f'Left Upper Arm {k}' for k in ['x', 'y', 'z']] + ['Frame']


### Read the dataset with chosen columns, append the 'class' feature to the file

In [7]:
boning_raw_df = pd.read_csv(contents_to_read['boning']['fName'], usecols=columns_to_read)
boning_df = boning_raw_df.copy()
boning_df['class'] = contents_to_read['boning']['class']
slicing_raw_df = pd.read_csv(contents_to_read['slicing']['fName'], usecols=columns_to_read)
slicing_df = slicing_raw_df.copy()
slicing_df['class'] = contents_to_read['slicing']['class']
print(f"Shape of boning: {boning_df.shape}")
print(f"Shape of slicing: {slicing_df.shape}")

Shape of boning: (54180, 8)
Shape of slicing: (17880, 8)


### Concat two datasets to be one and save it as combined_data.csv

In [8]:
concatenated_df = pd.concat([boning_df, slicing_df], ignore_index=True)
concatenated_df.to_csv(BASE_PATH + "/combined_data.csv", index=False)
concatenated_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72060 entries, 0 to 72059
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Frame              72060 non-null  int64  
 1   Right Upper Arm x  72060 non-null  float64
 2   Right Upper Arm y  72060 non-null  float64
 3   Right Upper Arm z  72060 non-null  float64
 4   Left Upper Arm x   72060 non-null  float64
 5   Left Upper Arm y   72060 non-null  float64
 6   Left Upper Arm z   72060 non-null  float64
 7   class              72060 non-null  int64  
dtypes: float64(6), int64(2)
memory usage: 4.4 MB


## 2. Create Composite Columns

### Read the previously saved dataset

In [9]:
concatenated_raw_df = pd.read_csv(BASE_PATH + "/combined_data.csv")
concatenated_df = concatenated_raw_df.copy()
concatenated_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72060 entries, 0 to 72059
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Frame              72060 non-null  int64  
 1   Right Upper Arm x  72060 non-null  float64
 2   Right Upper Arm y  72060 non-null  float64
 3   Right Upper Arm z  72060 non-null  float64
 4   Left Upper Arm x   72060 non-null  float64
 5   Left Upper Arm y   72060 non-null  float64
 6   Left Upper Arm z   72060 non-null  float64
 7   class              72060 non-null  int64  
dtypes: float64(6), int64(2)
memory usage: 4.4 MB


In [10]:
concatenated_df.head()

Unnamed: 0,Frame,Right Upper Arm x,Right Upper Arm y,Right Upper Arm z,Left Upper Arm x,Left Upper Arm y,Left Upper Arm z,class
0,0,0.5593331,0.0244512,0.5238757,0.0052957,-0.3015276,-0.2318504,0
1,1,0.364502,0.1743617,0.5789666,-0.1390283,0.029267,0.0519042,0
2,2,-0.0410117,0.1339995,0.2854964,0.0662767,-0.2115488,0.1329666,0
3,3,0.0079405,0.223349,0.1332062,0.1735293,-0.1076815,0.040102,0
4,4,0.4181769,0.3740246,0.0801937,0.0171759,-0.2320744,0.2785343,0


### Utils function for calculating Root mean square value, Roll and Pitch

In [11]:
def calc_rmsq_for_cols(df: pd.DataFrame, cols: list[str]):
  return np.sqrt(np.mean(df[cols] ** 2, axis=1))

def calc_roll_for_col(df: pd.DataFrame, col: str):
  return 180 * np.arctan2(df[f'{col} y'], np.sqrt(df[f'{col} x'] ** 2 + df[f'{col} z'] ** 2)) / np.pi

def calc_pitch_for_col(df: pd.DataFrame, col: str):
  return 180 * np.arctan2(df[f'{col} x'], np.sqrt(df[f'{col} y'] ** 2 + df[f'{col} z'] ** 2)) / np.pi

### Calculate required composite data for Right Hand features



In [12]:
concatenated_df['right_upper_xy_rmsq'] = calc_rmsq_for_cols(concatenated_df, ['Right Upper Arm x', 'Right Upper Arm y'])
concatenated_df['right_upper_yz_rmsq'] = calc_rmsq_for_cols(concatenated_df, ['Right Upper Arm y', 'Right Upper Arm z'])
concatenated_df['right_upper_xz_rmsq'] = calc_rmsq_for_cols(concatenated_df, ['Right Upper Arm x', 'Right Upper Arm z'])
concatenated_df['right_upper_xyz_rmsq'] = calc_rmsq_for_cols(concatenated_df, ['Right Upper Arm x', 'Right Upper Arm y', 'Right Upper Arm z'])

concatenated_df['right_upper_roll'] = calc_roll_for_col(concatenated_df, 'Right Upper Arm')
concatenated_df['right_upper_pitch'] = calc_pitch_for_col(concatenated_df, 'Right Upper Arm')

### Calculate required composite data for Left Hand features

In [13]:
concatenated_df['left_upper_xy_rmsq'] = calc_rmsq_for_cols(concatenated_df, ['Left Upper Arm x', 'Left Upper Arm y'])
concatenated_df['left_upper_yz_rmsq'] = calc_rmsq_for_cols(concatenated_df, ['Left Upper Arm y', 'Left Upper Arm z'])
concatenated_df['left_upper_xz_rmsq'] = calc_rmsq_for_cols(concatenated_df, ['Left Upper Arm x', 'Left Upper Arm z'])
concatenated_df['left_upper_xyz_rmsq'] = calc_rmsq_for_cols(concatenated_df, ['Left Upper Arm x', 'Left Upper Arm y', 'Left Upper Arm z'])

concatenated_df['left_upper_roll'] = calc_roll_for_col(concatenated_df, 'Left Upper Arm')
concatenated_df['left_upper_pitch'] = calc_pitch_for_col(concatenated_df, 'Left Upper Arm')

In [14]:
concatenated_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72060 entries, 0 to 72059
Data columns (total 20 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Frame                 72060 non-null  int64  
 1   Right Upper Arm x     72060 non-null  float64
 2   Right Upper Arm y     72060 non-null  float64
 3   Right Upper Arm z     72060 non-null  float64
 4   Left Upper Arm x      72060 non-null  float64
 5   Left Upper Arm y      72060 non-null  float64
 6   Left Upper Arm z      72060 non-null  float64
 7   class                 72060 non-null  int64  
 8   right_upper_xy_rmsq   72060 non-null  float64
 9   right_upper_yz_rmsq   72060 non-null  float64
 10  right_upper_xz_rmsq   72060 non-null  float64
 11  right_upper_xyz_rmsq  72060 non-null  float64
 12  right_upper_roll      72060 non-null  float64
 13  right_upper_pitch     72060 non-null  float64
 14  left_upper_xy_rmsq    72060 non-null  float64
 15  left_upper_yz_rmsq 

### Save it as "composited_data.csv"

In [15]:
concatenated_df.to_csv(BASE_PATH + "/composited_data.csv", index=False)
concatenated_df.head()

Unnamed: 0,Frame,Right Upper Arm x,Right Upper Arm y,Right Upper Arm z,Left Upper Arm x,Left Upper Arm y,Left Upper Arm z,class,right_upper_xy_rmsq,right_upper_yz_rmsq,right_upper_xz_rmsq,right_upper_xyz_rmsq,right_upper_roll,right_upper_pitch,left_upper_xy_rmsq,left_upper_yz_rmsq,left_upper_xz_rmsq,left_upper_xyz_rmsq,left_upper_roll,left_upper_pitch
0,0,0.5593331,0.0244512,0.5238757,0.0052957,-0.3015276,-0.2318504,0,0.395886,0.3708394,0.5418945,0.4426802,1.8274536,46.8437257,0.2132451,0.2689549,0.1639857,0.2196221,-52.4354081,0.7976651
1,1,0.364502,0.1743617,0.5789666,-0.1390283,0.029267,0.0519042,0,0.2857129,0.4275537,0.4837686,0.4076216,14.297917,31.0827995,0.1004625,0.0421343,0.1049355,0.0873298,11.1564279,-66.8002446
2,2,-0.0410117,0.1339995,0.2854964,0.0662767,-0.2115488,0.1329666,0,0.0990904,0.2230068,0.2039487,0.1836174,24.9189818,-7.4091256,0.156757,0.176682,0.1050541,0.1492488,-54.9199712,14.855554
3,3,0.0079405,0.223349,0.1332062,0.1735293,-0.1076815,0.040102,0,0.1580314,0.1838867,0.0943582,0.1502129,59.1432651,1.7489195,0.1444087,0.0812511,0.1259377,0.1201609,-31.1572913,56.488559
4,4,0.4181769,0.3740246,0.0801937,0.0171759,-0.2320744,0.2785343,0,0.3967155,0.2704861,0.3010839,0.3272091,41.2964256,47.5494519,0.1645502,0.256359,0.1973276,0.209551,-39.747551,2.7124084


## 3. Data pre-processing and Feature computation

### Read the previously save "composited_data.csv"

In [16]:
composited_raw_df = pd.read_csv(BASE_PATH + "/composited_data.csv")
composited_df = composited_raw_df.copy()
composited_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 72060 entries, 0 to 72059
Data columns (total 20 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   Frame                 72060 non-null  int64  
 1   Right Upper Arm x     72060 non-null  float64
 2   Right Upper Arm y     72060 non-null  float64
 3   Right Upper Arm z     72060 non-null  float64
 4   Left Upper Arm x      72060 non-null  float64
 5   Left Upper Arm y      72060 non-null  float64
 6   Left Upper Arm z      72060 non-null  float64
 7   class                 72060 non-null  int64  
 8   right_upper_xy_rmsq   72060 non-null  float64
 9   right_upper_yz_rmsq   72060 non-null  float64
 10  right_upper_xz_rmsq   72060 non-null  float64
 11  right_upper_xyz_rmsq  72060 non-null  float64
 12  right_upper_roll      72060 non-null  float64
 13  right_upper_pitch     72060 non-null  float64
 14  left_upper_xy_rmsq    72060 non-null  float64
 15  left_upper_yz_rmsq 

### Create statistical features for the 18 columns per minute (1 min = 60fpm)
These includes:
- Mean values
- Standard deviation values
- Min values
- Max values
- Area under the curve (AUC)
- Number of peaks

In [17]:
new_cols = {}
FPM = 60
num_of_min = len(composited_df) // FPM
print(num_of_min)

for column in concatenated_df.columns:
  if column not in ['Frame', 'class']:
    values = {
      'mean': [],
      'max': [],
      'min': [],
      'std': [],
      'auc': [],
      'peak': []
    }
    for i in range(num_of_min):
      start, end = i * FPM, (i + 1) * FPM
      values['mean'].append(np.mean(concatenated_df[column][start:end]))
      values['max'].append(np.max(concatenated_df[column][start:end]))
      values['min'].append(np.min(concatenated_df[column][start:end]))
      values['std'].append(np.std(concatenated_df[column][start:end]))
      values['auc'].append(np.trapz(concatenated_df[column][start:end]))
      peaks, _ = find_peaks(concatenated_df[column][start:end])
      values['peak'].append(len(peaks))
    new_cols[f'{column}_mean'] = values['mean']
    new_cols[f'{column}_max'] = values['max']
    new_cols[f'{column}_min'] = values['min']
    new_cols[f'{column}_std'] = values['std']
    new_cols[f'{column}_auc'] = values['auc']
    new_cols[f'{column}_peak'] = values['peak']

# print(new_cols)

new_features_df = pd.DataFrame(new_cols)
new_features_df['class'] = composited_df['class'][::FPM].reset_index(drop=True)
new_features_df["Minute"] = range(1, num_of_min + 1)

1201


### Saved the processed statistical dataset as 'processed_all_data.csv'

In [18]:
new_features_df.to_csv(BASE_PATH + "/processed_all_data.csv")
print("Shape: ", new_features_df.shape)
print("Info: ", new_features_df.info())

Shape:  (1201, 110)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1201 entries, 0 to 1200
Columns: 110 entries, Right Upper Arm x_mean to Minute
dtypes: float64(90), int64(20)
memory usage: 1.0 MB
Info:  None


## 4. Training

### Import required dependencies

In [19]:
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score, GridSearchCV, train_test_split
from sklearn.feature_selection import SelectKBest, f_classif
from sklearn.decomposition import PCA
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neural_network import MLPClassifier

### Read the processed data

In [20]:
all_data_raw_df = pd.read_csv(BASE_PATH + "/processed_all_data.csv")
all_data_df = all_data_raw_df.copy()
print(all_data_df.head())
print("Info: ", all_data_df.info())

   Unnamed: 0  Right Upper Arm x_mean  Right Upper Arm x_max  \
0           0               0.0785406              0.5593331   
1           1              -0.0684224              1.9943614   
2           2              -0.3594255              2.3014753   
3           3               0.4137509              4.0874908   
4           4              -0.3973401              9.1455385   

   Right Upper Arm x_min  Right Upper Arm x_std  Right Upper Arm x_auc  \
0             -0.2900849              0.1847790              4.4727174   
1             -1.2409593              0.6084912             -4.3487300   
2             -2.4997970              1.0597075            -21.3840835   
3             -2.9570166              1.6372813             25.2034857   
4             -4.5042750              2.5578370            -22.4158592   

   Right Upper Arm x_peak  Right Upper Arm y_mean  Right Upper Arm y_max  \
0                      13               0.0132966              0.6342646   
1                 

### Perform train test split, with 30% of test data

In [21]:
X_vals = all_data_df.drop(['class', 'Minute'], axis=1)
Y_vals = all_data_df['class']

X_train, X_test, Y_train, Y_test = train_test_split(X_vals, Y_vals, test_size=0.3, random_state=42)

### Predict with Support Vector Machine

In [22]:
svc = svm.SVC()
svc.fit(X_train, Y_train)
Y_pred = svc.predict(X_test)
acc = accuracy_score(Y_test, Y_pred)

print(f"Accuracy of the SVM is: {acc * 100:2f}")


Accuracy of the SVM is: 97.783934


### 10-fold cross validation mean accuracy of SVM

In [23]:
svc = svm.SVC()
cross_val = cross_val_score(svc, X_vals, Y_vals, cv = 10)
print(f"10-fold cross validation mean accuracy score: {cross_val.mean()*100:2f}")

10-fold cross validation mean accuracy score: 96.005510


### Find the best set of values for the model using GridSearchCV

In [24]:
param_grid = {'C': [0.1, 1, 10, 100, 1000],
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
              'kernel': ['rbf']}

grid = GridSearchCV(svm.SVC(), param_grid, refit = True, verbose = 3)

# fitting the model for grid search
grid.fit(X_train, Y_train)

print("Best params to fit: ", grid.best_params_)

Fitting 5 folds for each of 25 candidates, totalling 125 fits
[CV 1/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.744 total time=   0.1s
[CV 2/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.744 total time=   0.1s
[CV 3/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.744 total time=   0.1s
[CV 4/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.744 total time=   0.1s
[CV 5/5] END ........C=0.1, gamma=1, kernel=rbf;, score=0.738 total time=   0.1s
[CV 1/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.744 total time=   0.1s
[CV 2/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.744 total time=   0.1s
[CV 3/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.744 total time=   0.1s
[CV 4/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.744 total time=   0.1s
[CV 5/5] END ......C=0.1, gamma=0.1, kernel=rbf;, score=0.738 total time=   0.2s
[CV 1/5] END .....C=0.1, gamma=0.01, kernel=rbf;, score=0.744 total time=   0.3s
[CV 2/5] END .....C=0.1, gamma=0.01, kernel=rbf

### SVM training and predicting with hyperparameter tuning

In [25]:
svc_with_hyp = svm.SVC(C=grid.best_params_['C'], gamma=grid.best_params_['gamma'], kernel=grid.best_params_['kernel'])
svc_with_hyp.fit(X_train, Y_train)

y_pred_with_hyp = svc_with_hyp.predict(X_test)

accuracy_score_with_hyp = accuracy_score(Y_test, y_pred_with_hyp)

print(f"Accuracy of the SVM with hyperparameters tuning: {accuracy_score_with_hyp * 100:2f}")

Accuracy of the SVM with hyperparameters tuning: 77.285319


### 10-fold cross validation mean accuracy of SVM with hyperparameter tuning

In [26]:
cv_scores_with_hyp = cross_val_score(svc_with_hyp, X_vals, Y_vals, cv = 10)
print(f"10-fold cross validation mean accuracy score with hyperparameter tuning: {cv_scores_with_hyp.mean()*100:2f}")

10-fold cross validation mean accuracy score with hyperparameter tuning: 75.187328


### Select features and split based on the selected features using SelectKBest

In [27]:
selector = SelectKBest(f_classif, k=100)
X_selected = selector.fit_transform(X_vals, Y_vals)

X_train, X_test, Y_train, Y_test = train_test_split(X_selected, Y_vals, test_size=0.3, random_state=42)

### SVM training and predicting with feature selection + hyperparameter tuning

In [28]:
svc_with_hyp.fit(X_train, Y_train)
y_pred_hyp_selected = svc_with_hyp.predict(X_test)
accuracy_score_with_hyp_selected = accuracy_score(Y_test, y_pred_hyp_selected)
print(f"Accuracy with hyperparameter + selected feature:  {accuracy_score_with_hyp_selected * 100:2f}")

Accuracy with hyperparameter + selected feature:  77.285319


### 10-fold cross validation mean accuracy of SVM with hyperparameter tuning + features selection

In [29]:
cv_scores_with_hyp_selected = cross_val_score(svc_with_hyp, X_selected, Y_vals, cv = 10)
print(f"10-fold cross validation accuracy with hyperparameter + selected feature:  {cv_scores_with_hyp_selected.mean() * 100:2f}")

10-fold cross validation accuracy with hyperparameter + selected feature:  75.187328


### Perform PCA to reduce dimensionality

In [30]:
pca = PCA(n_components=10)
X_pca = pca.fit_transform(X_vals)

X_train, X_test, Y_train, Y_test = train_test_split(X_pca, Y_vals, test_size=0.2, random_state=1)

### SVM training and predicting with PCA + hyperparameter tuning

In [31]:
svc_with_hyp.fit(X_train, Y_train)
y_pred_hyp_pca = svc_with_hyp.predict(X_test)
accuracy_score_with_hyp_pca = accuracy_score(Y_test, y_pred_hyp_pca)
print(f"Accuracy of the model after PCA: {accuracy_score_with_hyp_pca*100:2f}")

Accuracy of the model after PCA: 79.253112


### 10-fold cross validation mean accuracy of SVM with hyperparameter tuning + PCA

In [32]:
cv_score_with_hyp_pca = cross_val_score(svc_with_hyp, X_pca, Y_vals, cv = 10)
print(f"10-fold cross validation accuracy after PCA: {cv_score_with_hyp_pca.mean() *100:2f}")

10-fold cross validation accuracy after PCA: 75.187328


## Ext 4: SGD, RandomForest and MLPClassifier

### Read the processed data

In [33]:
all_data_raw_df = pd.read_csv(BASE_PATH + "/processed_all_data.csv")
all_data_df = all_data_raw_df.copy()
print(all_data_df.head())
print("Info: ", all_data_df.info())

   Unnamed: 0  Right Upper Arm x_mean  Right Upper Arm x_max  \
0           0               0.0785406              0.5593331   
1           1              -0.0684224              1.9943614   
2           2              -0.3594255              2.3014753   
3           3               0.4137509              4.0874908   
4           4              -0.3973401              9.1455385   

   Right Upper Arm x_min  Right Upper Arm x_std  Right Upper Arm x_auc  \
0             -0.2900849              0.1847790              4.4727174   
1             -1.2409593              0.6084912             -4.3487300   
2             -2.4997970              1.0597075            -21.3840835   
3             -2.9570166              1.6372813             25.2034857   
4             -4.5042750              2.5578370            -22.4158592   

   Right Upper Arm x_peak  Right Upper Arm y_mean  Right Upper Arm y_max  \
0                      13               0.0132966              0.6342646   
1                 

### Perform train test split, with 30% of test data

In [34]:
X_data = all_data_df.drop(['class', 'Minute'], axis=1)
Y_data = all_data_df['class']

X_train, X_test, Y_train, Y_test = train_test_split(X_data, Y_data, test_size=0.3, random_state=42)

### SGDClassifier training and predicting

In [35]:
sgd = SGDClassifier(random_state=42)
sgd.fit(X_train, Y_train)
y_pred_sgd = sgd.predict(X_test)
accuracy_score_sgd = accuracy_score(Y_test, y_pred_sgd)
print(f"Accuracy of the SGD model: {accuracy_score_sgd * 100:2f}")

Accuracy of the SGD model: 93.628809


### 10-fold cross validation mean accuracy of SGDClassifier

In [36]:
cv_score_sgd = cross_val_score(sgd, X_data, Y_data, cv=10)
cv_score_sgd_mean = cv_score_sgd.mean()
print(f"10-fold cross validation accuracy of the SGD model: {cv_score_sgd_mean * 100:2f}")

10-fold cross validation accuracy of the SGD model: 91.092287


### RandomForestClassifier training and predicting

In [37]:
rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, Y_train)
y_pred_rf = rf.predict(X_test)
accuracy_score_rf = accuracy_score(Y_test, y_pred_rf)
print(f"Accuracy of the Random Forest model: {accuracy_score_rf * 100:2f}")

Accuracy of the Random Forest model: 99.722992


### 10-fold cross validation mean accuracy of RandomForestClassifier

In [38]:
cv_score_rf = cross_val_score(rf, X_data, Y_data, cv=10)
cv_score_rf_mean = cv_score_rf.mean()
print(f"10-fold cross validation accuracy of the Random Forest model: {cv_score_rf_mean * 100:2f}")

10-fold cross validation accuracy of the Random Forest model: 96.592975


### MLPClassifier training and predicting

In [39]:
mlp = MLPClassifier(random_state=42)
mlp.fit(X_train, Y_train)
y_pred_mlp = mlp.predict(X_test)
accuracy_score_mlp = accuracy_score(Y_test, y_pred_mlp)
print(f"Accuracy of the MLP model: {accuracy_score_mlp * 100:2f}")

Accuracy of the MLP model: 94.182825


### 10-fold cross validation mean accuracy of MLPClassifier

In [40]:
cv_score_mlp = cross_val_score(mlp, X_data, Y_data, cv=10)
cv_score_mlp_mean = cv_score_mlp.mean()
print(f"10-fold cross validation accuracy of the MLP model: {cv_score_mlp_mean * 100:2f}")

10-fold cross validation accuracy of the MLP model: 94.006887
