In [1]:
!pip install coremltools
!pip install scikit-learn==1.1.2



In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import coremltools as ct



Loading and Processing the Data

In [4]:
# Read the SaYoPillow table and the two SWELL tables from /content.
sayo_df = pd.read_csv('/content/SaYoPillow.csv')
swell_df_train = pd.read_csv('/content/train_selected.csv')
swell_df_test = pd.read_csv('/content/test_selected.csv')


def _encode_labels(labels, mapping, source):
    """Map a label column onto class ids, refusing values with no mapping.

    Series.map converts every value that is absent from ``mapping`` into NaN
    without raising, which would silently corrupt the training labels. This
    helper performs the same mapping but raises instead.

    Parameters:
        labels: pandas Series holding the raw label values.
        mapping: dict from raw label value to integer class id.
        source: short description of the column, used in the error message.

    Returns:
        The mapped Series (identical to ``labels.map(mapping)``).

    Raises:
        ValueError: if any entry of ``labels`` (including a missing value)
            has no class id in ``mapping``.
    """
    encoded = labels.map(mapping)
    unmapped_rows = encoded.isna()
    if unmapped_rows.any():
        unmapped_values = labels[unmapped_rows].unique().tolist()
        raise ValueError(
            f'{source}: no class id defined for label value(s) {unmapped_values}')
    return encoded


# Integer class ids for the SWELL 'condition' strings.
swell_mapping = {'no stress': 0, 'time pressure': 1, 'interruption': 2}

# Encode the 'condition' column of both SWELL tables with the same mapping.
swell_df_train['condition'] = _encode_labels(
    swell_df_train['condition'], swell_mapping, "swell_df_train['condition']")
swell_df_test['condition'] = _encode_labels(
    swell_df_test['condition'], swell_mapping, "swell_df_test['condition']")

# Collapse the five SaYoPillow 'sl' levels onto the three SWELL class ids
# (0 -> 0, 1 and 2 -> 1, 3 and 4 -> 2).
sayo_mapping = {0:0, 1:1, 2:1, 3:2, 4:2}
sayo_df['sl'] = _encode_labels(sayo_df['sl'], sayo_mapping, "sayo_df['sl']")

In [5]:
# Readable replacements for the terse SaYoPillow column names that are kept.
column_mapping = {
    't': 'body temperature',
    'bo': 'blood oxygen levels',
    'sr.1': 'number of hours of sleep',
}

# Rename first, then discard the signals we cannot obtain: snoring range and
# limb movement rate ('sr', 'lm'); 'rr' and 'hr' are removed along with them
# (presumably respiration rate and heart rate — confirm against the dataset).
# Gotcha: the result is written back to sayo_df, so running this cell a second
# time without reloading the data raises KeyError on the already-dropped columns.
sayo_df = (
    sayo_df
    .rename(columns=column_mapping)
    .drop(columns=['sr', 'lm', 'rr', 'hr'])
)

# Show the reshaped frame.
print(sayo_df)

     body temperature  blood oxygen levels     rem  number of hours of sleep  \
0              91.840               89.840   99.60                     1.840   
1              91.552               89.552   98.88                     1.552   
2              96.000               95.000   85.00                     7.000   
3              90.768               88.768   96.92                     0.768   
4              97.872               96.248   72.48                     8.248   
..                ...                  ...     ...                       ...   
625            92.960               90.960   89.80                     3.440   
626            98.064               96.376   73.76                     8.376   
627            86.880               84.256  101.88                     0.000   
628            95.728               94.592   84.32                     6.728   
629            93.392               91.392   91.96                     4.088   

     sl  
0     2  
1     2  
2     1  

In [6]:
# Hold out 20% of the SaYoPillow rows for testing; 'sl' is the target and
# every other column is a feature. The seed is fixed so the split is repeatable.
y_sayo = sayo_df['sl']
X_sayo = sayo_df.drop(columns=['sl'])
X_sayo_train, X_sayo_test, y_sayo_train, y_sayo_test = train_test_split(
    X_sayo,
    y_sayo,
    test_size=0.2,
    random_state=42,
)

In [7]:
# Sanity check: shape of each SaYoPillow split, one per line.
print(
    *(part.shape for part in (X_sayo_train, X_sayo_test, y_sayo_train, y_sayo_test)),
    sep='\n',
)
# Container type of the full label column.
print(type(y_sayo))

(504, 4)
(126, 4)
(504,)
(126,)
<class 'pandas.core.series.Series'>


In [8]:
# Pool the two SWELL tables and draw a fresh 80/20 split from the pooled rows.
# NOTE(review): this discards the partition given by the two input files —
# confirm that re-splitting is intended.
swell_concat = pd.concat([swell_df_train, swell_df_test], axis = 0)

# Select the target by name instead of by position. 'condition' is the column
# that was integer-encoded right after loading; relying on it being the last
# column would silently train on the wrong target (and leave the real label
# among the features) if the CSV column order ever differs.
X_swell_concat = swell_concat.drop(columns=['condition'])
y_swell_concat = swell_concat['condition']
X_swell_train, X_swell_test, y_swell_train, y_swell_test = train_test_split(
    X_swell_concat, y_swell_concat, test_size=0.2, random_state=42)

In [9]:
# Sanity check: shape of each SWELL split, one per line.
print(
    *(part.shape for part in (X_swell_train, X_swell_test, y_swell_train, y_swell_test)),
    sep='\n',
)

(328257, 35)
(82065, 35)
(328257,)
(82065,)


Training Models

In [10]:
# # using grid search to find the best n_estimators hyperparameter for swell
# from sklearn.model_selection import GridSearchCV
# from sklearn.ensemble import RandomForestClassifier

# # Define the parameter grid
# param_grid = {'n_estimators': [50, 100, 200, 300, 400, 500]}

# # Initialize the Random Forest classifier
# rf = RandomForestClassifier(random_state=42)

# # Perform grid search with cross-validation
# grid_search_swell = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5)
# grid_search_swell.fit(X_swell_train, y_swell_train)

# # Best parameter value for n_estimators
# best_n_estimators = grid_search_swell.best_params_['n_estimators']
# print(f'Best n_estimators for swell df: {best_n_estimators}')

In [11]:
# # using grid search to find the best n_estimators hyperparameter for sayo
# # Define the parameter grid
# param_grid = {'n_estimators': [50, 100, 200, 300, 400, 500]}

# # Initialize the Random Forest classifier
# rf = RandomForestClassifier(random_state=42)

# # Perform grid search with cross-validation
# grid_search_sayo = GridSearchCV(estimator=rf, param_grid=param_grid, cv=5)
# grid_search_sayo.fit(X_sayo_train, y_sayo_train)

# # Best parameter value for n_estimators
# best_n_estimators_sayo = grid_search_sayo.best_params_['n_estimators']
# print(f'Best n_estimators for sayo df: {best_n_estimators_sayo}')

In [12]:
from sklearn.ensemble import RandomForestClassifier

# SWELL model: a 100-tree random forest with a fixed seed, fitted on the SWELL
# training split. fit() returns the estimator, so it can be chained directly.
model1 = RandomForestClassifier(random_state=42, n_estimators=100).fit(
    X_swell_train, y_swell_train)

# SaYoPillow model: same configuration, fitted on the SaYoPillow training split.
model2 = RandomForestClassifier(random_state=42, n_estimators=100).fit(
    X_sayo_train, y_sayo_train)

# End on the fitted estimator so the cell still displays it.
model2

Computing Accuracy

In [13]:
from sklearn.metrics import accuracy_score

# SWELL: predict the held-out rows and score them against the true labels.
y_swell_pred = model1.predict(X_swell_test)
accuracy_swell = accuracy_score(y_swell_test, y_swell_pred)

# SaYoPillow: same procedure with the second forest.
y_sayo_pred = model2.predict(X_sayo_test)
accuracy_sayo = accuracy_score(y_sayo_test, y_sayo_pred)

# Report both held-out accuracies.
print(f'Accuracy for swell df: {accuracy_swell}')
print(f'Accuracy for sayo df: {accuracy_sayo}')

Accuracy for swell df: 1.0
Accuracy for sayo df: 0.9841269841269841


Converting Trained Models to an Xcode-Supported Format (Core ML)

In [14]:
# Convert both fitted forests with the coremltools scikit-learn converter.
# No input/output feature names are passed, so the converter's defaults apply.
model1_mlmodel, model2_mlmodel = (
    ct.converters.sklearn.convert(fitted) for fitted in (model1, model2)
)

# Write each converted model to its own .mlmodel file in the working directory.
model1_mlmodel.save('model_1.mlmodel')
model2_mlmodel.save('model_2.mlmodel')

