In [25]:
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import FunctionTransformer
from sklearn.svm import SVC
# from sklearn.pipeline import Pipeline
from imblearn.pipeline import Pipeline
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.model_selection import cross_val_score
from imblearn.over_sampling import SMOTE
from collections import Counter

In [52]:
df = pd.read_csv("sleep_3000.csv")

In [59]:
def pre_process(X):
    print(X)
    X = X.drop("Person ID",axis = 1)
    X.replace('_RARE_', np.nan, inplace=True)
    
    X[['Systolic','Diastolic']] = X['Blood Pressure'].str.split('/',expand=True).apply(pd.to_numeric, errors='coerce')
    X = pd.get_dummies(X,columns=["Occupation"],prefix="Occupation")
    X.drop('Blood Pressure',axis=1,inplace=True)
    X['Systolic'].fillna(X['Systolic'].mean(), inplace=True)
    X['Diastolic'].fillna(X['Diastolic'].median(), inplace=True)
    
    numerical_features = ["Age","Sleep Duration","Quality of Sleep","Physical Activity Level","Stress Level","Heart Rate","Daily Steps","Diastolic","Systolic"]
    scaler = StandardScaler()
    X[numerical_features] = scaler.fit_transform(X[numerical_features])
    
    categorical_columns = ["Gender","BMI Category"]
    le = LabelEncoder()
    for column in categorical_columns:
      X[column] = le.fit_transform(X[column])

    return X

In [64]:
pre_process_transformer = FunctionTransformer(pre_process,validate=False)
pipeline = Pipeline([('preprocess',pre_process_transformer),('smote', SMOTE()),('classifier',SVC(kernel="rbf",C=5,gamma=1))])
X = df.drop("Sleep Disorder",axis = 1)
y = df["Sleep Disorder"]
le = LabelEncoder()
y = le.fit_transform(y)
X_train,X_test,y_train,y_test = train_test_split(X,y,random_state=42,train_size=0.80)
# X_test = pre_process(X_test)
# print(X_train)
pipeline.fit(X_train,y_train)
y_pred = pipeline.predict(X_test)
accuracy_best = accuracy_score(y_test, y_pred)
cv_scores = cross_val_score(pipeline,X_train,y_train, cv=5)
mean_cv_accuracy = cv_scores.mean()

# print(f"Optimized Random Forest AUC-ROC Score: {auc_roc_best_rfc:.4f}")
print(f"Optimized SVM Accuracy: {accuracy_best:.4f}")
print(
    f"5-Fold Cross-Validation Accuracy of Optimized Random Forest: {mean_cv_accuracy:.4f}")

      Person ID  Gender  Age  ... Blood Pressure  Heart Rate  Daily Steps
642         124  Female   39  ...         120/80          75         3600
700         357    Male   32  ...         125/80          72         6000
226         368  Female   49  ...         140/95          65        10000
1697        337  Female   54  ...         130/85          72         6000
1010         56  Female   39  ...         120/80          70         8000
...         ...     ...  ...  ...            ...         ...          ...
1638        351  Female   39  ...         _RARE_          78         6000
1095        333  Female   53  ...         120/80          72         6000
1130        103  Female   51  ...         130/85          65         8000
1294        165  Female   43  ...         _RARE_          65         7000
860         233  Female   28  ...         130/85          65         3500

[2400 rows x 12 columns]


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X['Systolic'].fillna(X['Systolic'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X['Diastolic'].fillna(X['Diastolic'].median(), inplace=True)


      Person ID  Gender  Age  ... Blood Pressure  Heart Rate  Daily Steps
1801        258  Female   39  ...         125/80          65         6000
1190         66    Male   40  ...         125/80          75         6000
1817        244  Female   47  ...         140/95          65        10000
251         306    Male   37  ...         135/90          65         3500
2505        167  Female   52  ...         _RARE_          68         7000
...         ...     ...  ...  ...            ...         ...          ...
104         325    Male   57  ...         130/85          78         7000
2087         56  Female   36  ...         120/80          70         8000
599         164    Male   37  ...         130/85          68         8000
1756         71    Male   37  ...         115/75          68         5000
1323        363  Female   44  ...         140/95          65         6000

[600 rows x 12 columns]
      Person ID  Gender  Age  ... Blood Pressure  Heart Rate  Daily Steps
85          2

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X['Systolic'].fillna(X['Systolic'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X['Diastolic'].fillna(X['Diastolic'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on whi

      Person ID  Gender  Age  ... Blood Pressure  Heart Rate  Daily Steps
642         124  Female   39  ...         120/80          75         3600
700         357    Male   32  ...         125/80          72         6000
226         368  Female   49  ...         140/95          65        10000
1697        337  Female   54  ...         130/85          72         6000
1010         56  Female   39  ...         120/80          70         8000
...         ...     ...  ...  ...            ...         ...          ...
2079        326  Female   43  ...         140/95          65         3500
1058        236    Male   35  ...         130/85          65         6000
281         259    Male   45  ...         130/85          70         8000
2762        255  Female   28  ...         125/80          72         6000
575         282    Male   49  ...         130/85          72         6000

[480 rows x 12 columns]
      Person ID  Gender  Age  ... Blood Pressure  Heart Rate  Daily Steps
642         1

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X['Systolic'].fillna(X['Systolic'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X['Diastolic'].fillna(X['Diastolic'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on whi

      Person ID  Gender  Age  ... Blood Pressure  Heart Rate  Daily Steps
85          234    Male   32  ...         125/80          65         5000
2063        341  Female   54  ...         130/85          68         7000
668         356  Female   42  ...         125/80          68         7000
1014        103    Male   57  ...         _RARE_          70         8000
128         202  Female   43  ...         130/85          75         5000
...         ...     ...  ...  ...            ...         ...          ...
1463        344  Female   39  ...         _RARE_          68         8000
2813        365  Female   50  ...         _RARE_          75         3600
2843        248  Female   55  ...         130/85          78         5000
2448        154  Female   56  ...         130/85          68         7000
413         147    Male   56  ...         _RARE_          65         6000

[480 rows x 12 columns]
      Person ID  Gender  Age  ... Blood Pressure  Heart Rate  Daily Steps
642         1

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X['Systolic'].fillna(X['Systolic'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X['Diastolic'].fillna(X['Diastolic'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on whi

      Person ID  Gender  Age  ... Blood Pressure  Heart Rate  Daily Steps
271         213  Female   53  ...         130/85          70         7000
696          85    Male   34  ...         125/80          72         5000
2973        355  Female   28  ...         130/85          65         8000
244         172    Male   50  ...         115/75          68         8000
476          77  Female   30  ...         125/80          72         8000
...         ...     ...  ...  ...            ...         ...          ...
477         230  Female   32  ...         120/80          72         8000
1248        286  Female   49  ...         130/85          72         6000
2884          8  Female   44  ...         130/85          75         3600
1772        102  Female   28  ...         130/85          65         6000
2052        285    Male   34  ...         125/80          68         6000

[480 rows x 12 columns]
      Person ID  Gender  Age  ... Blood Pressure  Heart Rate  Daily Steps
642         1

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X['Systolic'].fillna(X['Systolic'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X['Diastolic'].fillna(X['Diastolic'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on whi

      Person ID  Gender  Age  ... Blood Pressure  Heart Rate  Daily Steps
2591        128  Female   35  ...         115/75          68         8000
126         284    Male   37  ...         140/95          75        10000
2091        144    Male   28  ...         130/85          78         5000
2134        367  Female   30  ...         125/80          65         3600
887         234    Male   57  ...         130/85          75         6000
...         ...     ...  ...  ...            ...         ...          ...
2491        283    Male   35  ...         120/80          75         3500
2518        222  Female   44  ...         130/85          65         6000
399         275    Male   59  ...         140/95          75        10000
19           21  Female   50  ...         130/85          65         6000
920         359  Female   53  ...         125/80          65         5000

[480 rows x 12 columns]
      Person ID  Gender  Age  ... Blood Pressure  Heart Rate  Daily Steps
642         1

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X['Systolic'].fillna(X['Systolic'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X['Diastolic'].fillna(X['Diastolic'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on whi

      Person ID  Gender  Age  ... Blood Pressure  Heart Rate  Daily Steps
1365         20    Male   48  ...         135/90          68         5000
1903         70  Female   53  ...         130/85          68         7000
1296        331  Female   53  ...         140/95          75         3700
1821        217    Male   35  ...         130/85          65         5000
872          11    Male   41  ...         130/85          77         5000
...         ...     ...  ...  ...            ...         ...          ...
1638        351  Female   39  ...         _RARE_          78         6000
1095        333  Female   53  ...         120/80          72         6000
1130        103  Female   51  ...         130/85          65         8000
1294        165  Female   43  ...         _RARE_          65         7000
860         233  Female   28  ...         130/85          65         3500

[480 rows x 12 columns]
Optimized SVM Accuracy: 0.7133
5-Fold Cross-Validation Accuracy of Optimized Random For

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X['Systolic'].fillna(X['Systolic'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X['Diastolic'].fillna(X['Diastolic'].median(), inplace=True)
