In [3]:
import pandas as pd

df = pd.read_csv('cleaned_data.csv')
df.head()

Unnamed: 0,participant_id,day,PSS_score,Openness,Conscientiousness,Extraversion,Agreeableness,Neuroticism,sleep_time,wake_time,sleep_duration,PSQI_score,call_duration,num_calls,num_sms,screen_on_time,skin_conductance,accelerometer,mobility_radius,mobility_distance
0,1,1,34,2.322732,4.332193,1.185878,1.570213,3.782094,22.618591,5.19066,6.572069,1,3.924527,12,32,10.703714,3.11573,0.161717,1.145179,2.196851
1,1,2,37,1.761436,3.25412,3.907281,4.072512,1.997145,22.140549,6.170717,8.030168,4,58.318004,3,41,11.012939,0.959144,0.985587,1.021133,0.737825
2,1,3,30,3.025887,1.855002,2.0459,2.317493,3.619225,22.216405,5.318825,7.10242,1,4.941043,4,48,4.877372,3.311629,1.877445,0.478179,0.911673
3,1,4,16,1.94837,4.966676,3.345225,1.607756,3.583524,23.937781,8.061075,8.123294,3,0.295373,11,38,3.462956,0.625721,0.494921,0.630549,3.911418
4,1,5,32,3.343484,2.065936,3.137843,2.118061,2.567347,23.527002,7.312145,7.785143,3,22.300571,17,17,4.861046,0.622609,1.3426,0.25409,1.605132


In [9]:
import numpy as np
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error

# Step 1: Subsample the dataset
sample_df = df.sample(frac=0.1, random_state=42)  # Use 10% of the data
X = sample_df.drop(columns=['PSS_score', 'participant_id', 'day'])
y = sample_df['PSS_score']


In [11]:
# Step 2: Preprocessing (SVR is sensitive to differences in feature scales)
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)


In [13]:
# Step 3: SVR with RandomizedSearchCV (hyperparameter tuning)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)
param_dist = {'C': [1, 10, 100], 'gamma': [0.1, 1, 10], 'epsilon': [0.1, 0.2, 0.5], 'kernel': ['rbf']}
svr = SVR()
random_search = RandomizedSearchCV(svr, param_dist, scoring='neg_root_mean_squared_error', cv=3, n_iter=10, random_state=42, n_jobs=-1)
random_search.fit(X_train, y_train)

In [15]:
# Step 5: Evaluation
best_svr = random_search.best_estimator_
y_pred = best_svr.predict(X_test)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
print(f"Optimized RMSE: {rmse}")

Optimized RMSE: 7.70946129176196
