In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error, f1_score, r2_score
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestRegressor

In [None]:
# Load the raw dataset into a DataFrame.
# TODO: the path below is an empty placeholder - point it at the CSV file
# before running the notebook.
df = pd.read_csv(filepath_or_buffer="")

In [None]:
# Sensor columns used as model inputs, in the order they appear in the
# feature matrix.
feature_columns = ['accx', 'accy', 'accz', 'eda_f', 'temp_f', 'HR']

# Individual series are kept as separate names as well as in the combined frame.
accx, accy, accz, eda, temp, hr = (df[name] for name in feature_columns)

# Feature matrix: an independent copy of the selected columns.
numerical_features = df[feature_columns].copy()

X = numerical_features
# Target: the questionnaire item stored under this column name.
y = df['In_solving_or_studying_the_topics_in_lecture_I_invested']


In [None]:
# Min-max scale the features and the target. Two separate scaler objects are
# kept so that each transformation can be inverted independently later on.
# NOTE(review): both scalers are fitted on the complete dataset, i.e. before
# run_experiments() slices it into train/test parts, so test-set ranges
# influence the training data - consider fitting on the training slice only.

# Features: scale, then rebuild a DataFrame carrying the original column names.
scaler_X = MinMaxScaler()
X_scaled = scaler_X.fit_transform(X)
X = pd.DataFrame(X_scaled, columns=X.columns)

# Target: the scaler needs a 2-D column, so reshape going in and flatten coming out.
scaler_y = MinMaxScaler()
y_scaled = scaler_y.fit_transform(y.to_numpy().reshape(-1, 1)).ravel()
y = pd.Series(y_scaled)

In [None]:
def compute_metrics(y_true, y_pred, verbose=True):
    """Summarise how closely ``y_pred`` follows ``y_true``.

    Parameters
    ----------
    y_true : array-like
        Observed target values, in their original (sequential) order.
    y_pred : array-like
        Predicted values aligned element-wise with ``y_true``.
    verbose : bool, default True
        When True, print both inputs followed by the two transition counts.

    Returns
    -------
    dict
        ``"RMSE"``: root mean squared error.
        ``"F1"``: weighted F1 score computed on the values rounded to the
        nearest integer.
        ``"Transitions Present"``: number of consecutive-sample changes in
        ``y_true``.
        ``"Transitions Detected"``: number of consecutive-sample changes in
        ``y_pred``.
    """
    rmse = np.sqrt(mean_squared_error(y_true, y_pred))

    # F1 needs discrete labels, so both sides are rounded first.
    # NOTE(review): with a target min-max scaled to [0, 1] the rounding leaves
    # only the two classes 0 and 1 - confirm this binarisation is intended.
    f1 = f1_score(np.rint(y_true), np.rint(y_pred), average='weighted')
    #r2 = r2_score(y_true, y_pred)

    # A "transition" is any non-zero difference between neighbouring samples.
    # Continuous predictions therefore count almost every step as a transition.
    transitions_present = (np.diff(y_true) != 0).sum()
    transitions_detected = (np.diff(y_pred) != 0).sum()

    if verbose:
        print("Y_true", y_true)
        print("Y_pred", y_pred)
        # Report the counts themselves (the arrays are already printed above).
        print("transitions in y_true", transitions_present)
        print("transitions in y_pred", transitions_detected)

    return {
        "RMSE": rmse,
        "F1": f1,
        "Transitions Present": transitions_present,
        "Transitions Detected": transitions_detected
    }

In [None]:
def run_experiments(model, X, y, model_name, random_state=42):
    """Fit ``model`` on growing leading portions of the data and score each split.

    For every training share from 10 % to 90 % (in steps of 10) the first
    ``train_pct`` percent of the rows are used for fitting and all remaining
    rows for testing. Rows are split by position and never shuffled.

    Parameters
    ----------
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict(X)``. It is refitted in
        place for every split, so after the call it holds the last fit.
    X : pandas.DataFrame
        Feature matrix; its index is reset before slicing.
    y : pandas.Series
        Target aligned row-wise with ``X``; its index is reset as well.
    model_name : str
        Label written to the ``"Model"`` column of the result.
    random_state : int, default 42
        Currently unused: the split is positional and therefore deterministic.
        Retained so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        One row per evaluated split with train/test RMSE, F1 and R2 plus the
        transition counts of the test part. Splits whose training part would
        be empty (fewer than 10 rows overall) are skipped.
    """
    results = []
    X = X.reset_index(drop=True)
    y = y.reset_index(drop=True)

    n_samples = len(X)
    for train_pct in range(10, 100, 10):
        test_pct = 100 - train_pct
        split_idx = int(n_samples * train_pct / 100)
        if split_idx == 0:
            # Nothing to train on for this share; fitting would fail.
            continue

        X_train = X.iloc[:split_idx]
        X_test = X.iloc[split_idx:]

        y_train = y.iloc[:split_idx]
        y_test = y.iloc[split_idx:]

        model.fit(X_train, y_train)

        y_train_pred = model.predict(X_train)
        y_test_pred = model.predict(X_test)

        train_metrics = compute_metrics(y_train, y_train_pred)
        test_metrics = compute_metrics(y_test, y_test_pred)

        # Compute R2 explicitly from the predictions: model.score() is R2 only
        # for regressors (classifiers report accuracy), and calling it would
        # also predict a second time.
        train_r2 = r2_score(y_train, y_train_pred)
        test_r2 = r2_score(y_test, y_test_pred)

        results.append({
            "Model": model_name,
            "Train %": train_pct,
            "Test %": test_pct,

            "Train RMSE": train_metrics["RMSE"],
            "Train F1": train_metrics["F1"],
            "Train R2": train_r2,

            "Test RMSE": test_metrics["RMSE"],
            "Test F1": test_metrics["F1"],
            "Test R2": test_r2,

            "Transitions Present (Test)": test_metrics["Transitions Present"],
            "Transitions Detected (Test)": test_metrics["Transitions Detected"]
        })

    return pd.DataFrame(results)


# K-Nearest Neighbors

In [None]:
# KNN regressor averaging the 3 nearest training samples, evaluated over all
# train/test shares produced by run_experiments().
knn = KNeighborsRegressor(n_neighbors=3)
knn_results = run_experiments(knn, X, y, "KNN")

In [None]:
# Display the per-split metrics table for the KNN model.
knn_results

# Logistic Regression

In [None]:
# LogisticRegression is a classifier and refuses continuous targets. The
# min-max scaled `y` is continuous whenever the target has more than two
# distinct levels, so each level is encoded as an ordered integer class code
# (0 = lowest level). A binary target maps to the same 0/1 labels as before.
# NOTE(review): assumes the target takes only a handful of distinct values
# (questionnaire score) - TODO confirm. For a multi-level target the error
# metrics of this model are expressed on the class-code scale.
lr = LogisticRegression(solver='liblinear')

y_class = pd.Series(pd.factorize(y, sort=True)[0], index=y.index)

lr_results = run_experiments(
    model=lr,
    X=X,
    y=y_class,
    model_name="Logistic Regression"
)

In [None]:
# Display the per-split metrics table for the logistic regression model.
lr_results

# Random Forest

In [None]:
# Random forest with 1000 trees. The seed is fixed so the reported metrics are
# reproducible across runs; n_jobs=-1 builds the trees on all available cores
# (the forest is refitted once per train/test share) without changing results.
rf = RandomForestRegressor(n_estimators=1000, random_state=42, n_jobs=-1)

rf_results = run_experiments(
    model=rf,
    X=X,
    y=y,
    model_name="Random Forest Regressor"
)


In [None]:
# Display the per-split metrics table for the random forest model.
rf_results

# Parameter estimates from the ordinal logistic regression model

In [None]:
from statsmodels.miscmodels.ordinal_model import OrderedModel

# Ordinal logistic regression of the target on all features
# (distr="logit" selects the logistic link).
model = OrderedModel(endog=y, exog=X, distr="logit")

# Fit with the BFGS optimiser; disp=False silences the optimiser's console output.
result = model.fit(method="bfgs", disp=False)

In [None]:
# Pull the fitted parameter estimates and their standard errors out of the result.
params, se = result.params, result.bse

In [None]:
# Keep only the entries labelled with a feature name, in feature-matrix order.
# NOTE(review): presumably this drops the model's threshold (cut-point)
# parameters - verify against the fitted result.
features = X.columns
params, se = (estimates.loc[features] for estimates in (params, se))

In [None]:
# Per-coefficient statistic: squared ratio of the estimate to its standard error.
chi2 = params.div(se).pow(2)
# Exponentiated estimates (reported as "OR" in the summary table).
or_values = np.exp(params)

In [None]:
# Assemble one row per feature: estimate, standard error, chi-square statistic
# and exponentiated estimate.
parameter_est = pd.DataFrame({
    "Feature": features,
    "Estimate": params.values,
    "SE": se.values,
    "χ2 (df = 1)": chi2.values,   # label was mis-encoded; restored to the Greek chi
    "OR": or_values.values
})

# Display labels for the raw column names.
rename_dict = {
    "eda_f": "EDA",
    "temp_f": "TEMP",
    "HR": "HR",
    "accx": "ACC X",
    "accy": "ACC Y",
    "accz": "ACC Z"
}
# Fall back to the raw column name when a feature has no display label,
# instead of silently turning it into NaN.
parameter_est["Feature"] = parameter_est["Feature"].map(
    lambda name: rename_dict.get(name, name)
)
print(parameter_est)