In [2]:
# Import Statements

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor,RandomForestClassifier, HistGradientBoostingClassifier,GradientBoostingClassifier,VotingClassifier
from sklearn.metrics import mean_absolute_error, classification_report, accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from xgboost import XGBClassifier
from sklearn.linear_model import LogisticRegression

In [3]:


# Load the processed data
df = pd.read_csv('processed_data.csv')

# Some more Processing
# After Model Evaluation
# We can merge the data and draw insights to get a balanced prediction
# Done at the end of code to let the data visualisation be more useful
# Group into 2 classes (short vs long attention)
df['attention_span'] = df['attention_span'].apply(lambda x: 1 if x > 1 else 0)
df.head()

# 1. Select features and standardize
features = ['notification_response', 'app_group_Utility', 'uses_productivity_apps', 'screen_activity_numeric']
X = df[features]
y = df['attention_span']  # Replace with your actual target column name

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# 2. Apply PCA
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

# 3. Train-test split
X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)



In [4]:
# Random Forest Regressor

reg = RandomForestRegressor(random_state=42)
reg.fit(X_train, y_train)

# Predict and round predictions
y_pred = reg.predict(X_test)
y_pred_rounded = y_pred.round().clip(0, 3)  # Ensure predictions stay within 0–3

# Evaluate
print("Accuracy:", accuracy_score(y_test, y_pred_rounded))
print(classification_report(y_test, y_pred_rounded))


Accuracy: 0.55
              precision    recall  f1-score   support

           0       0.50      0.39      0.44        18
           1       0.58      0.68      0.62        22

    accuracy                           0.55        40
   macro avg       0.54      0.54      0.53        40
weighted avg       0.54      0.55      0.54        40



In [5]:
# Random Forest Classifier

clf = RandomForestClassifier(random_state=42)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.55
              precision    recall  f1-score   support

           0       0.50      0.39      0.44        18
           1       0.58      0.68      0.62        22

    accuracy                           0.55        40
   macro avg       0.54      0.54      0.53        40
weighted avg       0.54      0.55      0.54        40



In [6]:
# Logistic Regression

clf = LogisticRegression(class_weight='balanced' ,solver='lbfgs', max_iter=1000)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Accuracy: 0.5
              precision    recall  f1-score   support

           0       0.46      0.67      0.55        18
           1       0.57      0.36      0.44        22

    accuracy                           0.50        40
   macro avg       0.52      0.52      0.49        40
weighted avg       0.52      0.50      0.49        40



In [7]:
# Hist Gradient Boosting Classifier

clf = HistGradientBoostingClassifier(random_state=42 , class_weight='balanced')
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Accuracy: 0.55
              precision    recall  f1-score   support

           0       0.50      0.39      0.44        18
           1       0.58      0.68      0.62        22

    accuracy                           0.55        40
   macro avg       0.54      0.54      0.53        40
weighted avg       0.54      0.55      0.54        40



In [8]:
# XGBoost Classifier

xgb = XGBClassifier(eval_metric='mlogloss', random_state=42)
xgb.fit(X_train, y_train)
y_pred = xgb.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Accuracy: 0.525
              precision    recall  f1-score   support

           0       0.46      0.33      0.39        18
           1       0.56      0.68      0.61        22

    accuracy                           0.53        40
   macro avg       0.51      0.51      0.50        40
weighted avg       0.51      0.53      0.51        40



In [9]:
# Soft Voting 

voting = VotingClassifier(estimators=[
    ('rf', RandomForestClassifier(random_state=42)),
    ('hgb', HistGradientBoostingClassifier(random_state=42)),
    ('xgb', XGBClassifier(eval_metric='mlogloss', random_state=42))
], voting='soft')

voting.fit(X_train, y_train)
y_pred = voting.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Accuracy: 0.55
              precision    recall  f1-score   support

           0       0.50      0.39      0.44        18
           1       0.58      0.68      0.62        22

    accuracy                           0.55        40
   macro avg       0.54      0.54      0.53        40
weighted avg       0.54      0.55      0.54        40



In [10]:
# Gradient Boosting Classifier

clf = GradientBoostingClassifier(random_state=42)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

Accuracy: 0.5
              precision    recall  f1-score   support

           0       0.42      0.28      0.33        18
           1       0.54      0.68      0.60        22

    accuracy                           0.50        40
   macro avg       0.48      0.48      0.47        40
weighted avg       0.48      0.50      0.48        40

