In [None]:
# Colab setup: mount Google Drive and switch into the dataset folder so the
# relative CSV paths below resolve.
import os

from google.colab import drive

drive.mount('/content/drive')  # Drive becomes available under /content/drive

cd = '/content/drive/My Drive/hit140_datasets'
os.chdir(cd)

import pandas as pd

# Load the three source tables.
df1 = pd.read_csv('dataset1.csv')
df2 = pd.read_csv('dataset2.csv')
df3 = pd.read_csv('dataset3.csv')

# Inner-join the three tables on 'ID' (only IDs present in all of them survive).
df_merge = df1.merge(df2, on='ID', how='inner').merge(df3, on='ID', how='inner')

# Vitality indicator: row-wise mean of the four selected well-being items.
vitality_items = ['Relx', 'Engs', 'Goodme', 'Conf']
df_merge['Vitality'] = df_merge[vitality_items].mean(axis=1)

# Daily average screen time per activity: weekend value weighted by 2 days,
# weekday value weighted by 5 days, divided by 7.
for prefix in ['C', 'G', 'S', 'T']:
    df_merge[prefix + '_davg'] = (df_merge[prefix + '_we'] * 2 + df_merge[prefix + '_wk'] * 5) / 7

# The raw weekend/weekday columns are no longer needed once averaged.
orginal_cols = ['C_we', 'C_wk', 'G_we', 'G_wk', 'S_we', 'S_wk', 'T_we', 'T_wk']
df_merge = df_merge.drop(columns=orginal_cols)

# Remove every individual well-being item; only 'Vitality' is kept as the target.
other_indicators = ['Optm', 'Usef', 'Relx', 'Intp', 'Engs', 'Dealpr', 'Thcklr',
                    'Goodme', 'Clsep', 'Conf', 'Mkmind', 'Loved', 'Intthg', 'Cheer']
non_game_activities = ['C_davg', 'S_davg', 'T_davg']  # defined here; not used in this cell
df_merge = df_merge.drop(columns=other_indicators)

# Quick look at the prepared frame.
print(df_merge.head(10))

Mounted at /content/drive
        ID  gender  minority  deprived  Vitality    C_davg    G_davg  \
0  1087192       0         0         0      3.75  0.928571  0.500000   
1  1087195       0         0         0      3.25  1.285714  0.000000   
2  1087205       0         0         0      3.00  0.642857  0.000000   
3  1087214       0         0         0      4.25  1.285714  0.142857   
4  1087222       0         0         0      3.25  2.428571  0.000000   
5  1087244       0         0         0      3.00  1.571429  0.000000   
6  1087248       0         0         0      2.50  0.857143  0.000000   
7  1087265       0         0         0      4.00  2.000000  0.000000   
8  1087266       0         0         0      3.75  1.571429  0.000000   
9  1087270       0         0         0      3.25  0.000000  0.142857   

     S_davg    T_davg  
0  0.642857  2.571429  
1  1.571429  1.857143  
2  0.500000  1.571429  
3  1.285714  3.285714  
4  1.285714  1.285714  
5  1.571429  2.285714  
6  1.000000  

In [None]:
# Simple linear regression: Vitality ~ daily gaming time, fitted on all rows
# of df_merge (rows with G_davg == 0 are included).
# Import required libraries
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.metrics import mean_squared_error, r2_score
import statsmodels.api as sm
import math

# Extract the feature and target variable
X = df_merge['G_davg'].values  # Gaming daily average time
y = df_merge['Vitality'].values  # Vitality

# Turns the 1-D feature into a 2-column matrix [const, G_davg].
# LinearRegression fits its own intercept, so the constant column always gets
# a coefficient of 0; the G_davg slope is the second entry of model.coef_.
X = sm.add_constant(X)

# Hold out 20% of the rows for testing (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Fit the linear regression model on the training data
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on the held-out rows and show actual vs. predicted side by side
y_pred = model.predict(X_test)
df_pred = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(df_pred.head(40))

# Model performance metrics on the test set
mse = mean_squared_error(y_test, y_pred)
mae = metrics.mean_absolute_error(y_test, y_pred)
rmse = math.sqrt(mse)
r2 = r2_score(y_test, y_pred)

# Print the model's coefficients ([const, G_davg]) and intercept
print("Coefficient (G_davg):", model.coef_)
print("Intercept:", model.intercept_)

# Print model performance metrics
print("Mean Squared Error (MSE):", mse)
print("Mean Absolute Error (MAE):", mae)
# The value is sqrt(MSE), i.e. RMSE (it was previously mislabelled "NMSE")
print("Root Mean Square Error (RMSE):", rmse)
print("R-squared:", r2)


    Actual  Predicted
0     4.75   3.444570
1     3.25   3.069761
2     3.50   3.237779
3     3.25   3.250703
4     2.00   3.186081
5     3.50   3.069761
6     2.75   3.367023
7     1.25   3.328250
8     4.25   3.069761
9     4.50   3.573814
10    2.50   3.069761
11    3.25   3.211930
12    1.50   3.069761
13    4.50   3.082685
14    4.00   3.302401
15    4.00   3.127921
16    3.75   3.418721
17    3.25   3.392872
18    2.25   3.250703
19    3.25   3.341174
20    3.75   3.069761
21    2.50   3.069761
22    1.50   3.160232
23    3.75   3.114996
24    2.75   3.444570
25    2.75   3.392872
26    3.00   3.069761
27    3.75   3.160232
28    3.25   3.069761
29    3.50   3.069761
30    2.00   3.069761
31    2.25   3.069761
32    3.50   3.367023
33    2.75   3.069761
34    3.00   3.069761
35    1.50   3.095610
36    2.00   3.069761
37    4.25   3.211930
38    3.75   3.276552
39    3.00   3.186081
Coefficient (G_davg): [0.         0.09047106]
Intercept: 3.069760837410874
Mean Squared Error (MSE

In [None]:
# Simple linear regression: Vitality ~ daily gaming time, restricted to
# respondents with gaming time > 0.
df_filtered = df_merge[df_merge['G_davg'] > 0]

# Extract the filtered feature and target variable
X_filtered = df_filtered[['G_davg']]  # Gaming daily average time
y_filtered = df_filtered['Vitality']  # Vitality score

# Hold out 20% of the filtered rows for testing (fixed seed for reproducibility)
X_train_filtered, X_test_filtered, y_train_filtered, y_test_filtered = train_test_split(
    X_filtered, y_filtered, test_size=0.2, random_state=0)

# Fit the linear regression model on the filtered training data
model_filtered = LinearRegression()
model_filtered.fit(X_train_filtered, y_train_filtered)

# Predict with the model fitted in THIS cell. The previous version called
# `model.predict(...)` (the regressor trained on the unfiltered data) and added
# a constant column to the test set only to match that other model's input
# width, so the predictions and metrics did not belong to `model_filtered`.
y_pred_filtered = model_filtered.predict(X_test_filtered)
df_filter_pred = pd.DataFrame({'Actual': y_test_filtered, 'Predicted': y_pred_filtered})
print(df_filter_pred.head(40))

# Model performance metrics on the filtered test set
mse = mean_squared_error(y_test_filtered, y_pred_filtered)
mae = metrics.mean_absolute_error(y_test_filtered, y_pred_filtered)
rmse = math.sqrt(mse)
r2 = r2_score(y_test_filtered, y_pred_filtered)

# Print the model's coefficient and intercept
print("Coefficient (G_davg):", model_filtered.coef_)
print("Intercept:", model_filtered.intercept_)

# Print model performance metrics
print("Mean Squared Error (MSE):", mse)
print("Mean Absolute Error (MAE):", mae)
# The value is sqrt(MSE), i.e. RMSE (it was previously mislabelled "NMSE")
print("Root Mean Square Error (RMSE):", rmse)
print("R-squared:", r2)


       Actual  Predicted
53527    4.50   3.102072
82172    4.25   3.367023
77680    2.75   3.224854
91032    3.25   3.483343
65147    3.50   3.237779
20255    2.75   3.082685
81606    3.75   3.302401
97879    3.75   3.367023
80298    3.25   3.302401
93307    4.00   3.095610
88902    2.50   3.302401
51400    1.50   3.211930
15428    1.75   3.095610
85697    4.00   3.147307
68953    3.50   3.418721
52239    3.25   3.160232
36944    3.75   3.082685
57545    4.00   3.211930
65427    3.25   3.263627
86222    4.25   3.114996
81017    3.75   3.392872
53311    4.75   3.186081
6248     3.25   3.082685
65475    3.75   3.250703
92473    3.00   3.250703
52894    3.50   3.211930
63259    2.25   3.276552
8610     2.75   3.082685
20211    3.25   3.114996
54554    3.25   3.121459
51569    2.00   3.509192
77529    4.50   3.153770
46454    2.50   3.121459
68762    2.50   3.302401
48117    2.00   3.276552
38163    3.00   3.127921
7670     3.25   3.127921
36669    3.50   3.114996
56247    3.00   3.211930




In [None]:
# Multiple linear regression: Vitality ~ gaming time + gender + minority +
# deprived, fitted on all rows of df_merge (rows with G_davg == 0 included).

# Extract the features and target variable
feature_cols = ['G_davg', 'gender', 'minority', 'deprived']
X = df_merge[feature_cols].values  # gaming time plus three demographic columns
y = df_merge['Vitality'].values  # Vitality score

# Hold out 20% of the rows for testing (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Fit the linear regression model on the training data
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on the held-out rows and show actual vs. predicted side by side
y_pred = model.predict(X_test)
df_pred = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(df_pred.head(40))

# Model performance metrics on the test set
mse = mean_squared_error(y_test, y_pred)
mae = metrics.mean_absolute_error(y_test, y_pred)
rmse = math.sqrt(mse)
r2 = r2_score(y_test, y_pred)

# Print one coefficient per feature (same order as feature_cols) and the intercept
for name, coef in zip(feature_cols, model.coef_):
    print(f"Coefficient ({name}):", coef)
print("Intercept:", model.intercept_)

# Print model performance metrics
print("Mean Squared Error (MSE):", mse)
print("Mean Absolute Error (MAE):", mae)
# The value is sqrt(MSE), i.e. RMSE (it was previously mislabelled "NMSE")
print("Root Mean Square Error (RMSE):", rmse)
print("R-squared:", r2)

    Actual  Predicted
0     4.75   3.480275
1     3.25   2.933675
2     3.50   3.502144
3     3.25   3.571366
4     2.00   2.930769
5     3.50   2.863462
6     2.75   3.486221
7     1.25   2.843642
8     4.25   2.863462
9     4.50   3.470366
10    2.50   2.869475
11    3.25   3.498113
12    1.50   2.863462
13    4.50   3.514036
14    4.00   3.497189
15    4.00   3.510567
16    3.75   3.488271
17    3.25   3.484239
18    2.25   3.495140
19    3.25   3.494216
20    3.75   2.863462
21    2.50   2.939688
22    1.50   2.856525
23    3.75   3.581771
24    2.75   3.480275
25    2.75   3.490253
26    3.00   2.869475
27    3.75   3.508090
28    3.25   2.933675
29    3.50   3.515027
30    2.00   2.933675
31    2.25   2.863462
32    3.50   3.492235
33    2.75   2.863462
34    3.00   2.869475
35    1.50   2.867493
36    2.00   2.869475
37    4.25   2.852561
38    3.75   3.563371
39    3.00   3.570307
Coefficient (G_davg): -0.006936699180031714
Coefficient (gender): 0.6455514867871419
Coefficient (

In [None]:
# Multiple linear regression: Vitality ~ gaming time + gender + minority +
# deprived, restricted to respondents with gaming time > 0.

df_merge2 = df_merge[df_merge['G_davg'] > 0]

# Extract the features and target variable
feature_cols = ['G_davg', 'gender', 'minority', 'deprived']
X = df_merge2[feature_cols].values  # gaming time plus three demographic columns
y = df_merge2['Vitality'].values  # Vitality score

# Hold out 20% of the rows for testing (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Fit the linear regression model on the training data
model = LinearRegression()
model.fit(X_train, y_train)

# Predict on the held-out rows and show actual vs. predicted side by side
y_pred = model.predict(X_test)
df_pred = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(df_pred.head(40))

# Model performance metrics on the test set
mse = mean_squared_error(y_test, y_pred)
mae = metrics.mean_absolute_error(y_test, y_pred)
rmse = math.sqrt(mse)
r2 = r2_score(y_test, y_pred)

# Print one coefficient per feature (same order as feature_cols) and the intercept
for name, coef in zip(feature_cols, model.coef_):
    print(f"Coefficient ({name}):", coef)
print("Intercept:", model.intercept_)

# Print model performance metrics
print("Mean Squared Error (MSE):", mse)
print("Mean Absolute Error (MAE):", mae)
# The value is sqrt(MSE), i.e. RMSE (it was previously mislabelled "NMSE")
print("Root Mean Square Error (RMSE):", rmse)
print("R-squared:", r2)

    Actual  Predicted
0     4.50   3.543980
1     4.25   3.491952
2     2.75   3.519870
3     3.25   3.469111
4     3.50   3.530562
5     2.75   2.894774
6     3.75   3.504642
7     3.75   3.491952
8     3.25   3.504642
9     4.00   3.610469
10    2.50   3.517873
11    1.50   2.882625
12    1.75   2.892236
13    4.00   3.535097
14    3.50   3.495031
15    3.25   3.532559
16    3.75   2.894774
17    4.00   3.587627
18    3.25   3.564245
19    4.25   3.554673
20    3.75   3.500107
21    4.75   3.527483
22    3.25   2.908004
23    3.75   3.580013
24    3.00   3.514794
25    3.50   3.574396
26    2.25   3.509718
27    2.75   2.894774
28    3.25   2.888429
29    3.25   3.540173
30    2.00   2.824253
31    4.50   3.547059
32    2.50   2.887160
33    2.50   3.504642
34    2.00   2.869935
35    3.00   2.899122
36    3.25   2.951110
37    3.50   2.888429
38    3.00   3.535638
39    1.00   3.538865
Coefficient (G_davg): -0.01776554616079689
Coefficient (gender): 0.6530133066340874
Coefficient (m

In [None]:
# OLS of the composite well-being score on the four daily screen-time averages.
# `sm` (statsmodels.api) is expected to be in scope from an earlier cell.
import pandas as pd

# Reload the three source tables and inner-join them on 'ID'.
df1 = pd.read_csv('dataset1.csv')
df2 = pd.read_csv('dataset2.csv')
df3 = pd.read_csv('dataset3.csv')
df_merge = df1.merge(df2, on='ID', how='inner').merge(df3, on='ID', how='inner')

# The Vitality indicator is intentionally not computed in this cell; the
# target here is the composite score built from all well-being items below.

# Daily average screen time per activity: weekend x2 + weekday x5, over 7 days.
for prefix in ['C', 'G', 'S', 'T']:
    df_merge[prefix + '_davg'] = (df_merge[prefix + '_we'] * 2 + df_merge[prefix + '_wk'] * 5) / 7

# Remove the raw weekend/weekday columns now that the averages exist.
orginal_cols = ['C_we', 'C_wk', 'G_we', 'G_wk', 'S_we', 'S_wk', 'T_we', 'T_wk']
df_merge = df_merge.drop(columns=orginal_cols)

# Composite well-being: row-wise mean over all individual well-being items,
# after which the individual items are dropped.
original_indicators = ['Optm', 'Usef', 'Relx', 'Intp', 'Engs', 'Dealpr', 'Thcklr',
                       'Goodme', 'Clsep', 'Conf', 'Mkmind', 'Loved', 'Intthg', 'Cheer']
df_merge['composite_wellbeing'] = df_merge[original_indicators].mean(axis=1)
df_merge = df_merge.drop(columns=original_indicators)

# Keep only the modelling columns, in a fixed order.
df_merge = df_merge[['gender', 'minority', 'deprived',
                     'G_davg', 'C_davg', 'T_davg', 'S_davg',
                     'composite_wellbeing']]

# Design matrix (screen-time averages plus intercept column) and target.
X = df_merge[['C_davg', 'G_davg', 'S_davg', 'T_davg']].values
y = df_merge[['composite_wellbeing']].values
X = sm.add_constant(X)

# Fit ordinary least squares and report the full summary table.
model = sm.OLS(y, X).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.040
Model:                            OLS   Adj. R-squared:                  0.040
Method:                 Least Squares   F-statistic:                     1020.
Date:                Thu, 17 Oct 2024   Prob (F-statistic):               0.00
Time:                        04:02:49   Log-Likelihood:                -99691.
No. Observations:               98278   AIC:                         1.994e+05
Df Residuals:                   98273   BIC:                         1.994e+05
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          3.5778      0.005    719.096      0.0

In [None]:
# OLS of the composite well-being score on the four daily screen-time averages
# plus gender, minority and deprived, followed by in-sample error metrics.
import pandas as pd
import numpy as np
import statsmodels.api as sm
from sklearn.metrics import mean_absolute_error, mean_squared_error

#Read the datasets into DataFrames
df1 = pd.read_csv('dataset1.csv')
df2 = pd.read_csv('dataset2.csv')
df3 = pd.read_csv('dataset3.csv')

#Merge the Datasets
df_merge = pd.merge(df1,df2, on='ID', how = 'inner') # merge on 'ID'
df_merge = pd.merge(df_merge,df3, on='ID',how = 'inner') # merge on 'ID'

#Calculate Daily Screen time for each Activities
# (weekend value weighted by 2 days, weekday value by 5 days, over 7 days)
df_merge['C_davg'] = (df_merge['C_we'] * 2 + df_merge['C_wk'] * 5) / 7
df_merge['G_davg'] = (df_merge['G_we'] * 2 + df_merge['G_wk'] * 5) / 7
df_merge['S_davg'] = (df_merge['S_we'] * 2 + df_merge['S_wk'] * 5) / 7
df_merge['T_davg'] = (df_merge['T_we'] * 2 + df_merge['T_wk'] * 5) / 7

#Drop orginal screentime columns
orginal_cols = ['C_we','C_wk','G_we','G_wk','S_we','S_wk','T_we','T_wk']
df_merge = df_merge.drop(orginal_cols, axis=1)

#Calculate composite well-being indicator (row-wise mean of all items)
original_indicators = ['Optm', 'Usef', 'Relx', 'Intp', 'Engs', 'Dealpr', 'Thcklr',
                    'Goodme', 'Clsep', 'Conf', 'Mkmind', 'Loved', 'Intthg', 'Cheer']
df_merge['composite_wellbeing'] = df_merge[original_indicators].mean(axis=1)
#Drop original well-being
df_merge = df_merge.drop(original_indicators, axis=1)

# Keep only the modelling columns
df_merge = df_merge[['gender', 'minority', 'deprived','G_davg','C_davg','T_davg','S_davg','composite_wellbeing']]

# Extract the features and target variable
X = df_merge[['C_davg','G_davg','S_davg','T_davg','gender','minority','deprived']].values
y = df_merge[['composite_wellbeing']].values

# Add the constant (intercept) column once and fit the OLS model once.
# The previous version repeated both steps, refitting the same model on the
# same design matrix for no benefit.
X = sm.add_constant(X)
model = sm.OLS(y, X).fit()

# Print the model summary
print(model.summary())

# Predictions on the same rows the model was fitted on (no train/test split
# in this cell), so the metrics below are in-sample errors.
y_pred = model.predict(X)

# Calculate additional evaluation metrics
mse = mean_squared_error(y, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y, y_pred)

# Print the metrics
print(f"Mean Squared Error (MSE): {mse}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"Mean Absolute Error (MAE): {mae}")

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.091
Model:                            OLS   Adj. R-squared:                  0.091
Method:                 Least Squares   F-statistic:                     1403.
Date:                Thu, 17 Oct 2024   Prob (F-statistic):               0.00
Time:                        05:03:42   Log-Likelihood:                -97010.
No. Observations:               98278   AIC:                         1.940e+05
Df Residuals:                   98270   BIC:                         1.941e+05
Df Model:                           7                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const          3.4020      0.006    606.086      0.0