In [1]:
import pickle
import sklearn
import numpy as np
import pandas as pd
from json import loads



In [2]:
# Common prefix shared by the processed self-reported QoL CSV files.
base_path = '/kaggle/input/self-reported-qol/20230625-processed-'

# Load each domain and discard the 'id' and 'day' columns up front, so they
# never reach the predictor frames built below.
df_physical = pd.read_csv(base_path + 'physical-qol.csv').drop(columns=['id', 'day'])
df_psychological = pd.read_csv(base_path + 'psychological-qol.csv').drop(columns=['id', 'day'])

# Physical domain: every row, the 'Initial Set' rows, and the 'UFPI'/'UFC' rows.
# 'group' is only needed for the row selection, so it is removed right after.
phy_all_tmp = df_physical.drop(columns="group")
phy_init_set_tmp = df_physical.query("group in ('Initial Set')").drop(columns="group")
phy_ufpi_ufc_tmp = df_physical.query("group in ('UFPI', 'UFC')").drop(columns="group")

# Psychological domain: same three subsets.
psy_all_tmp = df_psychological.drop(columns="group")
psy_init_set_tmp = df_psychological.query("group in ('Initial Set')").drop(columns="group")
psy_ufpi_ufc_tmp = df_psychological.query("group in ('UFPI', 'UFC')").drop(columns="group")

# Split each subset into predictors (everything except the reference score)
# and the value to be predicted (the reference score itself).
phy_all, phy_all_pred = (
    phy_all_tmp.drop(columns="phy_ref_score"),
    phy_all_tmp["phy_ref_score"].copy(),
)
phy_init_set, phy_init_set_pred = (
    phy_init_set_tmp.drop(columns="phy_ref_score"),
    phy_init_set_tmp["phy_ref_score"].copy(),
)
phy_ufpi_ufc, phy_ufpi_ufc_pred = (
    phy_ufpi_ufc_tmp.drop(columns="phy_ref_score"),
    phy_ufpi_ufc_tmp["phy_ref_score"].copy(),
)

psy_all, psy_all_pred = (
    psy_all_tmp.drop(columns="psy_ref_score"),
    psy_all_tmp["psy_ref_score"].copy(),
)
psy_init_set, psy_init_set_pred = (
    psy_init_set_tmp.drop(columns="psy_ref_score"),
    psy_init_set_tmp["psy_ref_score"].copy(),
)
psy_ufpi_ufc, psy_ufpi_ufc_pred = (
    psy_ufpi_ufc_tmp.drop(columns="psy_ref_score"),
    psy_ufpi_ufc_tmp["psy_ref_score"].copy(),
)

In [3]:
def getDataset(domain_group):
    """Return the ``(predictors, target)`` pair registered under ``domain_group``.

    Args:
        domain_group: One of ``'phy_all'``, ``'psy_all'``, ``'phy_init_set'``,
            ``'psy_init_set'``, ``'phy_ufpi_ufc'`` or ``'psy_ufpi_ufc'``.

    Returns:
        A 2-tuple of the module-level predictor object and its matching
        ``*_pred`` object for the requested key.

    Raises:
        KeyError: If ``domain_group`` is not one of the keys listed above.
    """
    # The whole table is rebuilt on every call, so each lookup reflects the
    # current values of the module-level variables.
    datasets = dict(
        phy_all=(phy_all, phy_all_pred),
        psy_all=(psy_all, psy_all_pred),
        phy_init_set=(phy_init_set, phy_init_set_pred),
        psy_init_set=(psy_init_set, psy_init_set_pred),
        phy_ufpi_ufc=(phy_ufpi_ufc, phy_ufpi_ufc_pred),
        psy_ufpi_ufc=(psy_ufpi_ufc, psy_ufpi_ufc_pred),
    )
    return datasets[domain_group]

In [4]:
# Importing the pickled physical- and psychological-domain model objects.
# Each file is opened in a `with` block so the handle is closed as soon as the
# object has been loaded, instead of being left open for the kernel's lifetime.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load pickles that come from a trusted source.
with open("/kaggle/input/self-reported-qol/phy-qol-model.pickle", "rb") as phy_model_file:
    phy_model = pickle.load(phy_model_file)
with open("/kaggle/input/self-reported-qol/psy-qol-model.pickle", "rb") as psy_model_file:
    psy_model = pickle.load(psy_model_file)

In [5]:
# Preview the first five rows of the physical-domain frame
# ('id' and 'day' have already been dropped at this point).
df_physical.head(n=5)

Unnamed: 0,height,weight,steps,calories,lightsleep,deepsleep,remsleep,awakesleep,incomingcalls,rejectedcalls,...,biking,running(treadmill),strengthtraining,profession_fulltimeworker,profession_parttimeworker,profession_selfemployed,profession_student,maritalstatus_married,maritalstatus_single,phy_ref_score
0,1.63,73.0,7420,1804.5,16980,4560,5280,780,4,0,...,0,0,0,1,0,0,0,0,1,78
1,1.65,79.0,7630,1650.7,18663,3540,2160,3360,2,1,...,0,0,0,0,0,0,1,0,1,75
2,1.63,73.0,5401,1868.6,15720,6240,7440,600,1,0,...,0,0,0,1,0,0,0,0,1,78
3,1.8,150.0,361,2396.97,30840,1440,0,1920,0,0,...,0,0,0,0,0,0,1,0,1,60
4,1.63,73.0,4503,1758.03,16560,6120,5640,3540,0,0,...,0,0,0,1,0,0,0,0,1,78


In [6]:
# Physical domain: run the loaded model on the full predictor frame.
# The prediction frame reuses X_phy's index so that the column-wise concat
# below lines predictions up with their source rows even if a filtered dataset
# (e.g. "phy_init_set", whose index is not 0..n-1) is selected here.
# Assumes predict() returns one value per input row, in row order - TODO confirm.
X_phy, y_phy = getDataset("phy_all")
y_predicted_phy = pd.DataFrame(
    phy_model.predict(X_phy), index=X_phy.index, columns=['predicted_phy']
).round(2)

# Psychological domain: same procedure with its own model and predictors.
X_psy, y_psy = getDataset("psy_all")
y_predicted_psy = pd.DataFrame(
    psy_model.predict(X_psy), index=X_psy.index, columns=['predicted_psy']
).round(2)

# Side-by-side table: physical predictors, then reference and predicted scores
# for both domains. pd.concat(axis=1) aligns on the index.
# NOTE(review): this presumes the physical and psychological frames describe
# the same rows under the same index labels - verify against the CSVs.
result = pd.concat([X_phy, y_phy, y_predicted_phy, y_psy, y_predicted_psy], axis=1)
result.head()

Unnamed: 0,height,weight,steps,calories,lightsleep,deepsleep,remsleep,awakesleep,incomingcalls,rejectedcalls,...,profession_fulltimeworker,profession_parttimeworker,profession_selfemployed,profession_student,maritalstatus_married,maritalstatus_single,phy_ref_score,predicted_phy,psy_ref_score,predicted_psy
0,1.63,73.0,7420,1804.5,16980,4560,5280,780,4,0,...,1,0,0,0,0,1,78,79.81,66,66.0
1,1.65,79.0,7630,1650.7,18663,3540,2160,3360,2,1,...,0,0,0,1,0,1,75,79.67,62,62.0
2,1.63,73.0,5401,1868.6,15720,6240,7440,600,1,0,...,1,0,0,0,0,1,78,79.6,66,66.0
3,1.8,150.0,361,2396.97,30840,1440,0,1920,0,0,...,0,0,0,1,0,1,60,60.68,62,62.0
4,1.63,73.0,4503,1758.03,16560,6120,5640,3540,0,0,...,1,0,0,0,0,1,78,80.46,66,66.0


In [7]:
# Write the combined table to a CSV file in the working directory, leaving out
# the index column.
result.to_csv(path_or_buf='20230903-results-phy-psy-best-models.csv', index=False)