In [1]:
import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn.compose import ColumnTransformer
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import OneHotEncoder
from sklearn import set_config; set_config(display='diagram')
%matplotlib inline

In [2]:
# Read the cleaned survey export; the file is decoded as Latin-1.
df = pd.read_csv(
    '../raw_data/cleaned_data_040321.csv',
    encoding='latin-1',
)

In [5]:
# Predictor columns: the fifteen BFF_15_* items (1..15, in order) followed by
# the demographic columns, in the same order the model is trained on.
bff_items = [f'BFF_15_{item}' for item in range(1, 16)]
demographics = [
    'Dem_age',
    'Dem_gender',
    'Dem_edu',
    'Dem_edu_mom',
    'Dem_employment',
    'Dem_Expat',
    'Dem_maritalstatus',
    'Dem_riskgroup',
    'Dem_isolation',
]
X = df[bff_items + demographics]

# Regression target.
y = df['SLON3_avg']

In [8]:
def edu_func(X):
    """Ordinal-encode the 'Dem_edu' column.

    Parameters
    ----------
    X : pandas.DataFrame
        Frame that contains a 'Dem_edu' column of answer labels.

    Returns
    -------
    pandas.DataFrame
        A new single-column frame ('Dem_edu') in which every label listed in
        the mapping below is replaced by its integer code. Values that are
        not in the mapping are passed through unchanged.

    Notes
    -----
    The encoding is done on a copy, so the caller's frame is never modified
    (the previous version assigned back into ``X``).
    """
    edu_levels = {
        'Uninformative response': 0,
        'None': 1,
        'Up to 6 years of school': 2,
        'Up to 9 years of school': 3,
        'Up to 12 years of school': 4,
        'Some College, short continuing education or equivalent': 5,
        'College degree, bachelor, master': 6,
        'PhD/Doctorate': 7,
    }
    # Explicit copy: keeps the input intact and avoids SettingWithCopyWarning
    # when X is itself a slice of a larger frame.
    encoded = X[['Dem_edu']].copy()
    encoded['Dem_edu'] = encoded['Dem_edu'].replace(edu_levels)
    return encoded


def edu_mom_func(X):
    """Ordinal-encode the 'Dem_edu_mom' column.

    Parameters
    ----------
    X : pandas.DataFrame
        Frame that contains a 'Dem_edu_mom' column of answer labels.

    Returns
    -------
    pandas.DataFrame
        A new single-column frame ('Dem_edu_mom') in which every label listed
        in the mapping below is replaced by its integer code. Values that are
        not in the mapping are passed through unchanged.

    Notes
    -----
    The encoding is done on a copy, so the caller's frame is never modified
    (the previous version assigned back into ``X``).
    """
    edu_mom_levels = {
        'Uninformative response': 0,
        'None': 1,
        'Up to 6 years of school': 2,
        'Up to 9 years of school': 3,
        'Up to 12 years of school': 4,
        'Some College or equivalent': 5,
        'College degree': 6,
        'PhD/Doctorate': 7,
    }
    # Explicit copy: keeps the input intact and avoids SettingWithCopyWarning
    # when X is itself a slice of a larger frame.
    encoded = X[['Dem_edu_mom']].copy()
    encoded['Dem_edu_mom'] = encoded['Dem_edu_mom'].replace(edu_mom_levels)
    return encoded


def edu_risk_group(X):
    """Ordinal-encode the 'Dem_riskgroup' column.

    Parameters
    ----------
    X : pandas.DataFrame
        Frame that contains a 'Dem_riskgroup' column.

    Returns
    -------
    pandas.DataFrame
        A new single-column frame ('Dem_riskgroup') with 'No' -> 1,
        'Not sure' -> 2 and 'Yes' -> 3. Any other value is passed through
        unchanged.

    Notes
    -----
    The encoding is done on a copy, so the caller's frame is never modified
    (the previous version assigned back into ``X``).
    """
    risk_levels = {'No': 1, 'Not sure': 2, 'Yes': 3}
    # Explicit copy: keeps the input intact and avoids SettingWithCopyWarning
    # when X is itself a slice of a larger frame.
    encoded = X[['Dem_riskgroup']].copy()
    encoded['Dem_riskgroup'] = encoded['Dem_riskgroup'].replace(risk_levels)
    return encoded

def dem_expat_func(X):
    """Binary-encode the 'Dem_Expat' column.

    Parameters
    ----------
    X : pandas.DataFrame
        Frame that contains a 'Dem_Expat' column.

    Returns
    -------
    pandas.DataFrame
        A new single-column frame ('Dem_Expat') with 'no' -> 0 and
        'yes' -> 1. Any other value is passed through unchanged.

    Notes
    -----
    The encoding is done on a copy, so the caller's frame is never modified
    (the previous version assigned back into ``X``).
    """
    expat_codes = {'no': 0, 'yes': 1}
    # Explicit copy: keeps the input intact and avoids SettingWithCopyWarning
    # when X is itself a slice of a larger frame.
    encoded = X[['Dem_Expat']].copy()
    encoded['Dem_Expat'] = encoded['Dem_Expat'].replace(expat_codes)
    return encoded

# Column-wise preprocessing:
#   * the four FunctionTransformer steps turn label columns into integer codes;
#   * the four nominal columns are one-hot encoded;
#   * every remaining column of the input is passed through untouched.
# NOTE: OneHotEncoder is left at its defaults, so a category that was not seen
# during fit raises an error at transform/predict time.
preprocessor = ColumnTransformer(
    [
        ('Dem_edu_col', FunctionTransformer(edu_func), ['Dem_edu']),
        ('Dem_edu_mom', FunctionTransformer(edu_mom_func), ['Dem_edu_mom']),
        ('Dem_edu_risk_group', FunctionTransformer(edu_risk_group), ['Dem_riskgroup']),
        ('dem_expat_func', FunctionTransformer(dem_expat_func), ['Dem_Expat']),
        ('ohe_transformer', OneHotEncoder(), ['Dem_gender', 'Dem_maritalstatus', 'Dem_employment', 'Dem_isolation']),
    ],
    remainder='passthrough',
    # OneHotEncoder emits a sparse matrix. With the default threshold (0.3) the
    # stacked result becomes sparse whenever the overall density drops below it,
    # and MinMaxScaler rejects sparse input. 0 forces a dense result always.
    sparse_threshold=0,
)

# Encode the columns, then rescale every resulting feature with MinMaxScaler.
# The first step is named 'imputer' although it performs no imputation; the
# name is kept so existing `get_params` / `set_params` keys stay valid.
base_pipe = Pipeline([
    ('imputer', preprocessor),
    ('scaler', MinMaxScaler()),
])

# Full model: preprocessing followed by an ordinary least-squares regression.
loneliness_pipe = Pipeline([
    ('preprocessing', base_pipe),
    ('linear_regression', LinearRegression()),
])

In [9]:
# Fit the preprocessing steps and the regression on X / y in a single call.
loneliness_pipe.fit(X=X, y=y)

In [10]:
# Persist the fitted pipeline in the current working directory.
# NOTE: the FunctionTransformer callables (edu_func, edu_mom_func,
# edu_risk_group, dem_expat_func) are pickled by reference, so they must be
# importable under the same module path wherever this file is loaded.
joblib.dump(loneliness_pipe, 'model_linear_loneliness.joblib')

['model_linear_loneliness.joblib']