In [52]:
from sklearn import preprocessing
from sklearn.preprocessing import OneHotEncoder
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import FunctionTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import MinMaxScaler
from sklearn import set_config; set_config(display='diagram')
from sklearn.linear_model import LinearRegression
import pickle

## Create Data Frame

In [53]:
df=pd.read_csv('../raw_data/cleaned_data_040321.csv',encoding='latin-1')

## Defining X and y

In [54]:
X=df[[
    'BFF_15_1','BFF_15_2','BFF_15_3','BFF_15_4','BFF_15_5','BFF_15_6','BFF_15_7','BFF_15_8','BFF_15_9','BFF_15_10','BFF_15_11',
    'BFF_15_12','BFF_15_13','BFF_15_14','BFF_15_15','Dem_age','Dem_gender','Dem_edu','Dem_edu_mom','Dem_employment','Dem_Expat',
    'Dem_maritalstatus','Dem_riskgroup','Dem_isolation'
]]
y=df['PSS10_avg']

## Create functions for transformations

In [55]:
def edu_func(df):
    df['Dem_edu']=df['Dem_edu'].replace(
        {
            'Uninformative response':0,
            'None':1,
            'Up to 6 years of school':2, 
            'Up to 9 years of school':3, 
            'Up to 12 years of school':4, 
            'Some College, short continuing education or equivalent':5,
            'College degree, bachelor, master': 6,
            'PhD/Doctorate':7
        })
    return  df[['Dem_edu']]


def edu_mom_func(df):    
    df['Dem_edu_mom'] = df['Dem_edu_mom'].replace({
        'Uninformative response':0,
        'None':1,
        'Up to 6 years of school':2,
        'Up to 9 years of school':3,
        'Up to 12 years of school':4,
        'Some College or equivalent':5,
        'College degree': 6,
        'PhD/Doctorate':7
    }
    )
    return  df[['Dem_edu_mom']]


def risk_group_func(df): 
    df['Dem_riskgroup'] = df['Dem_riskgroup'].replace({'No':1,'Not sure':2, 'Yes':3})    
    return  df[['Dem_riskgroup']]

def expat_func(df):
    df['Dem_Expat'] = df['Dem_Expat'].replace({'no':0,'yes':1})
    return df[['Dem_Expat']]

In [56]:
preprocessor = ColumnTransformer(
    [    
        ('edu', FunctionTransformer(edu_func), ['Dem_edu']),
        ('edu_mom', FunctionTransformer(edu_mom_func), ['Dem_edu_mom']),
        ('risk_group', FunctionTransformer(risk_group_func), ['Dem_riskgroup']),
        ('expat', FunctionTransformer(expat_func), ['Dem_Expat']),
        ('ohe_transformer', OneHotEncoder(), ['Dem_gender', 'Dem_maritalstatus','Dem_employment','Dem_isolation'])
    ],remainder='passthrough'
)

base_pipe = Pipeline([
    ('imputer', preprocessor),
    ('scaler', MinMaxScaler())
])



stress_pipe = Pipeline([
    ('preprocessing', base_pipe),
    ('linear_regression',  LinearRegression())
])

In [57]:
# stress_pipe.fit(X,y)

In [58]:
# stress_pipe.get_params()

In [59]:
pd.DataFrame(stress_pipe.fit_transform(X)).head()

ValueError: This LinearRegression estimator requires y to be passed, but the target y is None.

In [23]:
import joblib 
joblib.dump(stress_pipe, 'model_linear_stress_3.joblib')

['model_linear_stress_3.joblib']

In [24]:
import pickle
# Export pipeline as pickle file
with open("stress_pipeline.pkl", "wb") as file:
    pickle.dump(stress_pipe, file)

# Method 2

In [15]:
from sklearn.base import TransformerMixin
from sklearn.base import BaseEstimator

In [25]:
class ColumnEdu(TransformerMixin, BaseEstimator):
    def __init(self, column_1):
        self.column_1 = column_1
        
    def fit(self, X, y=None):
        return self
    
    def transform(self, X, y=None):
        
        df['Dem_edu']=df['Dem_edu'].replace(
            {
                'Uninformative response':0,
                'None':1,
                'Up to 6 years of school':2, 
                'Up to 9 years of school':3, 
                'Up to 12 years of school':4, 
                'Some College, short continuing education or equivalent':5,
                'College degree, bachelor, master': 6,
                'PhD/Doctorate':7
            }
        )
        return  df[['Dem_edu']]

In [26]:
class ColumnEduMom(TransformerMixin, BaseEstimator):
    def __init(self, column_1):
        self.column_1 = column_1
        
    def fit(self, X, y=None):
        return self
    
    def transform(self, X, y=None):
        df['Dem_edu_mom'] = df['Dem_edu_mom'].replace(
            {   
                'Uninformative response':0,
                'None':1,
                'Up to 6 years of school':2,
                'Up to 9 years of school':3,
                'Up to 12 years of school':4,
                'Some College or equivalent':5,
                'College degree': 6,
                'PhD/Doctorate':7
            }
        )
        return  df[['Dem_edu_mom']]

In [27]:
class ColumnRisk(TransformerMixin, BaseEstimator):
    def __init(self, column_1):
        self.column_1 = column_1
        
    def fit(self, X, y=None):
        return self
    
    def transform(self, X, y=None):
        df['Dem_riskgroup'] = df['Dem_riskgroup'].replace({'No':1,'Not sure':2, 'Yes':3})    
        return  df[['Dem_riskgroup']]

In [28]:
class ColumnExpat(TransformerMixin, BaseEstimator):
    def __init(self, column_1):
        self.column_1 = column_1
        
    def fit(self, X, y=None):
        return self
    
    def transform(self, X, y=None):
        df['Dem_Expat'] = df['Dem_Expat'].replace({'no':0,'yes':1})
        return df[['Dem_Expat']]

In [32]:
preprocessor = ColumnTransformer(
    [
        ('edu_func', ColumnEdu(),['Dem_edu']),
        ('edu_mom_func', ColumnEduMom(),['Dem_edu_mom']),
        ('risk_func', ColumnRisk(),['Dem_riskgroup']),
        ('expat_func', ColumnExpat(),['Dem_Expat']),
        ('ohe_transformer', OneHotEncoder(), ['Dem_gender', 'Dem_maritalstatus','Dem_employment','Dem_isolation'])
    ],remainder='passthrough'
)

base_pipe_2 = Pipeline([
    ('imputer', preprocessor),
    ('scaler', MinMaxScaler())
])

stress_pipe_2 = Pipeline([
    ('preprocessing', base_pipe),
    ('linear_regression',  LinearRegression())
])

In [33]:
stress_pipe_2

In [34]:
pd.DataFrame(stress_pipe_2.fit_transform(X)).head()

ValueError: This LinearRegression estimator requires y to be passed, but the target y is None.