In [3]:
%run imports.ipynb
%run maeFunction.ipynb

In [4]:
def runModelsCV(df, drop_cols, target, splits=5, random_state=None,
                ridge_alpha=170, lasso_alpha=1.37):
    """Cross-validate four linear models and print a comparison report.

    The feature matrix is ``df`` without ``drop_cols``; the response is
    ``df[target]``.  20% of the rows are split off first and are not used by
    this function; K-fold cross-validation then runs on the remaining 80%.
    In every fold the following models are fitted:

    * ``LinearRegression`` on standardised features
    * ``Ridge`` on standardised features
    * ``LinearRegression`` on degree-2 polynomial features (unscaled)
    * ``Lasso`` on standardised features

    Printed output: mean +- std of R^2 on the training and validation folds,
    the validation MAE averaged over folds, the mean of the target on the
    80% portion, and the coefficients of the models fitted on the final fold.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data containing both the features and the target column.
    drop_cols : label or list of labels
        Columns removed from ``df`` to form the feature matrix.
        NOTE(review): the target column is presumably expected to be listed
        here, otherwise it stays among the features -- confirm with callers.
    target : label
        Name of the response column in ``df``.
    splits : int, default 5
        Number of folds passed to ``KFold``.
    random_state : int or None, default None
        Seed forwarded to both ``train_test_split`` and ``KFold``.  ``None``
        keeps the previous unseeded behaviour.
    ridge_alpha : float, default 170
        Regularisation strength for ``Ridge``.
    lasso_alpha : float, default 1.37
        Regularisation strength for ``Lasso``.

    Returns
    -------
    None
        Results are printed only.
    """
    features = df.drop(drop_cols, axis=1)
    # Remember the column labels before converting to numpy so the
    # coefficient report does not need to rebuild the frame.
    feature_names = list(features.columns)

    # The 20% hold-out is discarded here; only the 80% part is cross-validated.
    X, _, y, _ = train_test_split(features, df[target], test_size=.2,
                                  random_state=random_state)
    X, y = np.array(X), np.array(y)

    kf = KFold(n_splits=splits, shuffle=True, random_state=random_state)

    model_names = ('Simple', 'Ridge', 'Poly', 'LASSO')
    train_r2 = {name: [] for name in model_names}  # per-fold training R^2
    val_r2 = {name: [] for name in model_names}    # per-fold validation R^2
    val_mae = {name: [] for name in model_names}   # per-fold validation MAE

    fitted = {}  # models from the most recent fold, used for the coef report
    poly = None

    for train_ind, val_ind in kf.split(X, y):
        X_train, y_train = X[train_ind], y[train_ind]
        X_val, y_val = X[val_ind], y[val_ind]

        # Scaler is fitted on the training fold only to avoid leaking
        # validation statistics into the model.
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_val_scaled = scaler.transform(X_val)

        poly = PolynomialFeatures(degree=2)
        X_train_poly = poly.fit_transform(X_train)
        X_val_poly = poly.transform(X_val)

        # NOTE(review): tol=40 is far above scikit-learn's default (1e-4), so
        # the Lasso solver will stop very early -- confirm this is intended
        # and not just a way to silence convergence warnings.
        candidates = {
            'Simple': (LinearRegression(), X_train_scaled, X_val_scaled),
            'Ridge': (Ridge(alpha=ridge_alpha), X_train_scaled, X_val_scaled),
            'Poly': (LinearRegression(), X_train_poly, X_val_poly),
            'LASSO': (Lasso(alpha=lasso_alpha, max_iter=5000, tol=40),
                      X_train_scaled, X_val_scaled),
        }

        for name, (model, X_fit, X_eval) in candidates.items():
            model.fit(X_fit, y_train)
            train_r2[name].append(model.score(X_fit, y_train))
            val_r2[name].append(model.score(X_eval, y_val))
            # `mae` is not defined in this cell (presumably it comes from
            # maeFunction.ipynb); assumed to return a numeric scalar so it
            # can be averaged across folds -- TODO confirm.
            val_mae[name].append(mae(y_val, model.predict(X_eval)))
            fitted[name] = model

    print('Training sets: ')
    for name in model_names:
        print(f'{name} mean cv r^2: {np.mean(train_r2[name]):.3f} +- {np.std(train_r2[name]):.3f}')
    print()

    print('Validation sets: ')
    for name in model_names:
        print(f'{name} mean cv r^2: {np.mean(val_r2[name]):.3f} +- {np.std(val_r2[name]):.3f}')
    print()

    # Averaged over all folds so it is comparable with the R^2 summary above
    # (previously only the last fold was reported).
    print('Mean Absolute Errors (validation, mean over folds)')
    for name, label in (('Simple', 'Linear'), ('Ridge', 'Ridge'),
                        ('Poly', 'Poly'), ('LASSO', 'LASSO')):
        print(f'{label}:', np.mean(val_mae[name]))
    print(f'Target mean {y.mean()}\n')

    # The polynomial expansion has its own feature set (bias, originals,
    # squares, interactions); label its coefficients with the generated
    # names rather than the raw column names, which would be misaligned.
    poly_input_names = [str(col) for col in feature_names]
    if hasattr(poly, 'get_feature_names_out'):
        poly_names = poly.get_feature_names_out(poly_input_names)
    else:
        # scikit-learn releases that predate get_feature_names_out
        poly_names = poly.get_feature_names(poly_input_names)

    # Coefficients below belong to the models fitted on the final fold.
    print(f"Linear:{list(zip(feature_names, fitted['Simple'].coef_))}\n")
    print(f"Poly: {list(zip(poly_names, fitted['Poly'].coef_))}\n")
    print(f"Ridge: {list(zip(feature_names, fitted['Ridge'].coef_))}\n")
    print(f"Lasso: {list(zip(feature_names, fitted['LASSO'].coef_))}")
    