In [3]:
%run imports.ipynb
%run maeFunction.ipynb

In [4]:
def runBasicModels(df, drop_cols, target, iters, random_state=None):
    '''Fit Linear, Ridge, Polynomial (degree 2) and Lasso regressions on repeated random
    train/validation splits of df and print a summary of how they score.

    Input:
        df: DataFrame holding both the feature columns and the target column
        drop_cols: columns removed from df to build the feature matrix. The target is not
            removed automatically, so list it here if it must not be used as a feature.
        target: name of the column in df to predict
        iters: number of random splits (and therefore r_2 scores) per model; must be >= 1
        random_state: optional int seed that makes the sequence of splits reproducible.
            When None (default) the splits draw from numpy's global random state.

    Output: nothing is returned. The function prints, for every model, the mean +- standard
    deviation of the training and validation r_2 over all splits. The mean absolute errors,
    the target mean and the fitted coefficients that follow are taken from the LAST split
    only, not averaged over splits.

    Raises: ValueError if iters is smaller than 1.'''

    # Without at least one split there are no fitted models to summarise below.
    if iters < 1:
        raise ValueError(f'iters must be at least 1, got {iters}')

    # The feature matrix and the target do not change between splits, so build them once.
    features, response = df.drop(drop_cols, axis=1), df[target]

    # A single RandomState shared by every split gives reproducible yet different splits per
    # iteration; None makes train_test_split fall back to numpy's global random state.
    rng = None if random_state is None else np.random.RandomState(random_state)

    model_names = ('Linear', 'Ridge', 'Poly', 'Lasso')
    train_r2 = {name: [] for name in model_names}
    val_r2 = {name: [] for name in model_names}

    for _ in range(iters):
        # 20% is held out as a test set (deliberately left untouched here); the remaining 80%
        # is split 75/25, i.e. 60% train and 20% validation of the full data.
        X_rest, _X_test, y_rest, _y_test = train_test_split(
            features, response, test_size=.2, random_state=rng)
        X_train, X_val, y_train, y_val = train_test_split(
            X_rest, y_rest, test_size=.25, random_state=rng)

        # transformations are fit on the training rows only and then applied to validation
        scaler = StandardScaler()
        poly = PolynomialFeatures(degree=2)

        X_train_scaled = scaler.fit_transform(X_train)
        X_val_scaled = scaler.transform(X_val)

        # the polynomial expansion is built from the unscaled features
        X_train_poly = poly.fit_transform(X_train)
        X_val_poly = poly.transform(X_val)

        # model name -> (estimator, training matrix, validation matrix)
        candidates = {
            'Linear': (LinearRegression(), X_train_scaled, X_val_scaled),
            'Ridge': (Ridge(alpha=170), X_train_scaled, X_val_scaled),
            'Poly': (LinearRegression(), X_train_poly, X_val_poly),
            'Lasso': (Lasso(alpha=1), X_train_scaled, X_val_scaled),
        }

        for name, (model, train_matrix, val_matrix) in candidates.items():
            model.fit(train_matrix, y_train)
            train_r2[name].append(model.score(train_matrix, y_train))
            val_r2[name].append(model.score(val_matrix, y_val))

    def _summary(scores):
        # "mean +- std" of one list of r_2 scores
        return f"{np.mean(scores)} +- {np.std(scores)}"

    print('Training Data:')
    print(f"Linear:  {_summary(train_r2['Linear'])}")
    print(f"Ridge: {_summary(train_r2['Ridge'])}")
    print(f"Poly: {_summary(train_r2['Poly'])}")
    print(f"Lasso: {_summary(train_r2['Lasso'])}\n")

    print('Validation Data:')
    print(f"Linear: {_summary(val_r2['Linear'])}")
    print(f"Ridge: {_summary(val_r2['Ridge'])}")
    print(f"Poly: {_summary(val_r2['Poly'])}")
    print(f"Lasso: {_summary(val_r2['Lasso'])}\n")

    # From here on everything refers to the models and validation data of the last split.
    print('Mean Abolute Errors')
    for name, (model, _, val_matrix) in candidates.items():
        print(f'{name}:', mae(y_val, model.predict(val_matrix)))
    print(f'Target mean: {y_val.mean()}')

    # The polynomial model has one coefficient per expanded term (bias, original features,
    # squares and pairwise products), so its coefficients must be paired with the expanded
    # term names rather than with the original column names.
    base_names = [str(col) for col in features.columns]
    if hasattr(poly, 'get_feature_names_out'):
        poly_names = poly.get_feature_names_out(base_names)
    else:
        # older scikit-learn releases only provide get_feature_names
        poly_names = poly.get_feature_names(base_names)

    print(f"Linear:{list(zip(features.columns, candidates['Linear'][0].coef_))}\n")
    print(f"Poly: {list(zip(poly_names, candidates['Poly'][0].coef_))}\n")
    print(f"Ridge: {list(zip(features.columns, candidates['Ridge'][0].coef_))}\n")
    print(f"Lasso: {list(zip(features.columns, candidates['Lasso'][0].coef_))}\n")
    
    
