<a href="https://colab.research.google.com/github/blufury/Class-projects/blob/master/Ridge%26Lasso.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import pandas as pd
import numpy as np
import statsmodels.api as sm

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression, RidgeCV, LassoCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score

# Raw College dataset, read from a CSV expected in the working directory.
COLLEGE_CSV = 'College.csv'
college = pd.read_csv(COLLEGE_CSV)

# Multiple Linear Regression

In [5]:
# --- Clean the raw frame ------------------------------------------------------
# Cap the graduation rate at 100. clip() is the vectorised equivalent of the
# element-wise min(x, 100) and is safe to apply repeatedly.
college['Grad.Rate'] = college['Grad.Rate'].clip(upper=100)

# Encode Private as a 0/1 dummy variable. The guard makes the cell idempotent:
# pushing an already-encoded (numeric) column through the Yes/No mapping would
# replace every value with NaN on a second run.
if not pd.api.types.is_numeric_dtype(college['Private']):
    college['Private'] = college['Private'].map({'Yes': 1, 'No': 0})

# --- Features / target --------------------------------------------------------
# 'Unnamed: 0' is dropped along with the target; every remaining column is a
# predictor.
x = college.drop(columns=['Unnamed: 0', 'Grad.Rate'])
y = college['Grad.Rate']

# Mean-impute any missing values.
# NOTE(review): the means are computed on the full data set, before the split
# below, so test rows contribute to the imputed values if anything is missing.
x = x.fillna(x.mean())
y = y.fillna(y.mean())

# --- Train / test split (80/20, fixed seed for reproducibility) ---------------
xTrain, xTest, yTrain, yTest = train_test_split(x, y, test_size=0.2, random_state=32)

# Standardise the predictors: the scaler is fitted on the training split only
# and then applied unchanged to the test split.
scaler = StandardScaler()
xTrainScal = scaler.fit_transform(xTrain)
xTestScal = scaler.transform(xTest)

# --- Multiple linear regression (statsmodels OLS) -----------------------------
# OLS is fitted on the unscaled training predictors; add_constant supplies the
# intercept column.
xTrainMLR = sm.add_constant(xTrain)
olsModel = sm.OLS(yTrain, xTrainMLR).fit()
print('Multiple Linear Regression Summary: ')
print(olsModel.summary())

Multiple Linear Regression Summary: 
                            OLS Regression Results                            
Dep. Variable:              Grad.Rate   R-squared:                       0.469
Model:                            OLS   Adj. R-squared:                  0.454
Method:                 Least Squares   F-statistic:                     31.36
Date:                Wed, 26 Mar 2025   Prob (F-statistic):           1.24e-71
Time:                        20:34:22   Log-Likelihood:                -2448.6
No. Observations:                 621   AIC:                             4933.
Df Residuals:                     603   BIC:                             5013.
Df Model:                          17                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const        

# Ridge Regression

In [11]:
# Candidate penalties: 50 values log-spaced from 1e-4 to 1e4. RidgeCV selects
# among them with 5-fold cross-validation on the standardised training data.
ridgeAlphas = np.logspace(-4, 4, 50)
ridge = RidgeCV(alphas=ridgeAlphas, cv=5)
ridge.fit(xTrainScal, yTrain)

# Evaluate the selected model on the held-out test split.
yPredRidge = ridge.predict(xTestScal)
ridgeR2 = r2_score(yTest, yPredRidge)
ridgeRmse = np.sqrt(mean_squared_error(yTest, yPredRidge))

print('Ridge Regression Results: ')
print(f'Best Alpha: {ridge.alpha_:.4f}')
print(f'R Squared: {ridgeR2:.4f}')
print(f'RMSE: {ridgeRmse:.4f}')
print('Coefficients:')
# One line per predictor; the model was fitted on standardised features, so
# the coefficients are on that scale.
for featureName, weight in zip(x.columns, ridge.coef_):
    print(f'{featureName}: {weight:.4f}')

Ridge Regression Results: 
Best Alpha: 75.4312
R Squared: 0.4001
RMSE: 12.8184
Coefficients:
Private: 1.5759
Apps: 2.0465
Accept: 1.1139
Enroll: 0.1144
Top10perc: 2.1699
Top25perc: 1.9800
F.Undergrad: -0.6098
P.Undergrad: -2.2008
Outstate: 3.3815
Room.Board: 1.8940
Books: -0.1380
Personal: -1.3544
PhD: 0.9134
Terminal: -0.5460
S.F.Ratio: 0.6486
perc.alumni: 2.9089
Expend: -1.2032


# Lasso Regression Model

In [12]:
# Candidate penalties: 50 values log-spaced from 1e-4 to 1e4. LassoCV selects
# among them with 5-fold cross-validation on the standardised training data;
# max_iter is set to 10000 for the solver.
lassoAlphas = np.logspace(-4, 4, 50)
lasso = LassoCV(alphas=lassoAlphas, cv=5, max_iter=10000)
lasso.fit(xTrainScal, yTrain)

# Evaluate the selected model on the held-out test split.
yPredLasso = lasso.predict(xTestScal)
lassoR2 = r2_score(yTest, yPredLasso)
lassoRmse = np.sqrt(mean_squared_error(yTest, yPredLasso))

print('Lasso Regression Result:')
print(f"Best Alpha: {lasso.alpha_:.4f}")
print(f"R Squared: {lassoR2:.4f}")
print(f"RMSE: {lassoRmse:.4f}")
print("Coefficients:")
# One line per predictor; the model was fitted on standardised features, so
# the coefficients are on that scale.
for featureName, weight in zip(x.columns, lasso.coef_):
    print(f"{featureName}: {weight:.4f}")

Lasso Regression Result:
Best Alpha: 0.2683
R Squared: 0.4002
RMSE: 12.8172
Coefficients:
Private: 1.1153
Apps: 2.7211
Accept: 0.0000
Enroll: 0.0000
Top10perc: 1.9183
Top25perc: 1.9271
F.Undergrad: -0.0000
P.Undergrad: -2.2513
Outstate: 4.1377
Room.Board: 1.6433
Books: -0.0000
Personal: -1.2718
PhD: 0.1089
Terminal: -0.0000
S.F.Ratio: 0.4472
perc.alumni: 2.9635
Expend: -1.2055
