In [1]:
from sklearn.linear_model import LinearRegression, Ridge, LassoCV
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score
from sklearn.metrics import make_scorer
import pandas as pd
import numpy as np

In [2]:
df_encoded = pd.read_csv(r"model_training_data.csv")

In [3]:
reg = LinearRegression()


X = df_encoded.drop(["fee", "name"], axis=1)
y = df_encoded["fee"]


X_train,X_test,y_train,y_test=train_test_split(X, y, test_size=0.2, random_state=1937)
reg.fit(X_train,y_train)


y_pred = reg.predict(X_test)


mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)

Mean Squared Error: 5560591173.939842
Mean Absolute Error: 26870.50777350266


In [4]:
X = df_encoded.drop(["fee", "name"], axis=1)
pca = PCA(n_components=10)


X_pca = pca.fit_transform(X)
y = df_encoded["fee"]


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1937)


alphas = [0.001, 0.01, 0.1, 1, 10, 100]


reg = Ridge()


param_grid = {'alpha': alphas}
grid_search = GridSearchCV(reg, param_grid, cv=5, scoring='neg_mean_squared_error')


grid_search.fit(X_train, y_train)


best_reg = grid_search.best_estimator_


y_pred = best_reg.predict(X_test)


mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("Best alpha found:", best_reg.alpha)

Mean Squared Error: 5552883959.338653
Mean Absolute Error: 25785.488237618065
Best alpha found: 100


In [5]:

X = df_encoded.drop(["fee", "name"], axis=1)
y = df_encoded["fee"]


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1937)


scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


lasso = LassoCV(cv=5, max_iter=10000)
lasso.fit(X_train_scaled, y_train)


y_pred = lasso.predict(X_test_scaled)


mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("Best alpha found:", lasso.alpha_)

Mean Squared Error: 5391943303.57895
Mean Absolute Error: 26789.70051269288
Best alpha found: 16461.373863941557
