In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Read the modelling table from a CSV located relative to the notebook's
# working directory; later cells use its "fee" and "name" columns.
dataF = pd.read_csv(filepath_or_buffer='model_training_data.csv')

In [5]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error , mean_absolute_error
from sklearn.model_selection import train_test_split



# Baseline: ordinary least-squares regression of "fee" on every remaining column.
# "name" is excluded from the features (presumably a non-numeric identifier — confirm).
X = dataF.drop(columns=["fee", "name"])
y = dataF["fee"]

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1937,
)

# fit() returns the estimator itself, so construction and training can be chained.
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

# Held-out error metrics.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)

print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)


Mean Squared Error: 6843311283.921362
Mean Absolute Error: 42880.84693885203


In [7]:
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.decomposition import PCA


# Ridge regression whose regularisation strength (alpha) is selected by a
# 5-fold cross-validated grid search on the training split, then evaluated
# once on the held-out 20%.
#
# No PCA step is applied here: the model is trained and evaluated on the raw
# feature columns. A PCA fitted on the full dataset before splitting would see
# the test rows (data leakage), and a projection that is never passed to the
# model only wastes computation. If dimensionality reduction is wanted, put
# PCA inside a sklearn Pipeline and hand that pipeline to GridSearchCV so it
# is re-fit on the training folds only.
X = dataF.drop(["fee", "name"], axis=1)
y = dataF["fee"]

# Same seed and test fraction as the other model cells, so all models are
# compared on the same held-out rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1937)

# Candidate regularisation strengths, spanning five orders of magnitude.
alphas = [0.001, 0.01, 0.1, 1, 10, 100]

model = Ridge()

param_grid = {'alpha': alphas}

# scoring is *negative* MSE because GridSearchCV always maximises its score.
grid_search = GridSearchCV(model, param_grid, cv=5, scoring='neg_mean_squared_error')

grid_search.fit(X_train, y_train)

# With the default refit=True, best_estimator_ is already re-trained on the
# whole training split using the winning alpha.
best_model = grid_search.best_estimator_

y_pred = best_model.predict(X_test)


mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)

print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("Best alpha found:", best_model.alpha)

Mean Squared Error: 5662418986.662347
Mean Absolute Error: 27494.516767562556
Best alpha found: 100


In [8]:
from sklearn.linear_model import LassoCV
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


# Lasso regression on standardised features; LassoCV chooses alpha itself
# via 5-fold cross-validation over its automatically generated alpha path.
X = dataF.drop(columns=["fee", "name"])
y = dataF["fee"]
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1937,
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both splits so the test rows never influence it.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# max_iter is raised to 10000 iterations for the coordinate-descent solver.
lasso = LassoCV(cv=5, max_iter=10000).fit(X_train_scaled, y_train)

# Held-out error metrics for the selected model.
y_pred = lasso.predict(X_test_scaled)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)

print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("Best alpha found:", lasso.alpha_)

Mean Squared Error: 5391943303.57895
Mean Absolute Error: 26789.70051269288
Best alpha found: 16461.373863941557
