In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error
import pandas as pd
import matplotlib.pyplot  as plt
import seaborn as sns
import numpy as np
import scipy

In [None]:
# Toy data set: hours spent calling customers and the money earned,
# ten paired observations.
call_hours_customers = [2, 3, 4, 5, 6, 1.5, 5, 7, 8, 10]
money_earned = [50, 70, 90, 100, 110, 40, 110, 130, 145, 180]

# Named `earnings_data` rather than `dict` so the built-in `dict` type is not
# shadowed for the rest of the kernel session.
earnings_data = {'call_hours_customers': call_hours_customers, 'money_earned': money_earned}
df = pd.DataFrame(earnings_data)
# Work on a copy so `df` keeps the untouched data.
df_copy = df.copy()
df_copy.head()

In [None]:
# Separate the target column from the single predictor column.
y = df_copy['money_earned']
x = df_copy.drop(columns='money_earned')

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Fit an ordinary least-squares regression on the training split.
# `fit` returns the estimator itself, so the chained form binds the fitted model.
model = LinearRegression().fit(X_train, y_train)
model

In [None]:
# Show the predictions for the held-out rows, followed by their actual targets.
print(model.predict(X_test), y_test, sep='\n')

In [None]:
# Evaluate the fitted model on the held-out split.
test_prediction = model.predict(X_test)
MAE = mean_absolute_error(y_test, test_prediction)
MSE = mean_squared_error(y_test, test_prediction)
RMSE = np.sqrt(MSE)
y_mean = y.mean()

# One line per metric, in the order MAE, MSE, RMSE.
for metric_name, metric_value in (('MAE', MAE), ('MSE', MSE), ('RMSE', RMSE)):
    print(f'{metric_name} error in our model is {metric_value}')

In [None]:
# Express each error relative to the mean target value so it reads as a percentage.
y_mean = y.mean()
test_prediction = model.predict(X_test)
MAE = mean_absolute_error(y_test, test_prediction)
MSE = mean_squared_error(y_test, test_prediction)
RMSE = np.sqrt(MSE)
print(f'MAE error percent in our model is {MAE/y_mean*100}%')
# MSE is measured in squared target units, so it is normalised by the squared
# mean; dividing by the plain mean would leave a value that still carries
# target units and is not a percentage.
print(f'MSE error percent in our model is {MSE/y_mean**2*100}%')
print(f'RMSE error percent in our model is {RMSE/y_mean*100}%')

In [None]:
# Residual plot: actual target on the x-axis, (actual - predicted) on the y-axis.
test_residuals = y_test - test_prediction
ax = sns.scatterplot(x=y_test, y=test_residuals)
# The dashed zero line marks a perfect prediction.
ax.axhline(y=0, color='r', ls='--')
# Explicit labels: the residual Series keeps the target's name, so seaborn
# would otherwise put the same label on both axes.
ax.set(
    xlabel='Actual value',
    ylabel='Residual (actual - predicted)',
    title='Test-set residuals',
)
plt.show()

In [None]:
# Class Exercise - Deploying Model 22/01/25

In [None]:
from joblib import dump, load

# Refit on the complete data set before persisting. `fit` returns the
# estimator itself, so `final_model` is the same object as `model`.
final_model = model.fit(x, y)
dump(final_model, 'final_model.joblib')

In [None]:
# Read the persisted linear model back from disk.
loaded_model = load('final_model.joblib')

In [None]:
# Coefficient(s) stored in the reloaded model.
loaded_model.coef_

In [None]:
# (rows, columns) of the feature frame.
x.shape

In [None]:
# Predict earnings for 5.5 and 15 hours of calls.
# The model was fitted on a DataFrame, so the new inputs are wrapped in a
# DataFrame with the same column name; a bare nested list makes scikit-learn
# warn that the input has no valid feature names.
new_hours = pd.DataFrame({'call_hours_customers': [5.5, 15]})
loaded_model.predict(new_hours)

In [None]:
# Class Exercise - Polynomial Regression

In [None]:
from sklearn.preprocessing import PolynomialFeatures

In [None]:
# Train the model on 6 different polynomial regression degrees (from 1 to 6 included)

In [None]:
# Fit one polynomial regression per degree and record the train/test RMSE of each.
MAX_DEGREE = 6          # degrees 1..MAX_DEGREE are trained
MAX_PLOTTED_DEGREE = 4  # only degrees 1..MAX_PLOTTED_DEGREE are drawn

train_rmse_error = []
test_rmse_error = []

for degree in range(1, MAX_DEGREE + 1):
    poly_converter = PolynomialFeatures(degree=degree, include_bias=False)
    poly_features = poly_converter.fit_transform(x)
    X_train, X_test, y_train, y_test = train_test_split(poly_features, y, test_size=0.3, random_state=42)

    # Fresh estimator per degree: refitting the shared `model` here would also
    # overwrite `final_model`, which refers to the same object.
    degree_model = LinearRegression()
    degree_model.fit(X_train, y_train)

    train_predict = degree_model.predict(X_train)
    test_predict = degree_model.predict(X_test)

    train_rmse = np.sqrt(mean_squared_error(y_train, train_predict))
    test_rmse = np.sqrt(mean_squared_error(y_test, test_predict))

    train_rmse_error.append(train_rmse)
    test_rmse_error.append(test_rmse)

# All degrees are kept in the lists; the plot is restricted to the first
# MAX_PLOTTED_DEGREE entries.
plotted_degrees = range(1, MAX_PLOTTED_DEGREE + 1)
plt.plot(plotted_degrees, train_rmse_error[:MAX_PLOTTED_DEGREE], label='Train RMSE')
plt.plot(plotted_degrees, test_rmse_error[:MAX_PLOTTED_DEGREE], label='Test RMSE')

plt.ylabel('RMSE')
plt.xlabel('Poly Degree')
plt.legend()
plt.show()

In [None]:
# Find the best-fit degree for polynomial regression training
# Best fit should be at the 2nd degree

In [None]:
# Create a final model using the degree you chose and deploy it

In [None]:
# Final model: degree-2 polynomial features (no constant column) fitted on
# the complete data set.
poly_converter = PolynomialFeatures(include_bias=False, degree=2)
poly_features = poly_converter.fit(x).transform(x)
model.fit(poly_features, y)

In [None]:
# Import your final model from the joblib file and load it back to your working area

In [None]:
# Persist both artefacts: predicting later needs the fitted model and the
# converter that builds its polynomial features.
final_poly_model = model
dump(final_poly_model, 'final_poly_model.joblib')
dump(poly_converter, 'final_converter.joblib')


In [None]:
# Use the imported model to predict how much money will be earned with the following new hours of
# calling customers: [5.5, 15]

In [None]:
# Reload the persisted polynomial model and the converter saved alongside it.
loaded_final_poly_model = load('final_poly_model.joblib')
final_converter = load('final_converter.joblib')

# New inputs use the same column name the converter was fitted with.
new_hours = pd.DataFrame({'call_hours_customers': [5.5, 15]})

# `transform`, not `fit_transform`: the loaded converter is already fitted and
# must not be refitted on the data that is being predicted.
poly_features = final_converter.transform(new_hours)
loaded_final_poly_model.predict(poly_features)

In [None]:
# Load the advertising data set; the path is relative to the working directory.
df = pd.read_csv('csvs/Advertising.csv')

In [None]:
# Work on a copy so `df` keeps the data exactly as loaded, then preview the first rows.
df_copy = df.copy()
df_copy.head()

In [None]:
# Target is the 'sales' column; the features are every other column.
y = df_copy['sales']
X = df_copy.drop(columns='sales')

In [None]:
# Expand the features into polynomial terms up to degree 3, without a constant column.
poly_converter = PolynomialFeatures(include_bias=False, degree=3)
poly_features = poly_converter.fit(X).transform(X)

In [None]:
# 70/30 train/test split of the polynomial features, with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(poly_features, y, test_size=0.3, random_state=101)

In [None]:
from sklearn.preprocessing import StandardScaler

# The scaling statistics are learned from the training split only and then
# applied unchanged to the test split.
scaler = StandardScaler()
scaled_X_train = scaler.fit_transform(X_train)
scaled_X_test = scaler.transform(X_test)

In [None]:
from sklearn.linear_model import Ridge

# Ridge regression with a fixed penalty of 10, scored on the scaled test split.
ridge_model = Ridge(alpha=10)
ridge_model.fit(scaled_X_train, y_train)
test_prediction = ridge_model.predict(scaled_X_test)

MAE = mean_absolute_error(y_test, test_prediction)
RMSE = np.sqrt(mean_squared_error(y_test, test_prediction))

print(f'MAE: {MAE}', f'RMSE: {RMSE}', sep='\n')

In [None]:
from sklearn.linear_model import RidgeCV

# Let RidgeCV choose among three candidate penalties, scored by negative MAE.
candidate_alphas = (0.1, 1.0, 10.0)
ridge_cv_model = RidgeCV(alphas=candidate_alphas, scoring='neg_mean_absolute_error')
ridge_cv_model.fit(scaled_X_train, y_train)
test_prediction = ridge_cv_model.predict(scaled_X_test)

MAE = mean_absolute_error(y_test, test_prediction)
RMSE = np.sqrt(mean_squared_error(y_test, test_prediction))

print(f'MAE: {MAE}', f'RMSE: {RMSE}', sep='\n')

In [None]:
# Penalty value RidgeCV selected from the candidates.
ridge_cv_model.alpha_

In [None]:
# Rebuild the call-hours toy data set for the regularisation exercise.
call_hours_customers = [2, 3, 4, 5, 6, 1.5, 5, 7, 8, 10]
money_earned = [50, 70, 90, 100, 110, 40, 110, 130, 145, 180]

# Named `earnings_data` rather than `dict` so the built-in `dict` type is not
# shadowed for the rest of the kernel session.
earnings_data = {'call_hours_customers': call_hours_customers, 'money_earned': money_earned}
df = pd.DataFrame(earnings_data)
df_copy = df.copy()

# Features: every column except the target. Target: 'money_earned'.
X = df_copy.drop('money_earned', axis=1)
y = df_copy['money_earned']

In [None]:
# Convert your model data to be with polynomial regression of 4 degree

In [None]:
# Expand the single feature into its powers up to degree 4, without a constant column.
poly_converter = PolynomialFeatures(include_bias=False, degree=4)
poly_features = poly_converter.fit(X).transform(X)

In [None]:
# Use feature scaling of type Normalization to prepare your data set

In [None]:
# `Normalizer` stays imported in case another cell uses it; it is no longer used here.
from sklearn.preprocessing import MinMaxScaler, Normalizer

X_train, X_test, y_train, y_test = train_test_split(poly_features, y, test_size=0.3, random_state=101)

# "Normalization" as feature scaling means min-max scaling of each column.
# scikit-learn's `Normalizer` instead rescales every row to unit norm, which
# is not feature scaling, so MinMaxScaler is used here.
normalizer = MinMaxScaler()
# The min/max are learned from the training split only and reused for the test split.
normalizer.fit(X_train)
normalised_X_train = normalizer.transform(X_train)
normalised_X_test = normalizer.transform(X_test)

In [None]:
# Use basic Ridge model to eliminate overfitting and choose alpha value of 5
# ● Print the MAE and RMSE results of your model

In [None]:
from sklearn.linear_model import Ridge

# Ridge regression with a fixed penalty of 5, scored on the normalised test split.
ridge_model = Ridge(alpha=5)
ridge_model.fit(normalised_X_train, y_train)
test_prediction = ridge_model.predict(normalised_X_test)

MAE = mean_absolute_error(y_test, test_prediction)
RMSE = np.sqrt(mean_squared_error(y_test, test_prediction))

print(f'MAE: {MAE}', f'RMSE: {RMSE}', sep='\n')

In [None]:
# Use RidgeCV and find what is the optimal alpha value for range between 1 to 10 with jumps of
# 0.1 and use ‘neg_root_mean_squared_error’ as the score metric

In [None]:
from sklearn.linear_model import RidgeCV

# Candidate penalties 1.0, 1.1, ..., 10.0 (91 values), as the exercise asks.
# linspace fixes the number of points, which avoids arange's floating-point
# endpoint ambiguity; rounding keeps the reported alpha readable.
ridge_alphas = np.round(np.linspace(1, 10, 91), 1)

ridge_cv_model = RidgeCV(alphas=ridge_alphas, scoring='neg_root_mean_squared_error')
ridge_cv_model.fit(normalised_X_train, y_train)
test_prediction = ridge_cv_model.predict(normalised_X_test)

MAE = mean_absolute_error(y_test, test_prediction)
RMSE = np.sqrt(mean_squared_error(y_test, test_prediction))

print(f'MAE: {MAE}')
print(f'RMSE: {RMSE}')

In [None]:
# Print the beta coefficients your model found for each feature

In [None]:
# Coefficients of the fitted RidgeCV model, one per input feature column.
ridge_cv_model.coef_

In [None]:
# Print the optimal alpha and the score value that this alpha got

In [None]:
# Report the selected alpha together with the cross-validation score it
# achieved, under whichever scorer the model was built with.
print(f'Optimal alpha: {ridge_cv_model.alpha_}')
print(f'Best score: {ridge_cv_model.best_score_}')