# Imports

In [None]:
%load_ext autoreload
%autoreload 2

import pandas as pd
pd.set_option('display.max_columns', 500)
# pd.options.plotting.backend = "plotly"
import numpy as np
import pickle

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn import metrics

from config.config import ROOT_PATH

# Data

In [None]:
# Load the abalone dataset from data/abalone.csv under the project's ROOT_PATH.
df = pd.read_csv(ROOT_PATH / "data/abalone.csv")

# EDA

In [None]:
# Preview the first rows to sanity-check that the file loaded as expected.
df.head()


In [None]:
# (number of rows, number of columns) of the loaded frame.
df.shape

In [None]:
# Per-column dtype and non-null counts; useful for spotting missing values
# and identifying which columns are categorical (object) vs numeric.
df.info()

In [None]:
# Summary statistics for the numeric columns.
df.describe()

# Modelling

## Preprocessing

In [None]:
# Preprocess the data: normalise the column names, one-hot encode the
# categorical (object-dtype) columns, and recombine them with the numeric ones.
df.columns = [col.replace(" ", "_").lower() for col in df.columns]

encoder = OneHotEncoder(handle_unknown='ignore')
df_cat = encoder.fit_transform(df.select_dtypes(include='object')).toarray()
df_num = df.select_dtypes(include='number')

# Label the encoded columns with the categories of *every* encoded column, not
# only the first one, so the number of labels always matches df_cat's width.
# With a single categorical column this yields exactly the same labels as
# encoder.categories_[0].tolist().
cat_columns = [label for cats in encoder.categories_ for label in cats.tolist()]

# Reuse df's index for the encoded frame: pd.concat(axis=1) aligns rows by
# index label, so a fresh RangeIndex would misalign rows (and introduce NaNs)
# if df were ever filtered or re-indexed before this cell.
data = pd.concat(
    [df_num, pd.DataFrame(df_cat, columns=cat_columns, index=df.index)],
    axis=1,
)

# Features are every column except the target column 'rings'.
X = data.drop('rings', axis=1)
y = data['rings']


# with open('encoder.pkl', 'wb') as f:
#     pickle.dump(encoder, f)

In [None]:
# Preview the first rows of the feature matrix and of the target series.
display(X.head())
display(y.head())

## Train test split

In [None]:

# Hold out 20% of the rows for testing (test_size=0.2); the fixed random_state
# keeps the split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Training

In [None]:
# Train the model: fit a scikit-learn LinearRegression on the training split.
model = LinearRegression()
model.fit(X_train, y_train)

# with open(ROOT_PATH / "abalone.pkl", "wb") as f:
#     pickle.dump(model, f)

In [None]:
# Inspect the training features the model was fitted on.
X_train

## Evaluation

In [None]:
# Make predictions for the test-split features with the fitted model.
y_pred = model.predict(X_test)

def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute relative error between targets and predictions.

    Both inputs are converted to NumPy arrays, the element-wise relative
    error ``(y_true - y_pred) / y_true`` is taken in absolute value, and the
    results are averaged.

    Despite the name, the result is a fraction, not a percentage: no factor
    of 100 is applied, so ``0.15`` means 15%.

    Args:
        y_true: Array-like of ground-truth values. Entries equal to zero
            lead to a division by zero in the relative error.
        y_pred: Array-like of predicted values, compatible in shape with
            ``y_true``.

    Returns:
        The average of the absolute relative errors.
    """
    actual = np.array(y_true)
    predicted = np.array(y_pred)
    relative_errors = np.abs((actual - predicted) / actual)
    return np.mean(relative_errors)

# Evaluate the model on the test split.
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
# Compute the MSE once and reuse it for the RMSE line instead of recomputing it.
mse = metrics.mean_squared_error(y_test, y_pred)
print('Mean Squared Error:', mse)
print('Root Mean Squared Error:', np.sqrt(mse))
# NOTE: the local MAPE helper returns a fraction (no x100 scaling).
mape = mean_absolute_percentage_error(y_test, y_pred)
print('Mean Absolute Percentage Error (MAPE):', mape)