In [1]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import root_mean_squared_error, mean_absolute_error, r2_score 

import mlflow
import mlflow.sklearn

### Read Data and Preprocessing

In [77]:
# Load the abalone table from the CSV next to the notebook and preview
# its first five rows as the cell output.
data = pd.read_csv(filepath_or_buffer="abalone.csv")
data.head(n=5)

Unnamed: 0,Sex,Length,Diameter,Height,Whole weight,Shucked weight,Viscera weight,Shell weight,Rings
0,M,0.455,0.365,0.095,0.514,0.2245,0.101,0.15,15
1,M,0.35,0.265,0.09,0.2255,0.0995,0.0485,0.07,7
2,F,0.53,0.42,0.135,0.677,0.2565,0.1415,0.21,9
3,M,0.44,0.365,0.125,0.516,0.2155,0.114,0.155,10
4,I,0.33,0.255,0.08,0.205,0.0895,0.0395,0.055,7


In [78]:
# "Rings" is the regression target; every remaining column is a feature.
X = data.drop("Rings", axis="columns")
y = data["Rings"]

In [79]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical across re-runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [80]:
# Integer-encode the categorical "Sex" column. The encoder is fitted on the
# training rows only and then reused for the test rows.
# NOTE(review): LabelEncoder imposes an arbitrary order on a nominal feature;
# a one-hot encoding may suit the linear model better. Kept as-is because
# later cells rely on the current column layout.
encoder = LabelEncoder()

# Build new frames with `assign` instead of writing into the frames returned
# by train_test_split (in-place column assignment there can raise pandas'
# SettingWithCopyWarning). The raw labels are always read from `data`, so
# re-running this cell never re-fits the encoder on already-encoded integers.
X_train = X_train.assign(Sex=encoder.fit_transform(data.loc[X_train.index, "Sex"]))
X_test = X_test.assign(Sex=encoder.transform(data.loc[X_test.index, "Sex"]))

In [82]:
# Standardise the features. The scaling statistics are learned from the
# training split only and then applied unchanged to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [65]:
def add_noise(X, noise_level=0.1, random_state=None):
    """Return ``X`` plus element-wise zero-mean Gaussian noise.

    Parameters
    ----------
    X : array-like exposing ``.shape``
        Values to perturb; the input itself is not modified.
    noise_level : float, default 0.1
        Standard deviation of the noise added to every element.
    random_state : None, int or numpy.random.Generator, default None
        ``None`` draws from NumPy's global random state exactly as before,
        so ``np.random.seed`` still controls the result. Any other value is
        passed to ``np.random.default_rng`` to get a reproducible stream
        that does not touch global state.

    Returns
    -------
    Same shape as ``X``: the result of ``X + noise``.
    """
    if random_state is None:
        # Legacy path: unchanged behaviour for existing callers.
        noise = np.random.normal(0, noise_level, X.shape)
    else:
        rng = np.random.default_rng(random_state)
        noise = rng.normal(0, noise_level, X.shape)
    return X + noise

### Testing

In [88]:
# Candidate regressors, kept in a list so they are always trained and logged
# in the order written here (a set literal iterates in hash order, which can
# differ between kernel sessions).
# The tree-based models get a fixed random_state so repeated runs produce the
# same fitted model and the same logged metrics.
models = [
    LinearRegression(),
    DecisionTreeRegressor(
        max_depth=5,
        min_samples_split=2,
        min_samples_leaf=1,
        random_state=42,
    ),
    RandomForestRegressor(
        n_estimators=100,
        max_depth=10,
        min_samples_split=2,
        random_state=42,
    ),
]

In [90]:
# Select the tracking server BEFORE the experiment: set_experiment looks up
# (or creates) the experiment in whichever store is active when it is called,
# so on a fresh kernel the original order would resolve it against the
# default store instead of the server below.
# NOTE(review): the experiment is named "Classification" although every model
# here is a regressor; the name is kept so existing runs stay grouped.
mlflow.set_tracking_uri("http://localhost:5000")
mlflow.set_experiment("Abalone Classification")

# Baseline: fit each model on the scaled training split, score it on the
# scaled test split, and log parameters, metrics and the fitted model.
for model in models:

    model_name = type(model).__name__
    model_params = model.get_params()

    with mlflow.start_run(run_name=model_name):

        mlflow.log_params(model_params)
        model.fit(X_train_scaled, y_train)

        y_pred = model.predict(X_test_scaled)
        rmse = root_mean_squared_error(y_test, y_pred)
        mae = mean_absolute_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        results = {
            "RMSE": rmse,
            "MAE": mae,
            "R2": r2,
        }

        mlflow.log_metrics(results)
        mlflow.sklearn.log_model(model, "model")

2025/01/10 23:32:16 INFO mlflow.tracking.fluent: Experiment with name 'Abalone Classification' does not exist. Creating a new experiment.


🏃 View run DecisionTreeRegressor at: http://localhost:5000/#/experiments/908622595079996776/runs/cb4bad7c072f4527acf11306f365e436
🧪 View experiment at: http://localhost:5000/#/experiments/908622595079996776




🏃 View run RandomForestRegressor at: http://localhost:5000/#/experiments/908622595079996776/runs/1771380f45d246b18e8d2c5628cd2145
🧪 View experiment at: http://localhost:5000/#/experiments/908622595079996776




🏃 View run LinearRegression at: http://localhost:5000/#/experiments/908622595079996776/runs/ddd4a7f781874f848d866eac17545e35
🧪 View experiment at: http://localhost:5000/#/experiments/908622595079996776


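The three models gave similar results. The best model was RandomForest, with the lowest RMSE and MAE. The test-set R2 suggests no severe overfitting, although confirming this would require comparing it with the R2 on the training set.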

### Perturbation tests

In [67]:
# Seed NumPy's global random state so the injected noise, and therefore the
# logged "perturbed" metrics, are identical on every Restart & Run All.
np.random.seed(42)

# Add Gaussian noise (add_noise's default level) to both scaled splits.
X_train_perturbed = add_noise(X_train_scaled)
X_test_perturbed = add_noise(X_test_scaled)

In [91]:
# Select the tracking server BEFORE the experiment: set_experiment looks up
# (or creates) the experiment in whichever store is active when it is called.
mlflow.set_tracking_uri("http://localhost:5000")
mlflow.set_experiment("Abalone Classification")

# Perturbation test: refit and score each model on the noise-perturbed
# splits, logging one run per model.
for model in models:
    model_name = type(model).__name__
    model_params = model.get_params()

    with mlflow.start_run(run_name=model_name + " perturbed"):

        # Log hyper-parameters as the baseline runs do, so runs can be
        # compared side by side in the MLflow UI.
        mlflow.log_params(model_params)
        model.fit(X_train_perturbed, y_train)

        y_pred = model.predict(X_test_perturbed)
        rmse = root_mean_squared_error(y_test, y_pred)
        mae = mean_absolute_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        results = {
            "RMSE": rmse,
            "MAE": mae,
            "R2": r2,
        }

        mlflow.log_metrics(results)
        mlflow.sklearn.log_model(model, "model")



🏃 View run DecisionTreeRegressor perturbed at: http://localhost:5000/#/experiments/908622595079996776/runs/548fc0b0190045a28e039ebf123dd439
🧪 View experiment at: http://localhost:5000/#/experiments/908622595079996776




🏃 View run RandomForestRegressor perturbed at: http://localhost:5000/#/experiments/908622595079996776/runs/f22e421df8304ac5ab1573a17e250537
🧪 View experiment at: http://localhost:5000/#/experiments/908622595079996776




🏃 View run LinearRegression perturbed at: http://localhost:5000/#/experiments/908622595079996776/runs/d78a39e6585d4d3eb12381a1a0260b32
🧪 View experiment at: http://localhost:5000/#/experiments/908622595079996776


When adding noise to the data, the results remained close to the previous ones, and RandomForest remained the best.

### Invariance tests

In [69]:
# Invariance data: the same splits without the "Sex" column, standardised
# with a separate scaler fitted on the training rows only.
scaler2 = StandardScaler().fit(X_train.drop("Sex", axis="columns"))
X_train_invariance = scaler2.transform(X_train.drop("Sex", axis="columns"))
X_test_invariance = scaler2.transform(X_test.drop("Sex", axis="columns"))

In [92]:
# Select the tracking server BEFORE the experiment: set_experiment looks up
# (or creates) the experiment in whichever store is active when it is called.
mlflow.set_tracking_uri("http://localhost:5000")
mlflow.set_experiment("Abalone Classification")

# Invariance test: refit and score each model on the splits with the "Sex"
# column removed, logging one run per model.
for model in models:
    model_name = type(model).__name__
    model_params = model.get_params()

    with mlflow.start_run(run_name=model_name + " Invariance"):

        # Log hyper-parameters as the baseline runs do, so runs can be
        # compared side by side in the MLflow UI.
        mlflow.log_params(model_params)
        model.fit(X_train_invariance, y_train)

        y_pred = model.predict(X_test_invariance)
        rmse = root_mean_squared_error(y_test, y_pred)
        mae = mean_absolute_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        results = {
            "RMSE": rmse,
            "MAE": mae,
            "R2": r2,
        }

        mlflow.log_metrics(results)
        mlflow.sklearn.log_model(model, "model")



🏃 View run DecisionTreeRegressor Invariance at: http://localhost:5000/#/experiments/908622595079996776/runs/0b029373d41d436a8683f63395a08558
🧪 View experiment at: http://localhost:5000/#/experiments/908622595079996776




🏃 View run RandomForestRegressor Invariance at: http://localhost:5000/#/experiments/908622595079996776/runs/392405c531fa46838557882df0bc8dea
🧪 View experiment at: http://localhost:5000/#/experiments/908622595079996776




🏃 View run LinearRegression Invariance at: http://localhost:5000/#/experiments/908622595079996776/runs/a0fec68be3354dc3a1f6ebba2cde80ca
🧪 View experiment at: http://localhost:5000/#/experiments/908622595079996776


When dropping the Sex column from the data, the results remained close to the previous ones, and RandomForest remained the best.

### Directional expectation tests

In [93]:
# Baseline sample: the first row shown by data.head() (Rings = 15).
# Length is 0.455 as in that row; the previous 0.445 was a typo that made the
# "original" sample differ from the record whose target is logged later.
df = pd.DataFrame(
    [["M", 0.455, 0.365, 0.095, 0.5140, 0.2245, 0.1010, 0.150]],
    columns=X_train.columns,
)

# Same sample with only Length increased (to 0.85); all other features equal.
df_add = pd.DataFrame(
    [["M", 0.85, 0.365, 0.095, 0.5140, 0.2245, 0.1010, 0.150]],
    columns=X_train.columns,
)

# Apply the already-fitted encoder and scaler so both rows go through the
# same preprocessing as the training data.
df["Sex"] = encoder.transform(df["Sex"])
df_add["Sex"] = encoder.transform(df_add["Sex"])

df_scaled = scaler.transform(df)
df_add_scaled = scaler.transform(df_add)

In [95]:
# Select the tracking server BEFORE the experiment: set_experiment looks up
# (or creates) the experiment in whichever store is active when it is called.
mlflow.set_tracking_uri("http://localhost:5000")
mlflow.set_experiment("Abalone Classification")

# Directional expectation test: refit each model on the full scaled training
# split, then compare its prediction for the baseline row with its prediction
# for the row whose Length was increased.
for model in models:
    model_name = type(model).__name__
    model_params = model.get_params()

    with mlflow.start_run(run_name=model_name + " Directional"):

        # Log hyper-parameters as the baseline runs do, so runs can be
        # compared side by side in the MLflow UI.
        mlflow.log_params(model_params)
        model.fit(X_train_scaled, y_train)

        y_pred = model.predict(df_scaled)
        y_pred_add = model.predict(df_add_scaled)

        # predict() returns a one-element array for a single row; log plain
        # floats rather than relying on an implicit array-to-scalar
        # conversion inside MLflow.
        results = {
            "Original Target": 15,
            "Predict Target": float(y_pred[0]),
            "Predict Target Add": float(y_pred_add[0]),
        }

        mlflow.log_metrics(results)
        mlflow.sklearn.log_model(model, "model")



🏃 View run DecisionTreeRegressor Directional at: http://localhost:5000/#/experiments/908622595079996776/runs/e7ec79f3fe624496860b10ceebc398aa
🧪 View experiment at: http://localhost:5000/#/experiments/908622595079996776




🏃 View run RandomForestRegressor Directional at: http://localhost:5000/#/experiments/908622595079996776/runs/d12c2673cee94c17ba1ac7f4c4efec83
🧪 View experiment at: http://localhost:5000/#/experiments/908622595079996776




🏃 View run LinearRegression Directional at: http://localhost:5000/#/experiments/908622595079996776/runs/607e9e82306040359b58e3c7d97825ad
🧪 View experiment at: http://localhost:5000/#/experiments/908622595079996776


When trying a single row, all three models gave bad results. When the length is increased, the number of rings should increase, and no model achieved this. The least bad was RandomForest.