# Setup

In [1]:
import os
import pickle
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn import metrics

# Prepare Data

In [2]:
# Load the raw fish measurements from the CSV that sits next to this notebook.
fish_ds = pd.read_csv(filepath_or_buffer='./Fish.csv')

In [3]:
# Dimensions of the loaded frame as (rows, columns).
fish_ds.shape

(159, 7)

In [4]:
# Summary statistics per column; used here to sanity-check value ranges
# (note the minimum of the Weight column in the output).
fish_ds.describe()

Unnamed: 0,Weight,Length1,Length2,Length3,Height,Width
count,159.0,159.0,159.0,159.0,159.0,159.0
mean,398.326415,26.24717,28.415723,31.227044,8.970994,4.417486
std,357.978317,9.996441,10.716328,11.610246,4.286208,1.685804
min,0.0,7.5,8.4,8.8,1.7284,1.0476
25%,120.0,19.05,21.0,23.15,5.9448,3.38565
50%,273.0,25.2,27.3,29.4,7.786,4.2485
75%,650.0,32.7,35.5,39.65,12.3659,5.5845
max,1650.0,59.0,63.4,68.0,18.957,8.142


A fish's weight cannot be zero, so records with a weight of 0 are treated as invalid data and filtered out.

In [5]:
# Keep only the rows whose recorded weight is strictly positive.
has_positive_weight = fish_ds['Weight'].gt(0)
fish_ds = fish_ds.loc[has_positive_weight]
fish_ds.shape

(158, 7)

In [6]:
# List the available column names before choosing inputs and output.
fish_ds.columns

Index(['Species', 'Weight', 'Length1', 'Length2', 'Length3', 'Height',
       'Width'],
      dtype='object')

Set the input and output columns. The three length fields, the height, and the width are used as the input features, while the weight serves as the output (the value to predict).

In [7]:
# Input features: every row, restricted to the five body-measurement columns.
X = fish_ds.loc[:, ['Length1', 'Length2', 'Length3', 'Height', 'Width']]
X.shape

(158, 5)

In [8]:
# Confirm that only the selected feature columns remain in X.
X.columns

Index(['Length1', 'Length2', 'Length3', 'Height', 'Width'], dtype='object')

In [9]:
# Target values: the Weight column, one entry per row of X.
y = fish_ds.loc[:, 'Weight']
y.shape

(158,)

In [10]:
# Hold out 20% of the rows for testing. The seed is fixed so that the split,
# and therefore the trained model and its error metrics, are identical on
# every Restart & Run All instead of changing from run to run.
train_X, test_X, train_y, test_y = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Model Training and Testing

In [11]:
# Fit an ordinary least-squares regressor on the training split; fit() hands
# back the estimator itself, so it can be bound directly and then displayed.
model = LinearRegression().fit(train_X, train_y)
model

LinearRegression()

In [12]:
# Score the fitted model on the held-out split and report both error metrics.
predictions = model.predict(test_X)
mse = metrics.mean_squared_error(test_y, predictions)
mae = metrics.mean_absolute_error(test_y, predictions)
print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)

Mean Squared Error: 8359.210163971271
Mean Absolute Error: 74.67725934383931


# Save Model

In [13]:
# Persist the trained model to disk.
# The pickle is written to a temporary file first and only moved over the
# final name once the dump has succeeded. Deleting the old file up front (as
# was done before) meant a failed dump left no usable model behind; opening
# with 'wb' already truncates, so the explicit exists/remove was redundant too.
pickle_filename = 'model.pkl'
tmp_pickle_filename = pickle_filename + '.tmp'
with open(tmp_pickle_filename, 'wb') as pickle_file:
    pickle.dump(model, pickle_file)
# os.replace overwrites an existing destination file in a single step.
os.replace(tmp_pickle_filename, pickle_filename)