<center><h1> ABALONE </h1></center>

<img align="center" src="./images/abalone_shell.jpg" alt="Drawing" width="200"/>

------------------------------------------------------------------

## Determine the sex of an abalone from the given features

In [1]:
# @hidden_cell

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
# Load the abalone dataset from the CSV file kept under ./data (path is relative to the notebook).
df = pd.read_csv('./data/abalone_original.csv')

In [3]:
# Preview the first rows to check the columns were parsed as expected.
df.head()

Unnamed: 0,sex,length,diameter,height,whole-weight,shucked-weight,viscera-weight,shell-weight,rings
0,M,91,73,19,102.8,44.9,20.2,30.0,15
1,M,70,53,18,45.1,19.9,9.7,14.0,7
2,F,106,84,27,135.4,51.3,28.3,42.0,9
3,M,88,73,25,103.2,43.1,22.8,31.0,10
4,I,66,51,16,41.0,17.9,7.9,11.0,7


In [4]:
# Column dtypes and non-null counts: a quick check for missing values.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4177 entries, 0 to 4176
Data columns (total 9 columns):
sex               4177 non-null object
length            4177 non-null int64
diameter          4177 non-null int64
height            4177 non-null int64
whole-weight      4177 non-null float64
shucked-weight    4177 non-null float64
viscera-weight    4177 non-null float64
shell-weight      4177 non-null float64
rings             4177 non-null int64
dtypes: float64(4), int64(4), object(1)
memory usage: 293.8+ KB


In [5]:
# Summary statistics for the numeric columns (the string-valued `sex` column is left out by default).
df.describe()

Unnamed: 0,length,diameter,height,whole-weight,shucked-weight,viscera-weight,shell-weight,rings
count,4177.0,4177.0,4177.0,4177.0,4177.0,4177.0,4177.0,4177.0
mean,104.79842,81.576251,27.90328,165.748432,71.873498,36.118722,47.766172,9.933684
std,24.018583,19.847973,8.365411,98.077804,44.39259,21.92285,27.840534,3.224169
min,15.0,11.0,0.0,0.4,0.2,0.1,0.3,1.0
25%,90.0,70.0,23.0,88.3,37.2,18.7,26.0,8.0
50%,109.0,85.0,28.0,159.9,67.2,34.2,46.8,9.0
75%,123.0,96.0,33.0,230.6,100.4,50.6,65.8,11.0
max,163.0,130.0,226.0,565.1,297.6,152.0,201.0,29.0


In [6]:
# Distinct values present in the `sex` column.
df['sex'].unique()

array(['M', 'F', 'I'], dtype=object)

In [7]:
# Separate the label from the predictors: `sex` is the target,
# every remaining column is used as a feature.
y = df['sex']
X = df.drop(columns=['sex'])

## Building the training and testing datasets

In [8]:
from sklearn.model_selection import train_test_split

In [9]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps
# the split identical across runs.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=1,
)

## Model training

In [10]:
# base model
from sklearn.linear_model import LogisticRegression

In [11]:
# Baseline classifier. multi_class='ovr' selects the one-vs-rest scheme,
# i.e. a separate binary problem is fit for each class label.
# NOTE(review): `multi_class` is deprecated in recent scikit-learn releases —
# confirm against the installed version before upgrading.
model = LogisticRegression(multi_class='ovr')

In [12]:
# Fit the baseline on the training split; fit() returns the estimator,
# so its repr is shown as the cell output.
model.fit(X=x_train, y=y_train)



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='ovr', n_jobs=None, penalty='l2',
                   random_state=None, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)

## Model evaluation

In [13]:
from sklearn.metrics import classification_report

In [14]:
# Predicted labels for the held-out test features.
y_pred = model.predict(X=x_test)

In [15]:
# Per-class precision / recall / F1 on the test split.
report = classification_report(y_true=y_test, y_pred=y_pred)
print(report)

              precision    recall  f1-score   support

           F       0.53      0.35      0.42       260
           I       0.65      0.84      0.73       264
           M       0.52      0.53      0.53       312

    accuracy                           0.58       836
   macro avg       0.57      0.58      0.56       836
weighted avg       0.56      0.58      0.56       836



## Model saving

In [16]:
# joblib is a standalone package. The `sklearn.externals.joblib` shim was
# deprecated in scikit-learn 0.21 and removed in 0.23, so import joblib
# directly and only fall back to the vendored copy on very old installs.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib



In [17]:
import os

PATH_MODEL = './models/model_saved.sav'
# joblib.dump does not create missing parent directories, so make sure the
# target folder exists first (no-op when it is already there).
os.makedirs(os.path.dirname(PATH_MODEL), exist_ok=True)
# Persist the fitted model; dump() returns the list of files it wrote.
joblib.dump(model, PATH_MODEL)

['./models/model_saved.sav']

In [18]:
# Read the persisted model back from disk.
# Only load files you wrote yourself: joblib files are pickle-based.
model_loaded = joblib.load(filename=PATH_MODEL)

In [33]:
# Sanity check: run the reloaded model on the held-out test features.
model_loaded.predict(x_test)

array(['I', 'M', 'I', 'M', 'M', 'I', 'F', 'I', 'M', 'I', 'F', 'I', 'M',
       'F', 'I', 'M', 'M', 'F', 'M', 'M', 'I', 'M', 'I', 'M', 'M', 'M',
       'F', 'M', 'F', 'M', 'F', 'I', 'M', 'I', 'I', 'I', 'F', 'F', 'F',
       'M', 'F', 'M', 'F', 'F', 'I', 'I', 'M', 'I', 'M', 'M', 'M', 'I',
       'M', 'I', 'M', 'M', 'M', 'I', 'I', 'I', 'I', 'I', 'M', 'M', 'I',
       'I', 'F', 'F', 'F', 'I', 'I', 'M', 'I', 'M', 'M', 'I', 'F', 'F',
       'M', 'F', 'I', 'I', 'F', 'I', 'M', 'I', 'M', 'I', 'F', 'M', 'M',
       'I', 'I', 'M', 'I', 'M', 'F', 'F', 'I', 'M', 'I', 'M', 'I', 'I',
       'F', 'I', 'M', 'I', 'F', 'M', 'I', 'M', 'I', 'I', 'I', 'F', 'I',
       'I', 'M', 'F', 'M', 'I', 'I', 'F', 'I', 'M', 'F', 'M', 'I', 'F',
       'I', 'I', 'I', 'I', 'M', 'I', 'M', 'F', 'I', 'M', 'I', 'M', 'I',
       'M', 'I', 'F', 'M', 'F', 'M', 'M', 'M', 'I', 'I', 'M', 'M', 'M',
       'F', 'M', 'M', 'M', 'F', 'F', 'F', 'I', 'I', 'I', 'M', 'F', 'I',
       'M', 'I', 'M', 'F', 'I', 'I', 'M', 'M', 'M', 'I', 'I', 'M