In [None]:
import warnings
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder, MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import seaborn as sns
import matplotlib.pyplot as plt
from random import randrange
# Notebook-wide default figure size (width, height) for the plots below.
sns.set(rc={'figure.figsize':(15,8)})

In [None]:
# Load the abalone data set. Column names are supplied explicitly via `names`,
# so pandas reads every line of the file as data.
df = pd.read_csv(
    "./../assignment_3/data/abalone.data",
    names=[
        "sex",
        "length",
        "diameter",
        "height",
        "whole_weight",
        "shucked_weight",
        "viscera_weight",
        "shell_weight",
        "rings",
    ],
)

In [None]:
# Work on a copy so the frame loaded from disk (`df`) stays untouched by the
# cleaning steps that follow.
raw_abs = df.copy()
raw_abs

In [None]:
# Shell height against ring count. The data is reshaped to long form so that
# seaborn can colour the points by measurement name.
corel_plot_data = df.loc[:, ['rings', 'height']].copy()
melted_corel_plot_data = pd.melt(
    corel_plot_data,
    id_vars='rings',
    var_name='size_measurement',
    value_name='Height in cm',
)
corel_plot = sns.catplot(
    data=melted_corel_plot_data,
    x='rings',
    y='Height in cm',
    hue='size_measurement',
)
corel_plot = corel_plot.set(title="Measure of shell height Abalone size vs number of rings")

corel_plot

In [None]:
# Compare the linear size measurements (length, diameter, height) against the
# ring count.
# The frame is built in this cell rather than reusing `corel_plot_data`, which
# only holds `height`: reusing it made this plot a duplicate of the height-only
# plot despite its plural title and per-measurement hue, and tied the cell to a
# variable defined in another cell.
size_plot_data = df[['rings', 'length', 'diameter', 'height']].copy()
melted_corel_plot_data = size_plot_data.melt(
    'rings',
    var_name='size_measurement',
    value_name='Measurement in cm',
)
corel_plot = sns.catplot(
    x='rings',
    hue='size_measurement',
    y='Measurement in cm',
    data=melted_corel_plot_data,
).set(title="Measures of shell Abalone size vs number of rings")

In [None]:
# Bin the ring count into four ordinal classes:
#   1 -> 0..7 rings, 2 -> 8..10, 3 -> 11..15, 4 -> more than 15.
# Each tuple is (lowest ring count, highest ring count, class label), both
# bounds inclusive.
ring_bins = [(0, 7, 1), (8, 10, 2), (11, 15, 3)]
for low, high, label in ring_bins:
    in_bin = raw_abs['rings'].between(low, high, inclusive='both')
    raw_abs.loc[in_bin, 'ring_class'] = label

# The top class is open-ended.
raw_abs.loc[raw_abs['rings'] > 15, 'ring_class'] = 4

# Filling the column through .loc leaves it as float; cast back to integers.
raw_abs['ring_class'] = raw_abs['ring_class'].astype(int)

In [None]:
# `rings` is now represented by `ring_class`; drop it so the raw target does
# not end up among the numeric features.
# Rebinding replaces `inplace=True`, and the redundant `axis=1` is removed
# because `columns=` already names the axis.
raw_abs = raw_abs.drop(columns="rings")


In [None]:
# Keep only rows whose height lies strictly between 0.01 and 0.4 (both bounds
# excluded).
raw_abs = raw_abs.loc[raw_abs['height'].between(0.01, 0.4, inclusive='neither')]

# Visualisations

### Distribution of ring class

In [None]:
# Preview the first rows of the cleaned frame.
raw_abs.head()

In [None]:
# Horizontal bar chart of how many samples fall into each ring class.
ax = raw_abs['ring_class'].value_counts().plot(kind='barh', figsize=(8,6))
ax.set_ylabel("Ring-class")
ax.set_xlabel("Class count")
ax.grid(False)
ax.set_title("Abalone ring-class count", y=1.02, fontsize = 18);

In [None]:
# One histogram per column of the cleaned frame
# (presumably numeric columns only, so `sex` is not drawn; verify).
raw_abs.hist()

In [None]:
# Pairwise correlations between the numeric columns only. `sex` holds string
# labels, and DataFrame.corr() raises on non-numeric columns in pandas >= 2.0,
# so the numeric columns are selected explicitly (this also works on older
# pandas versions).
numeric_corr = raw_abs.select_dtypes(include='number').corr()
heatmap = sns.heatmap(numeric_corr, annot=True, cbar=False, vmin=-1., vmax=1., cmap=sns.cm.rocket)
heatmap.set_title('Correlation Heatmap', fontdict={'fontsize':18}, pad=12)

# Building the model

In [None]:
# Preprocessing: every column of `raw_abs` goes through exactly one of the
# sub-pipelines below.

# Numeric measurements are rescaled with MinMaxScaler (default range [0, 1]).
num_pipeline = Pipeline([
    ('Nomalisation', MinMaxScaler()),
])

# `sex` becomes a single integer column using the order M=0, F=1, I=2.
# NOTE(review): this imposes an ordering on a nominal feature; one-hot encoding
# may be preferable, but it would change the number of feature columns
# downstream.
sex_pipeline = Pipeline([
    ('ord_encoder', OrdinalEncoder(categories=[['M', 'F', 'I']])),
])

# The target class is one-hot encoded (one column per ring class).
ringClass_pipeline = Pipeline([
    ('ringClass_1Hot', OneHotEncoder()),
])

# Every column except `sex` and the target is treated as a numeric feature;
# the original column order is preserved.
num_arribs = [col for col in raw_abs.columns if col not in ("sex", "ring_class")]

# Output columns follow the transformer order: numeric features, then sex,
# then the one-hot ring classes (so the targets are the last columns).
# sparse_threshold=0 forces a dense array. OneHotEncoder emits a sparse matrix,
# and with the default threshold the stacked result may come back sparse
# depending on data density, which the later DataFrame wrapping and positional
# slicing do not expect.
full_pipeline = ColumnTransformer(
    [
        ("num", num_pipeline, num_arribs),
        ("sex", sex_pipeline, ['sex']),
        ("ringClass_1Hot", ringClass_pipeline, ['ring_class']),
    ],
    sparse_threshold=0,
)



In [None]:
# Fit the preprocessing on the whole frame and wrap the result in a DataFrame;
# the columns are labelled by integer position because no names are passed.
# NOTE(review): fitting here, before the train/test split, lets test rows
# influence the fitted scaler -- confirm this is acceptable.
abs_prepared = pd.DataFrame(full_pipeline.fit_transform(raw_abs))
abs_prepared

In [None]:
# The last four columns are the one-hot ring-class targets; everything before
# them is a feature column.
y = abs_prepared.iloc[:, -4:]
X = abs_prepared.iloc[:, :-4]

In [None]:
# Single hidden layer classifier.

# Seed Python, NumPy and TensorFlow so that weight initialisation and batch
# shuffling are repeatable, in line with the fixed random_state of the split.
keras.utils.set_random_seed(42)

# NOTE(review): train_size=0.4 trains on 40% of the rows and evaluates on the
# remaining 60% -- confirm this is intended and not meant to be test_size.
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size = 0.4, random_state=42)

model = keras.models.Sequential([
    # Input width is taken from the data rather than hard-coded.
    keras.layers.Dense(12, input_shape = (X_train.shape[1],), activation = "relu"),
    # One softmax unit per one-hot target column.
    keras.layers.Dense(y_train.shape[1], activation = "softmax"),
])

# Categorical cross-entropy matches the one-hot encoded targets.
model.compile(
    loss = 'categorical_crossentropy',
    optimizer = keras.optimizers.SGD(learning_rate=0.01),
    metrics = ['accuracy'],
)

# verbose=0 silences the per-epoch log for the 500 epochs.
history = model.fit(X_train, y_train, epochs=500, verbose=0)

# Loss and accuracy on the held-out rows.
mod_eval = model.evaluate(X_test, y_test)


In [30]:
# Softmax outputs for the held-out rows: one row per sample, one column per
# ring class.
y_pred = model.predict(X_test)



In [41]:
# Inspect the predicted class probabilities.
y_pred

array([[0.35367462, 0.51822513, 0.10436802, 0.02373214],
       [0.00899165, 0.27155596, 0.5123134 , 0.20713899],
       [0.06942134, 0.45943975, 0.40140343, 0.06973547],
       ...,
       [0.15255277, 0.69768006, 0.13052702, 0.01924015],
       [0.08374643, 0.73067194, 0.16423613, 0.02134543],
       [0.07850537, 0.6113063 , 0.253427  , 0.05676131]], dtype=float32)

In [29]:
# One-hot ground-truth labels for the held-out rows.
# NOTE(review): the execution counts around here (30, 41, 29) are out of order;
# re-run the notebook top to bottom on a fresh kernel before relying on these
# outputs.
y_test

Unnamed: 0,8,9,10,11
1451,1.0,0.0,0.0,0.0
2258,0.0,0.0,0.0,1.0
731,0.0,0.0,1.0,0.0
544,0.0,0.0,1.0,0.0
2457,0.0,0.0,1.0,0.0
...,...,...,...,...
1389,0.0,1.0,0.0,0.0
3471,1.0,0.0,0.0,0.0
2648,0.0,1.0,0.0,0.0
1607,0.0,1.0,0.0,0.0
