In [13]:
import warnings
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from sklearn.preprocessing import OrdinalEncoder, OneHotEncoder, MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.pyplot import title
from random import randrange
sns.set(rc={'figure.figsize':(15,8)})

In [2]:
df = pd.read_csv("./data/abalone.data", names=["sex", "length", "diameter", "height", "whole_weight", "shucked_weight", "viscera_weight", "shell_weight", "rings"])

In [3]:
df

Unnamed: 0,sex,length,diameter,height,whole_weight,shucked_weight,viscera_weight,shell_weight,rings
0,M,0.455,0.365,0.095,0.5140,0.2245,0.1010,0.1500,15
1,M,0.350,0.265,0.090,0.2255,0.0995,0.0485,0.0700,7
2,F,0.530,0.420,0.135,0.6770,0.2565,0.1415,0.2100,9
3,M,0.440,0.365,0.125,0.5160,0.2155,0.1140,0.1550,10
4,I,0.330,0.255,0.080,0.2050,0.0895,0.0395,0.0550,7
...,...,...,...,...,...,...,...,...,...
4172,F,0.565,0.450,0.165,0.8870,0.3700,0.2390,0.2490,11
4173,M,0.590,0.440,0.135,0.9660,0.4390,0.2145,0.2605,10
4174,M,0.600,0.475,0.205,1.1760,0.5255,0.2875,0.3080,9
4175,F,0.625,0.485,0.150,1.0945,0.5310,0.2610,0.2960,10


In [4]:
raw_abs = df.copy()
raw_abs

Unnamed: 0,sex,length,diameter,height,whole_weight,shucked_weight,viscera_weight,shell_weight,rings
0,M,0.455,0.365,0.095,0.5140,0.2245,0.1010,0.1500,15
1,M,0.350,0.265,0.090,0.2255,0.0995,0.0485,0.0700,7
2,F,0.530,0.420,0.135,0.6770,0.2565,0.1415,0.2100,9
3,M,0.440,0.365,0.125,0.5160,0.2155,0.1140,0.1550,10
4,I,0.330,0.255,0.080,0.2050,0.0895,0.0395,0.0550,7
...,...,...,...,...,...,...,...,...,...
4172,F,0.565,0.450,0.165,0.8870,0.3700,0.2390,0.2490,11
4173,M,0.590,0.440,0.135,0.9660,0.4390,0.2145,0.2605,10
4174,M,0.600,0.475,0.205,1.1760,0.5255,0.2875,0.3080,9
4175,F,0.625,0.485,0.150,1.0945,0.5310,0.2610,0.2960,10


In [5]:
raw_abs.loc[raw_abs['rings'].between(0, 7,inclusive='both'), 'ring_class'] = int(1)
raw_abs.loc[raw_abs['rings'].between(8, 10,inclusive='both'), 'ring_class'] = int(2)
raw_abs.loc[raw_abs['rings'].between(11, 15,inclusive='both'), 'ring_class'] = int(3)
raw_abs.loc[raw_abs['rings'] > 15, 'ring_class'] = int(4)
raw_abs['ring_class'] = raw_abs['ring_class'].astype(int)

In [6]:
raw_abs.drop(columns = "rings", axis=1, inplace=True)


In [7]:
num_pipeline = Pipeline([
    ('Nomalisation', MinMaxScaler()),
    ])

sex_pipeline = Pipeline([
    ('ord_encoder', OrdinalEncoder(categories=[['M', 'F', 'I']]))
])

ringClass_pipeline = Pipeline([
    ('ringClass_1Hot', OneHotEncoder())
])

num_arribs = list(raw_abs.drop(columns=["sex", "ring_class"]))

full_pipeline = ColumnTransformer([
    ("num", num_pipeline, num_arribs),
    ("sex", sex_pipeline, ['sex']),
    ("ringClass_1Hot", ringClass_pipeline, ['ring_class'])
])



In [8]:
abs_prepared = pd.DataFrame(full_pipeline.fit_transform(raw_abs))
abs_prepared

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11
0,0.513514,0.521008,0.084071,0.181335,0.150303,0.132324,0.147982,0.0,0.0,0.0,1.0,0.0
1,0.371622,0.352941,0.079646,0.079157,0.066241,0.063199,0.068261,0.0,1.0,0.0,0.0,0.0
2,0.614865,0.613445,0.119469,0.239065,0.171822,0.185648,0.207773,1.0,0.0,1.0,0.0,0.0
3,0.493243,0.521008,0.110619,0.182044,0.144250,0.149440,0.152965,0.0,0.0,1.0,0.0,0.0
4,0.344595,0.336134,0.070796,0.071897,0.059516,0.051350,0.053313,2.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...
4172,0.662162,0.663866,0.146018,0.313441,0.248151,0.314022,0.246637,1.0,0.0,0.0,1.0,0.0
4173,0.695946,0.647059,0.119469,0.341420,0.294553,0.281764,0.258097,0.0,0.0,1.0,0.0,0.0
4174,0.709459,0.705882,0.181416,0.415796,0.352724,0.377880,0.305431,0.0,0.0,1.0,0.0,0.0
4175,0.743243,0.722689,0.132743,0.386931,0.356422,0.342989,0.293473,1.0,0.0,1.0,0.0,0.0


In [9]:
raw_abs

Unnamed: 0,sex,length,diameter,height,whole_weight,shucked_weight,viscera_weight,shell_weight,ring_class
0,M,0.455,0.365,0.095,0.5140,0.2245,0.1010,0.1500,3
1,M,0.350,0.265,0.090,0.2255,0.0995,0.0485,0.0700,1
2,F,0.530,0.420,0.135,0.6770,0.2565,0.1415,0.2100,2
3,M,0.440,0.365,0.125,0.5160,0.2155,0.1140,0.1550,2
4,I,0.330,0.255,0.080,0.2050,0.0895,0.0395,0.0550,1
...,...,...,...,...,...,...,...,...,...
4172,F,0.565,0.450,0.165,0.8870,0.3700,0.2390,0.2490,3
4173,M,0.590,0.440,0.135,0.9660,0.4390,0.2145,0.2605,2
4174,M,0.600,0.475,0.205,1.1760,0.5255,0.2875,0.3080,2
4175,F,0.625,0.485,0.150,1.0945,0.5310,0.2610,0.2960,2


In [10]:
X = abs_prepared.iloc[:,:-1]
y = abs_prepared.iloc[:,-1]


In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size = 0.4, random_state=42)

In [12]:
X_train

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
3805,0.472973,0.420168,0.097345,0.126439,0.115669,0.114549,0.095665,0.0,0.0,1.0,0.0
1389,0.756757,0.731092,0.141593,0.389233,0.358440,0.244898,0.342800,0.0,0.0,1.0,0.0
3484,0.540541,0.613445,0.141593,0.250576,0.234701,0.197498,0.182362,2.0,0.0,1.0,0.0
4100,0.810811,0.781513,0.154867,0.528422,0.494620,0.401580,0.367215,1.0,0.0,1.0,0.0
1607,0.628378,0.621849,0.119469,0.242253,0.233020,0.202765,0.210762,2.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...
3444,0.560811,0.579832,0.101770,0.200815,0.171486,0.173799,0.143000,1.0,0.0,1.0,0.0
466,0.804054,0.831933,0.168142,0.491766,0.364156,0.398947,0.397110,1.0,0.0,0.0,1.0
3092,0.587838,0.571429,0.110619,0.204888,0.163416,0.175115,0.185850,0.0,0.0,0.0,1.0
3772,0.675676,0.689076,0.106195,0.372410,0.346335,0.287031,0.232686,0.0,0.0,1.0,0.0


In [18]:
X_train.iloc[:1,:7]

Unnamed: 0,0,1,2,3,4,5,6
3805,0.472973,0.420168,0.097345,0.126439,0.115669,0.114549,0.095665


In [23]:
# def init_model(hidden_neurons, hidden_layers = 1):
#     if hidden_layers == 1:
#         model = keras.models.Sequential([
#             keras.layers.InputLayer(input_dim = 7),
#             keras.layers.Dense(hidden_neurons, activation = "relu"),
#             keras.layers.Dense(4, activation = "softmax")
#         ])
#         return(model)
#     else:
#         model = keras.models.Sequential([
#             keras.layers.InputLayer(input_dim = 7),
#             keras.layers.Dense(hidden_neurons, activation = "relu"),
#             keras.layers.Dense(hidden_neurons, activation = "relu"),
#             keras.layers.Dense(4, activation = "softmax")
#         ])
#         return(model)

In [27]:
model = keras.models.Sequential([
    tf.keras.Input(shape=(7,)),
    keras.layers.Dense(5, activation = "relu"),
    keras.layers.Dense(4, activation = "softmax")
])