In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np

# seed function to ensure consistency
def random_seed(seed_value):
    """Seed the random number generators used by this notebook.

    Seeds Python's ``random`` module, NumPy's global generator and PyTorch,
    then asks cuDNN for deterministic algorithms and turns off its
    benchmark-based algorithm selection.

    Args:
        seed_value: Integer seed shared by all generators.
    """
    # Local import: the notebook's import cell does not bring in `random`.
    import random

    # The original seeded NumPy twice and never seeded Python's own generator.
    random.seed(seed_value)
    np.random.seed(seed_value)
    torch.manual_seed(seed_value)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

random_seed(113)







In [None]:
"""
SECTION 1: Load and prepare the training data. The source dataset is split into two files:
penguins-clean-train.csv = training set, comprising 70% of the original data
penguins-clean-test.csv = test set, comprising 30% of the original data
"""

#Section 1.1: Importing Data

#load
# Absolute path: adjust it if the CSV lives somewhere else.
datatrain = pd.read_csv('/content/penguins-clean-train.csv')

#Section 1.2 Preprocessing

#convert the species names to integer class ids
# A single Series.map builds a new column instead of writing ints into a
# string column cell by cell (which only works while the column is object
# dtype). Values that are not in the table are passed through unchanged so
# that pd.to_numeric below still decides whether they are acceptable.
species_to_id = {'Adelie': 0, 'Gentoo': 1, 'Chinstrap': 2}
species_encoded = datatrain['species'].map(lambda name: species_to_id.get(name, name))
datatrain = datatrain.assign(species=species_encoded)
datatrain = datatrain.apply(pd.to_numeric)

#convert a dataframe to an array
datatrain_array = datatrain.values

#split x and y (feature and target)
# The slicing is positional: it assumes 'species' is the first CSV column
# and every remaining column is a feature -- TODO confirm against the file.
xtrain = datatrain_array[:,1:]
ytrain = datatrain_array[:,0]

#standardize
#palmer-penguin dataset has varying scales
# The scaler is fitted on the training features only; keep `scaler` around so
# the same statistics can be applied to other data later.
scaler = StandardScaler()
xtrain = scaler.fit_transform(xtrain)
xtrain

array([[ 0.4168445 , -1.37190431,  0.59688731,  0.77741275],
       [-0.06475329, -1.67287052,  1.17335794,  0.90525041],
       [ 0.25013757, -1.77319259,  0.66894614,  0.13822447],
       [ 1.75049993, -0.71981085,  1.3174756 ,  1.67227635],
       [-0.62044306,  0.7850202 , -0.55605397, -0.50096382],
       [ 0.34275253,  0.83518123, -0.26781865, -0.05353202],
       [ 1.08367222,  0.03260467, -0.19575982, -0.66076089],
       [ 1.89868387,  1.7882409 ,  0.02041667,  0.13822447],
       [ 0.67616639,  0.53421502, -0.41193631, -0.43704499],
       [ 0.10195364, -1.27158224,  0.95718146,  0.84133158],
       [-1.25022479,  0.43389295, -1.63693642, -0.37312616],
       [-0.9909029 , -0.01755636, -0.12370099, -0.56488265],
       [ 0.60207442, -0.87029396,  1.02924029,  1.00112865],
       [-0.9909029 ,  1.98888504, -0.70017163, -0.50096382],
       [ 1.3615171 ,  0.48405399, -0.26781865, -0.56488265],
       [ 0.39832151,  0.48405399, -1.63693642, -1.20407093],
       [ 0.23161458,  0.

In [None]:
"""
Section 2: Build and Train Model

Multilayer perceptron model with one hidden layer.
Input layer: 4 neurons; these neurons reflect the Palmer Penguin dataset's
 features.
Hidden layer: 20 neurons, activated by ReLU.
Output layer: 3 neurons, which indicates the number of species. The network
returns raw logits; the softmax is applied inside the cross-entropy loss.

Optimizer: Adam, trained on the full dataset at once (no mini-batches)
Loss function: categorical cross entropy
Learning rate: 0.01
Epoch count: 40
"""

#hyperparameters
hl = 20           # number of neurons in the hidden layer
lr = 1e-2         # learning rate handed to the optimizer
num_epoch = 40    # number of passes of the training loop

#build model
class Net(nn.Module):
    """Perceptron with one ReLU hidden layer that maps features to class logits.

    Args:
        in_features: Number of input features per sample (default 4).
        hidden_size: Width of the hidden layer. ``None`` (the default) reads
            the module-level ``hl`` hyperparameter when the model is built,
            which is what the zero-argument ``Net()`` call has always done.
        num_classes: Number of output logits (default 3).
    """

    def __init__(self, in_features=4, hidden_size=None, num_classes=3):
        super().__init__()
        if hidden_size is None:
            # Backward-compatible default: fall back to the notebook-level setting.
            hidden_size = hl
        self.fc1 = nn.Linear(in_features, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the raw output of the last linear layer; no softmax is applied here."""
        hidden = F.relu(self.fc1(x))
        return self.fc2(hidden)

net = Net()

#choose optimizer and loss function
# CrossEntropyLoss is fed the network's raw outputs together with integer
# class indices, so the model itself carries no softmax layer.
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(net.parameters(), lr=lr)

# Build the training tensors with explicit dtypes. The legacy torch.Tensor(...)
# constructor always goes through float32 first, which for the labels meant a
# float64 -> float32 -> int64 round trip; as_tensor converts straight to the
# requested dtype.
X = torch.as_tensor(xtrain, dtype=torch.float32)
Y = torch.as_tensor(ytrain, dtype=torch.long)

#train
# Every epoch is one optimizer step computed on the whole of X (no mini-batches).
for epoch in range(num_epoch):

    #feedforward - backprop
    optimizer.zero_grad()
    out = net(X)
    loss = criterion(out, Y)
    loss.backward()
    optimizer.step()

    # Accuracy is measured on the outputs computed before this epoch's step,
    # i.e. the same forward pass the reported loss comes from.
    # detach() replaces the legacy `.data` attribute, and argmax gives the
    # predicted class index directly.
    predicted = out.detach().argmax(dim=1)
    acc = 100 * torch.sum(Y == predicted).double() / len(Y)
    print ('Epoch [%d/%d] Loss: %.4f   Acc: %.4f'
                   %(epoch+1, num_epoch, loss.item(), acc.item()))


Epoch [1/40] Loss: 1.0243   Acc: 45.0000
Epoch [2/40] Loss: 0.9391   Acc: 63.7500
Epoch [3/40] Loss: 0.8604   Acc: 80.0000
Epoch [4/40] Loss: 0.7885   Acc: 80.0000
Epoch [5/40] Loss: 0.7234   Acc: 80.0000
Epoch [6/40] Loss: 0.6649   Acc: 80.0000
Epoch [7/40] Loss: 0.6124   Acc: 80.0000
Epoch [8/40] Loss: 0.5655   Acc: 80.0000
Epoch [9/40] Loss: 0.5238   Acc: 80.0000
Epoch [10/40] Loss: 0.4870   Acc: 80.0000
Epoch [11/40] Loss: 0.4544   Acc: 80.0000
Epoch [12/40] Loss: 0.4256   Acc: 80.8333
Epoch [13/40] Loss: 0.3999   Acc: 82.0833
Epoch [14/40] Loss: 0.3768   Acc: 83.3333
Epoch [15/40] Loss: 0.3559   Acc: 85.4167
Epoch [16/40] Loss: 0.3366   Acc: 86.6667
Epoch [17/40] Loss: 0.3186   Acc: 88.7500
Epoch [18/40] Loss: 0.3017   Acc: 89.5833
Epoch [19/40] Loss: 0.2857   Acc: 90.4167
Epoch [20/40] Loss: 0.2705   Acc: 91.6667
Epoch [21/40] Loss: 0.2558   Acc: 91.6667
Epoch [22/40] Loss: 0.2419   Acc: 92.9167
Epoch [23/40] Loss: 0.2286   Acc: 92.9167
Epoch [24/40] Loss: 0.2160   Acc: 93.3333
E

In [None]:
"""
SECTION 3 : Testing model
"""
#load
# Absolute path: adjust it if the CSV lives somewhere else.
datatest = pd.read_csv('/content/penguins-clean-test.csv')

#change string value to numeric
# Same species -> class id table as used for the training data. A single
# Series.map builds a new column instead of writing ints into a string column
# cell by cell; values missing from the table are passed through unchanged so
# that pd.to_numeric below still decides whether they are acceptable.
species_to_id = {'Adelie': 0, 'Gentoo': 1, 'Chinstrap': 2}
species_encoded = datatest['species'].map(lambda name: species_to_id.get(name, name))
datatest = datatest.assign(species=species_encoded)
datatest = datatest.apply(pd.to_numeric)

#change dataframe to array
datatest_array = datatest.values

#split x and y (feature and target)
# The slicing is positional: it assumes 'species' is the first CSV column
# and every remaining column is a feature -- TODO confirm against the file.
xtest = datatest_array[:,1:]
ytest = datatest_array[:,0]

#standardization
# transform() only: reuse the statistics already stored in `scaler` rather
# than re-fitting it on the test features.
xtest = scaler.transform(xtest)

#get prediction
# NOTE: X and Y are rebound to the test tensors here; after this cell they no
# longer refer to the training data.
X = torch.as_tensor(xtest, dtype=torch.float32)
Y = torch.as_tensor(ytest, dtype=torch.long)

# Inference only: put the model in evaluation mode and skip autograd
# bookkeeping instead of stripping it afterwards through the legacy `.data`.
net.eval()
with torch.no_grad():
    out = net(X)
predicted = out.argmax(dim=1)

#get accuracy
print('Accuracy of the network %.4f %%'
      % (100 * torch.sum(Y==predicted).double() / len(Y)).item())

Accuracy of the network 96.0784 %
