<a href="https://colab.research.google.com/github/afrin-afia/AIMSS_python_workshop_2023/blob/main/code/python_libraries_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#classify iris flowers using Pytorch library
#load necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn
import torch.nn.functional as F

In [10]:
#load and preprocess data
#load
iris_dataframe= sns.load_dataset('iris')                                    #we could load the dataset from a local csv file/sklearn library also
#iris_dataframe.info()
iris_dataframe= iris_dataframe.sample(frac=1)   #shuffle the data. 
iris_dataframe['species'].unique()

#preprocess 
#replace target values with numbers. Note: Assign from 0, we need it to start from 0 (explanation in next cell)
iris_dataframe=iris_dataframe.replace(to_replace="setosa",value=0)
iris_dataframe=iris_dataframe.replace(to_replace="versicolor",value=1)
iris_dataframe=iris_dataframe.replace(to_replace="virginica",value=2)
iris_dataframe.head()


Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
76,6.8,2.8,4.8,1.4,1
109,7.2,3.6,6.1,2.5,2
44,5.1,3.8,1.9,0.4,0
140,6.7,3.1,5.6,2.4,2
72,6.3,2.5,4.9,1.5,1


In [14]:
#partition features and target class
Y= iris_dataframe['species']
X= iris_dataframe.drop(['species'], axis=1)
#X: convert df to array
X= X.to_numpy()
Y= Y.to_numpy()



In [32]:
#split data into train and test dataset
train_proportion= .8      #80% data for training, 20% for testing
train_len= int(train_proportion * len(X))   #len(2d_array) is #rows in that array
train_x= X[0:train_len, :]
train_y= Y[0:train_len]
test_x= X[train_len:, :]
test_y= Y[train_len: ]

#torch uses tensor instead of numpy arrays
train_x = torch.FloatTensor (train_x)
train_y= torch.LongTensor (train_y)
test_x = torch.FloatTensor (test_x)
test_y= torch.LongTensor (test_y)

In [40]:
#create model using Pytorch library
#Pytorch uses object-oriented approach to define model. This involves "class"

class create_model (nn.Module):
  def __init__(self, ip_dim, op_dim):
    super().__init__()
    #define layers here
    self.input_layer= nn.Linear (ip_dim,20)
    self.hidden_layer1= nn.Linear (20, 15)
    self.hidden_layer2= nn.Linear (15, 10)
    self.output_layer= nn.Linear (10, op_dim)

  def forward (self, x):      #define how our data (x) passes through the network
    x= self.input_layer(x)
    x= F.relu (x)
    x= self.hidden_layer1 (x)
    x= F.relu (x)
    x= self.hidden_layer2 (x)
    x= F.relu (x)
    x= self.output_layer (x)
    #output= F.log_softmax (x)
    return x


In [43]:
#get input and output dimension
num_features= X.shape[1]    # #columns in X
num_classes= len(Y)     #Y is an array of one-hot encoded vectors. Each vector's size is #classes
#print(num_features)
#print(num_classes)
mymodel= create_model(num_features, num_classes)


In [44]:
#we need compile 'mymodel' now.
#define which loss function we want to use
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(mymodel.parameters(), lr=0.01)
epochs = 50
losses=[]

for i in range(epochs):
    predicted_y = mymodel.forward(train_x)
    loss = criterion(predicted_y, train_y)
    losses.append(loss)
    print(f'epoch: {i:2}  loss: {loss.item():10.8f}')
    
    #backpropagation
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    

epoch:  0  loss: 5.13066626
epoch:  1  loss: 5.02700138
epoch:  2  loss: 4.91827965
epoch:  3  loss: 4.79836416
epoch:  4  loss: 4.63789082
epoch:  5  loss: 4.42769861
epoch:  6  loss: 4.16352606
epoch:  7  loss: 3.84316683
epoch:  8  loss: 3.45912266
epoch:  9  loss: 3.01961803
epoch: 10  loss: 2.55888605
epoch: 11  loss: 2.13917565
epoch: 12  loss: 1.81840050
epoch: 13  loss: 1.60284197
epoch: 14  loss: 1.44253659
epoch: 15  loss: 1.26540399
epoch: 16  loss: 1.06375146
epoch: 17  loss: 0.91601390
epoch: 18  loss: 0.97207832
epoch: 19  loss: 1.08503044
epoch: 20  loss: 1.03833759
epoch: 21  loss: 0.89371049
epoch: 22  loss: 0.81377125
epoch: 23  loss: 0.85344183
epoch: 24  loss: 0.89291096
epoch: 25  loss: 0.84904104
epoch: 26  loss: 0.74974728
epoch: 27  loss: 0.68904972
epoch: 28  loss: 0.69801277
epoch: 29  loss: 0.68879682
epoch: 30  loss: 0.64914024
epoch: 31  loss: 0.61258215
epoch: 32  loss: 0.57329214
epoch: 33  loss: 0.53880864
epoch: 34  loss: 0.53324366
epoch: 35  loss: 0.5

In [49]:
#evaluate model
predictions =  []
with torch.no_grad():
  for data_sample in test_x:
    y_hat = mymodel(data_sample)
    predictions.append(y_hat.argmax().item())

print(predictions)

[0, 0, 0, 2, 0, 1, 0, 0, 1, 0, 2, 1, 0, 0, 0, 0, 2, 1, 0, 2, 2, 2, 0, 0, 1, 0, 0, 0, 0, 1]


In [50]:
#print test accuracy
from sklearn.metrics import accuracy_score

accuracy_score(test_y, predictions)

0.9666666666666667