In [35]:
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split

In [36]:
import torch
import torch.nn as nn
import torch.nn.functional as F

In [37]:
# Load the iris dataset
url = 'https://gist.githubusercontent.com/curran/a08a1080b88344b0c8a7/raw/0e7a9b0a5d22642a06d3d5b9bcbad9890c8ee534/iris.csv'
df = pd.read_csv(url)
df.tail()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica
149,5.9,3.0,5.1,1.8,virginica


In [38]:
df['species'] = df['species'].replace('setosa', 0.0)
df['species'] = df['species'].replace('versicolor', 1.0)
df['species'] = df['species'].replace('virginica', 2.0)

In [39]:
# Train Test split, Set x, y
x = df.drop('species', axis=1)
y = df['species']
# Convert to numpy  arrays
x = x.values
y = y.values
# Split
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

In [40]:
# Convert to tensors
X_train = torch.FloatTensor(X_train)
X_test = torch.FloatTensor(X_test)
y_train = torch.LongTensor(y_train)
y_test = torch.LongTensor(y_test)

In [41]:
# Create a Model Class
class Model(nn.Module):

  def __init__(self, num_inputs, num_outputs):
    super().__init__()
    self.layers = torch.nn.Sequential(
      # 1st hidden layer
      torch.nn.Linear(num_inputs, 50),
      torch.nn.ReLU(),
      # 2nd hidden layer
      torch.nn.Linear(50, 30),
      torch.nn.ReLU(),
      # output layer
      torch.nn.Linear(30, num_outputs),
    )
  def forward(self, x):
    logits = self.layers(x)
    return logits

In [42]:
# Make the random number initialization reproducible by seeding PyTorch's random number
torch.manual_seed(41)
model = Model(4, 3)

In [43]:
# Dataset class
from torch.utils.data import Dataset
class DDataset(Dataset):
  def __init__(self, X, y):
    self.features = X
    self.labels = y
  def __getitem__(self, index):
    one_x = self.features[index]
    one_y = self.labels[index]
    return one_x, one_y
  def __len__(self):
    return self.labels.shape[0]

train_ds = DDataset(X_train, y_train)
test_ds = DDataset(X_test, y_test)


In [44]:
# DataLoader
from torch.utils.data import DataLoader
torch.manual_seed(123)
train_loader = DataLoader(
dataset=train_ds,
batch_size=18,
shuffle=True,
num_workers=0,
drop_last=True # Drop the last batch in each epoch
)
test_loader = DataLoader(
dataset=test_ds,
batch_size=18,
shuffle=True,
num_workers=0
)

In [45]:
torch.manual_seed(123)
model = Model(num_inputs=4, num_outputs=3)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
num_epochs = 100
for i in range(num_epochs):
  model.train()
  for batch_idx, (features, labels) in enumerate(train_loader):
    logits = model(features)
    loss = F.cross_entropy(logits, labels)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    ### LOGGING
  if i % 10 ==0:
    print(f"Epoch: {i} Train Loss: {loss}")
  model.eval()

Epoch: 0 Train Loss: 1.0076385736465454
Epoch: 10 Train Loss: 0.11703266203403473
Epoch: 20 Train Loss: 0.03776945173740387
Epoch: 30 Train Loss: 0.003079317044466734
Epoch: 40 Train Loss: 0.05086169019341469
Epoch: 50 Train Loss: 0.03608591854572296
Epoch: 60 Train Loss: 0.1641068011522293
Epoch: 70 Train Loss: 0.03241885080933571
Epoch: 80 Train Loss: 0.0024608075618743896
Epoch: 90 Train Loss: 0.015967806801199913


In [46]:
model.eval()
with torch.no_grad():
  outputs = model(X_train)
# print(outputs)

In [47]:
predictions = torch.argmax(outputs, dim=1)
print(predictions)

tensor([0, 0, 2, 0, 1, 0, 0, 2, 2, 2, 0, 0, 0, 2, 1, 2, 0, 2, 1, 2, 2, 0, 1, 1,
        2, 1, 1, 2, 2, 0, 1, 1, 2, 0, 2, 2, 2, 0, 0, 0, 1, 0, 0, 1, 1, 0, 2, 1,
        2, 1, 1, 0, 1, 1, 1, 0, 2, 2, 1, 0, 1, 2, 0, 2, 0, 0, 1, 2, 0, 1, 0, 2,
        1, 1, 1, 0, 1, 0, 2, 1, 0, 0, 2, 1, 0, 0, 0, 2, 0, 2, 2, 0, 0, 0, 0, 1,
        2, 0, 1, 2, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 0, 1, 1, 0, 2, 2, 0, 1, 0, 1])


In [48]:
torch.sum(predictions == y_train)

tensor(119)

In [49]:
def compute_accuracy(model, dataloader):
  model = model.eval()
  correct = 0.0
  total_examples = 0
  for idx, (features, labels) in enumerate(dataloader):
    with torch.no_grad():
      logits = model(features)
    predictions = torch.argmax(logits, dim=1)
    compare = labels == predictions
    correct += torch.sum(compare)
    total_examples += len(compare)
  return (correct / total_examples).item()

In [50]:
print(compute_accuracy(model, train_loader))

0.9907407164573669


In [51]:
print(compute_accuracy(model, test_loader))

0.9666666388511658
