In [None]:
# load libraries
import pandas as pd
import numpy as np

# load dataset
# Reads 'iris.csv' (path is relative to the kernel's working directory) into a DataFrame.
df = pd.read_csv('iris.csv')
# Bare expression as the last line of the cell: the notebook renders the frame as the cell output.
df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [None]:
## Type the code for part 1 here ##
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Features are every column except the last; the last column holds the species name.
X = df[df.columns[:-1]].to_numpy()
y_text = df[df.columns[-1]].to_numpy()

# Explicit label encoding: an unexpected species name raises KeyError instead of
# being silently folded into class 2.
species_to_id = {'setosa': 0, 'versicolor': 1, 'virginica': 2}
y = np.array([species_to_id[item] for item in y_text])

# Split BEFORE scaling. The row assignment depends only on the number of rows and
# random_state, so the train/val/test membership is the same as when splitting
# already-scaled data.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.15, random_state=2023)
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.15, random_state=2023)

# Fit the scaler on the training rows only so that validation/test statistics do
# not leak into training; the held-out sets are transformed with the train statistics.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)
# Keep X standardised as well (with the same train-fitted scaler) for any later use.
X = scaler.transform(X)

# Add a trailing axis of length 1: (n_samples, n_features) -> (n_samples, n_features, 1).
n_features = X_train.shape[1]
X_train = X_train.reshape(X_train.shape[0], n_features, 1).astype('float32')
X_val = X_val.reshape(X_val.shape[0], n_features, 1).astype('float32')
X_test = X_test.reshape(X_test.shape[0], n_features, 1).astype('float32')


X_train.shape, y_train.shape, X_val.shape, y_val.shape, X_test.shape, y_test.shape

((107, 4, 1), (107,), (20, 4, 1), (20,), (23, 4, 1), (23,))

In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

class PrepData(Dataset):
    """Map-style dataset pairing feature samples with integer class labels.

    Args:
        data: indexable collection of samples (e.g. a numpy array); `data[i]` is one sample.
        label: sequence of integer class ids, converted once to a LongTensor.
        device: stored on the instance but not used by the dataset itself.
    """

    def __init__(self, data, label, device):
        self.data = data
        self.labels = torch.LongTensor(label)
        self.device = device

    def __getitem__(self, index):
        """Return `(x, y)`: the sample as a float32 tensor and its label tensor."""
        # Return the converted tensor directly. No per-item state is written to
        # `self`, so the dataset stays stateless between calls.
        x = torch.as_tensor(self.data[index], dtype=torch.float32)
        y = self.labels[index]
        return x, y

    def __len__(self):
        """Number of samples."""
        return len(self.data)

    
class DLtoDevice():
    """Wrap an iterable of batches so that every yielded batch is moved to `device`.

    Args:
        data: iterable of batches (e.g. a DataLoader).
        device: target passed to `to_device` for each batch.
    """

    def __init__(self, data, device):
        self.data = data
        self.device = device

    def __iter__(self):
        """Yield each batch of the wrapped iterable after moving it to `self.device`."""
        for b in self.data:
            yield to_device(b, self.device)

    def __len__(self):
        """Number of batches, delegated to the wrapped iterable."""
        return len(self.data)
    
def to_device(data, device):
    """Move `data` to `device`, recursing into lists and tuples.

    A list or tuple is processed element by element and always comes back as a
    list; anything else is moved with `.to(device, non_blocking=True)`.
    """
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)
    moved = []
    for item in data:
        moved.append(to_device(item, device))
    return moved


In [None]:
class IrisClassifier(nn.Module):
  """Conv1d layer followed by two linear layers; `forward` returns class logits.

  Args:
    num_class: number of output classes (width of the last linear layer).
    in_ch: input channels of the Conv1d layer.
    out_ch: output channels of the Conv1d layer. The first linear layer takes
      `out_ch` inputs, so the flattened convolution output must have exactly
      `out_ch` values per sample (i.e. a convolution output of length 1).
    ker_size: Conv1d kernel size.
  """

  def __init__(self, num_class, in_ch, out_ch, ker_size=3):
    super().__init__()
    self.in_ch = in_ch
    self.out_ch = out_ch
    self.ker_size = ker_size

    self.conv_1d = nn.Conv1d(in_ch, out_ch, ker_size)
    self.relu = nn.ReLU(inplace=True)
    self.layer_1 = nn.Linear(out_ch, 50)
    self.layer_2 = nn.Linear(50, num_class)

  def forward(self, x):
    """Return logits of shape (batch, num_class) for input `x` of shape (batch, in_ch, length)."""
    x = self.conv_1d(x)
    x = self.relu(x)
    # Flatten everything but the batch axis before the linear layers.
    x = x.view(x.size(0), -1)
    x = self.layer_1(x)
    x = self.relu(x)
    x = self.layer_2(x)
    return x

  def train(self, batch=True):
    """Compute accuracy and loss for one training batch, or switch train/eval mode.

    This method shadows `nn.Module.train(mode)`. `nn.Module.eval()` calls
    `self.train(False)`, so a boolean argument (or no argument) is delegated to
    `nn.Module.train`; otherwise `model.eval()` / `model.train()` would fail.

    Args:
      batch: a `(data, labels)` pair, or a bool selecting training (True) /
        evaluation (False) mode.

    Returns:
      `(acc, loss)` for a batch; `self` when called with a bool.
    """
    if isinstance(batch, bool):
      return super().train(batch)
    data, labels = batch
    outputs = self(data)  # single forward pass shared by loss and accuracy
    loss = F.cross_entropy(outputs, labels)
    acc = accuracy(outputs, labels)
    return acc, loss

  def validate(self, batch):
    """Return `{'val_loss': ..., 'val_acc': ...}` for one `(data, labels)` batch."""
    data, labels = batch
    outputs = self(data)  # single forward pass shared by loss and accuracy
    loss = F.cross_entropy(outputs, labels)
    acc = accuracy(outputs, labels)
    return {'val_loss': loss, 'val_acc': acc}

  def val_epoch_end(self, outputs):
    """Average per-batch `validate` results into plain-float epoch metrics.

    The mean is taken over batches (unweighted by batch size).
    """
    batch_losses = [x['val_loss'] for x in outputs]
    epoch_loss = torch.stack(batch_losses).mean()
    batch_accs = [x['val_acc'] for x in outputs]
    epoch_acc = torch.stack(batch_accs).mean()
    return {'val_loss': epoch_loss.item(), 'val_acc': epoch_acc.item()}

  def model_stat(self, epoch, result):
    """Print one line with train/val accuracy and loss for `epoch`.

    `result` must contain 'train_acc', 'train_loss', 'val_acc' and 'val_loss'.
    """
    # The backslash continues the string literal, so the next line's leading
    # spaces are part of the printed text.
    print(f"Epoch {epoch}| train [acc: {result['train_acc']:.4f}, loss: {result['train_loss']:.4f}], \
          val [acc: {result['val_acc']:.4f}, val_loss: {result['val_loss']:.4f}]")

# ============================ metrics
def accuracy(outputs, labels):
  """Fraction of rows in `outputs` whose highest-scoring column equals `labels`.

  Returns the fraction as a 0-dim tensor built from a Python float.
  """
  predicted = torch.max(outputs, dim=1).indices
  n_correct = (predicted == labels).sum().item()
  return torch.tensor(n_correct / len(predicted))

@torch.no_grad()
def evaluate(model, val_loader):
  """Run `model.validate` on every batch of `val_loader` with gradients disabled
  and return `model.val_epoch_end` applied to the collected per-batch results."""
  batch_results = []
  for batch in val_loader:
    batch_results.append(model.validate(batch))
  return model.val_epoch_end(batch_results)


################
# Hyperparameters
num_epochs = 30 
learning_rate = 0.001
batch_size = 6

# Model dimensions passed to IrisClassifier(num_class, in_ch, out_ch, ker_size).
in_ch = 4
out_ch = 4
ker_size = 1
num_class = 3

device = torch.device('cpu')

# Seed torch's RNG before the model is built so that the initial weights (and
# therefore the training run) are repeatable across kernel restarts.
torch.manual_seed(2023)

train_data = PrepData(X_train, y_train, device)
train_dataloader = DataLoader(train_data, batch_size=batch_size)
train_dl = DLtoDevice(train_dataloader, device)

v_data = PrepData(X_val, y_val, device)
v_dataloader = DataLoader(v_data, batch_size=batch_size)
val_dl = DLtoDevice(v_dataloader, device)

model = to_device(IrisClassifier(num_class, in_ch, out_ch, ker_size), device)
print(model)

optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# Best validation accuracy seen so far; updated by the training loop.
best_acc = 0

IrisClassifier(
  (conv_1d): Conv1d(4, 4, kernel_size=(1,), stride=(1,))
  (relu): ReLU(inplace=True)
  (layer_1): Linear(in_features=4, out_features=50, bias=True)
  (layer_2): Linear(in_features=50, out_features=3, bias=True)
)


In [None]:
# Train 
for epoch in range(num_epochs):
  train_acc = []
  train_losses = []
  for batch in train_dl:
    acc, loss = model.train(batch)
    train_acc.append(acc)
    # Keep a detached copy for reporting so the stored losses do not hold on
    # to their autograd graphs for the rest of the epoch.
    train_losses.append(loss.detach())
    loss.backward()
    optimizer.step()
    optimizer.zero_grad()

  # Validate once per epoch, after the last optimizer step. Running this inside
  # the batch loop only repeated work: each pass overwrote `result`, so only the
  # evaluation after the final batch was ever reported.
  result = evaluate(model, val_dl)
  result['train_acc'] = torch.stack(train_acc).mean().item()
  result['train_loss'] = torch.stack(train_losses).mean().item()

  model.model_stat(epoch, result)
  if result['val_acc'] > best_acc:
    best_acc = result['val_acc']


Epoch 0| train [acc: 0.3630, loss: 1.0626],           val [acc: 0.2917, val_loss: 1.0722]
Epoch 1| train [acc: 0.3630, loss: 1.0264],           val [acc: 0.2917, val_loss: 1.0597]
Epoch 2| train [acc: 0.3630, loss: 0.9876],           val [acc: 0.2917, val_loss: 1.0466]
Epoch 3| train [acc: 0.5481, loss: 0.9446],           val [acc: 0.4167, val_loss: 1.0314]
Epoch 4| train [acc: 0.7000, loss: 0.8989],           val [acc: 0.5417, val_loss: 1.0127]
Epoch 5| train [acc: 0.7574, loss: 0.8535],           val [acc: 0.5417, val_loss: 0.9838]
Epoch 6| train [acc: 0.7759, loss: 0.8090],           val [acc: 0.5833, val_loss: 0.9447]
Epoch 7| train [acc: 0.7944, loss: 0.7642],           val [acc: 0.5833, val_loss: 0.8994]
Epoch 8| train [acc: 0.8037, loss: 0.7191],           val [acc: 0.6250, val_loss: 0.8525]
Epoch 9| train [acc: 0.8222, loss: 0.6745],           val [acc: 0.6250, val_loss: 0.8065]
Epoch 10| train [acc: 0.8426, loss: 0.6315],           val [acc: 0.6250, val_loss: 0.7612]
Epoch 11|

In [None]:
# prediction
from sklearn.metrics import accuracy_score

# Plain tensors carry autograd state themselves, so no Variable wrapper is needed.
_test = torch.from_numpy(X_test).float()
# Inference only: skip building the autograd graph.
with torch.no_grad():
    pred = model(to_device(_test, device))
pred = pred.cpu().numpy()
# Predicted class = index of the largest logit in each row; computed once and reused.
pred_labels = np.argmax(pred, axis=1)

print("accuracy: ", accuracy_score(y_test, pred_labels))
print(len(pred), pred_labels)

accuracy:  0.9130434782608695
23 [2 1 1 2 1 2 1 1 0 1 0 2 1 2 0 2 0 1 0 0 1 0 2]
