In [1]:
import csv
import re
import numpy as np
from collections import defaultdict
import torch
import torch.optim as optim
from torch import nn 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

### Importing the dataset

In [14]:
# Parse the almond CSV into `data`, a list with one dict per row keyed by the
# cleaned column names. The first CSV column is skipped; every remaining
# column except the last is read as a float feature, and the last column is
# the class label, stored as an integer index into `types`.
# Empty cells become None so the preprocessing step can impute them.
data = []
types = []  # distinct class labels, in order of first appearance

# newline='' is what the csv module asks for so that quoted fields containing
# line breaks are parsed correctly.
with open('almonds/Almond.csv', 'r', newline='') as dataset:
    reader = csv.reader(dataset)

    # Drop the leading column and strip parenthesised suffixes
    # (e.g. "Name (something)" -> "Name") from the header names.
    headers = [re.sub(r'\s*\(.*\)', '', header).strip() for header in next(reader)[1:]]
    label_position = len(headers) - 1

    for line in reader:
        # csv.reader yields [] for blank lines; an empty row dict would
        # break every later lookup by column name, so skip those lines.
        if not line:
            continue

        row = {}
        for x, value in enumerate(line[1:]):
            if value == '':
                # Missing cell. Checked first so that an empty label is never
                # registered in `types` as if it were a real class.
                row[headers[x]] = None
            elif x < label_position:
                row[headers[x]] = float(value)
            else:
                if value not in types:
                    types.append(value)
                row[headers[x]] = types.index(value)
        data.append(row)

### Preprocessing

In [15]:
# Impute missing values in `data` in place.
#
# 1. Count missing values per feature (`nones`) for inspection.
# 2. Fill Length / Width from one another using the median Length/Width ratio
#    of the rows where both are present; fill Thickness with its median.
# 3. Recompute the derived shape features (Roundness, Aspect Ratio,
#    Eccentricity) from the now-complete dimensions wherever they are missing.
nones = defaultdict(int)

for instance in data:
    for feature, value in instance.items():
        if value is None:
            nones[feature] += 1

lengths = [instance['Length'] for instance in data]
widths = [instance['Width'] for instance in data]
thicknesses = [instance['Thickness'] for instance in data]

ratios = [instance['Length'] / instance['Width'] for instance in data if instance['Length'] is not None and instance['Width'] is not None]
median_ratio = np.median(ratios)

# Per-column medians. Length/Width medians are only used as a fallback when a
# row is missing both, so the ratio-based estimate is impossible.
length_median = np.median([value for value in lengths if value is not None])
width_median = np.median([value for value in widths if value is not None])
thickness_median = np.median([value for value in thicknesses if value is not None])

for instance in data:
    length_missing = instance['Length'] is None
    width_missing = instance['Width'] is None

    if length_missing and width_missing:
        # Neither dimension is available to scale from the other.
        instance['Length'] = length_median
        instance['Width'] = width_median
    elif length_missing:
        instance['Length'] = instance['Width'] * median_ratio
    elif width_missing:
        instance['Width'] = instance['Length'] / median_ratio

    # Handled independently of Length/Width: a row may be missing Thickness
    # as well as one of the other dimensions.
    if instance['Thickness'] is None:
        instance['Thickness'] = thickness_median

    if instance['Roundness'] is None:
        instance['Roundness'] = 4 * instance['Area'] / (np.pi * instance['Length'] ** 2)
    if instance['Aspect Ratio'] is None:
        instance['Aspect Ratio'] = instance['Length'] / instance['Width']
    if instance['Eccentricity'] is None:
        # Clamp at 0 so a row whose Width exceeds its Length yields 0 rather
        # than the NaN produced by taking the square root of a negative number.
        instance['Eccentricity'] = np.sqrt(max(0.0, 1 - (instance['Width'] / instance['Length'])**2))


### Setup, training and evaluation of the NN using PyTorch

In [16]:
# Every header except the last is an input feature; the last one is the label.
features, target = headers[:-1], headers[-1]

# Build the feature matrix and label vector, one entry per row of `data`.
X = torch.tensor(
    [[row[name] for name in features] for row in data],
    dtype=torch.float32,
)
y = torch.tensor([row[target] for row in data], dtype=torch.long)

# Hold out 30 % of the rows, then cut that hold-out in half to obtain
# validation and test sets.
X_train, X_val_test, y_train, y_val_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
X_val, X_test, y_val, y_test = train_test_split(
    X_val_test, y_val_test, test_size=0.5, random_state=42
)

# Optional: standardise the features. The scaler is fitted on the training
# split only and then applied unchanged to all three splits.
scaler = StandardScaler().fit(X_train)
X_train, X_val, X_test = (
    torch.tensor(scaler.transform(split), dtype=torch.float32)
    for split in (X_train, X_val, X_test)
)

class AlmondNN(nn.Module):
    """Fully connected classifier with four ReLU hidden layers (128-64-32-16).

    Args:
        in_features: Size of the last dimension of the input. Defaults to
            ``len(features)``, the notebook's feature list.
        num_classes: Number of output logits. Defaults to ``len(types)``,
            the notebook's list of class labels.

    ``AlmondNN()`` with no arguments behaves as before and reads both sizes
    from the notebook globals; passing them explicitly lets the network be
    built without those globals.
    """

    def __init__(self, in_features=None, num_classes=None):
        super().__init__()
        # Resolve the defaults at construction time so the globals are only
        # needed when the caller does not supply the sizes.
        if in_features is None:
            in_features = len(features)
        if num_classes is None:
            num_classes = len(types)

        self.fc1 = nn.Linear(in_features, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc4 = nn.Linear(32, 16)
        self.fc5 = nn.Linear(16, num_classes)

    def forward(self, x):
        """Return one raw score (logit) per class for each input row.

        No softmax is applied to the final layer's output.
        """
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        x = torch.relu(self.fc4(x))
        x = self.fc5(x)
        return x
    
# Train the classifier with full-batch gradient descent (the whole training
# split is passed through the model once per epoch).

# Seed PyTorch before the model is built so the random weight initialisation,
# and therefore the reported losses and accuracy, are the same on every run.
torch.manual_seed(42)

model = AlmondNN()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

epochs = 100

for epoch in range(epochs):
    model.train()

    optimizer.zero_grad()

    outputs = model(X_train)
    loss = criterion(outputs, y_train)

    loss.backward()
    optimizer.step()

    if (epoch+1) % 10 == 0:
        # Report the loss on the validation split next to the training loss
        # so over-fitting is visible; no gradients are needed for this pass.
        model.eval()
        with torch.no_grad():
            val_loss = criterion(model(X_val), y_val)
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}, Val Loss: {val_loss.item():.4f}')

# Score the trained model on the test split.
model.eval()

with torch.no_grad():
    outputs = model(X_test)
    # The predicted class is the index of the largest logit in each row.
    predicted = outputs.argmax(dim=1)

    correct = (predicted == y_test).sum().item()
    accuracy = correct / len(y_test)
    print(f'Accuracy: {accuracy * 100:.2f}%')