In [4]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler
import torch
import os
from torch import nn
from torchvision import datasets, transforms
from sklearn.model_selection import train_test_split
import seaborn as sns

In [15]:
# Load every regular file in "data/good_data" and concatenate them into one dataframe.
# The listing is sorted so the row order is reproducible across machines, and
# hidden entries / subdirectories are skipped so they are never handed to read_csv.
data_dir = "data/good_data"
data_files = sorted(
    file for file in os.listdir(data_dir)
    if not file.startswith(".") and os.path.isfile(os.path.join(data_dir, file))
)
# ignore_index=True gives the combined frame a unique 0..n-1 index instead of
# repeating each file's own row numbers.
df = pd.concat(
    [pd.read_csv(os.path.join(data_dir, file)) for file in data_files],
    ignore_index=True,
)

# keep only common_name, condition, latitude_coordinate, longitude_coordinate, and native columns
df = df[['common_name', 'condition', 'latitude_coordinate', 'longitude_coordinate', 'native']]

# convert condition to an integer class label (0 = dead ... 4 = excellent)
condition_codes = {'excellent': 4, 'good': 3, 'fair': 2, 'poor': 1, 'dead/dying': 0, 'dead': 0}
df = (
    # .map turns any value that is missing or not in the mapping into NaN
    # (replace would leave such values behind as strings).
    df.assign(condition=df['condition'].map(condition_codes))
    # Rows without a usable label cannot be trained on, and NaN coordinates
    # would propagate NaN through the scaler and the network, so drop both.
    .dropna(subset=['condition', 'latitude_coordinate', 'longitude_coordinate'])
    .astype({'condition': 'int64'})
    .reset_index(drop=True)
)

# one hot "common_name" and "native" columns
df = pd.get_dummies(df, columns=["common_name", "native"])

In [17]:
# Separate the target column (y) from the remaining feature columns (X).
y = df['condition']
X = df.drop(columns=['condition'])

In [18]:
# network definition
class NeuralNet(nn.Module):
    """Fully connected classifier with a single hidden layer.

    Data flows Linear(input_size -> hidden_size) -> ReLU ->
    Linear(hidden_size -> num_classes). The output is the raw result of the
    last linear layer; no softmax is applied here.
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()
        # Attribute names are part of the state_dict keys, so they stay as-is.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the output of fc2 applied to relu(fc1(x))."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)


In [19]:
# Hold out 20% of the rows as a test set; the fixed random_state makes the
# shuffle (and therefore the split) repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [20]:
# Standardize the features: the scaler is fitted on the training split only,
# and that same fitted transform is then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

KeyboardInterrupt: 

In [None]:
# convert to torch tensors
# Features become float32, the default dtype of nn.Linear parameters.
X_train = torch.from_numpy(X_train).float()
X_test = torch.from_numpy(X_test).float()
# Labels: y_train / y_test come out of train_test_split as pandas Series, which
# torch.from_numpy does not accept, so go through a NumPy array first. They are
# stored as int64 because nn.CrossEntropyLoss expects integer class indices
# for 1-D targets (a float target of this shape is rejected).
y_train = torch.from_numpy(np.asarray(y_train, dtype=np.int64))
y_test = torch.from_numpy(np.asarray(y_test, dtype=np.int64))

In [None]:
# hyperparameters
# The number of input features depends on how many one-hot columns the data
# produced, so read it from the training matrix instead of hard-coding it.
input_size = X_train.shape[1]
hidden_size = 100
num_classes = 5  # condition labels are encoded as the integers 0..4
num_epochs = 100
batch_size = 100
learning_rate = 0.001

# Seed torch so the weight initialisation is the same on every run.
torch.manual_seed(42)

# create model
model = NeuralNet(input_size, hidden_size, num_classes)

# loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# train model
total_step = len(X_train)  # number of training samples
loss_list = []  # loss of every mini-batch, in training order
acc_list = []   # accuracy of every mini-batch, in training order
for epoch in range(num_epochs):
    # Draw a fresh sample order each epoch so the mini-batches are not the
    # exact same groups of rows every time.
    order = torch.randperm(total_step)
    epoch_loss_sum = 0.0
    epoch_correct = 0
    for i in range(0, total_step, batch_size):
        # get batch
        batch_idx = order[i:i+batch_size]
        X_batch = X_train[batch_idx]
        # CrossEntropyLoss needs int64 class indices; .long() is a no-op when
        # the labels are already int64.
        y_batch = y_train[batch_idx].long()

        # forward pass
        outputs = model(X_batch)
        loss = criterion(outputs, y_batch)
        loss_list.append(loss.item())

        # backward and optimize
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # accuracy
        total = y_batch.size(0)
        predicted = outputs.detach().argmax(dim=1)
        correct = (predicted == y_batch).sum().item()
        acc_list.append(correct / total)

        # Weight the batch loss by its size so a short final batch does not
        # skew the epoch average.
        epoch_loss_sum += loss.item() * total
        epoch_correct += correct

    if (epoch+1) % 10 == 0:
        # Report whole-epoch averages rather than the last mini-batch only.
        seen = max(total_step, 1)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss_sum / seen:.4f}, Accuracy: {epoch_correct / seen:.4f}')

In [None]:
# plot the loss recorded for every mini-batch during training
fig, ax = plt.subplots()
ax.plot(loss_list)
ax.set_xlabel("Mini-batch update")
ax.set_ylabel("Cross-entropy loss")
ax.set_title("Training loss")
plt.show()