## Rice dataset training
 This notebook loads the Rice dataset (https://archive.ics.uci.edu/dataset/545/rice+cammeo+and+osmancik), preprocesses it and trains a simple model

In [2]:
import sys
import os
PROJ_DIR = os.path.realpath(os.path.dirname(os.path.abspath('')))
sys.path.append(os.path.join(PROJ_DIR,'src'))



Load data from CSV and save it to a suitable format. This can be skipped if concrete_data.npz is in assets.

In [53]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

def load_rice(path):
    df = pd.read_csv(path, delimiter=',', header=None)

    labels = df[7]
    x = df.drop(columns=7)

    possible_labels = labels.unique().tolist()
    print(possible_labels)
    print(len(possible_labels))
    y = labels.map(lambda x: possible_labels.index(x))

    return x.to_numpy(), y.to_numpy()
    
x, y = load_rice(os.path.join(PROJ_DIR, 'assets', 'data', 'rice.data'))

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

['Cammeo', 'Osmancik']
2


In [54]:
x_mean = x_train.mean(axis=0)
x_std = x_train.std(axis=0)

x_train = (x_train - x_mean)/x_std
x_test = (x_test - x_mean)/x_std

In [55]:
# Save to assets
np.savez(os.path.join(PROJ_DIR, 'assets', 'data', 'rice.npz'),\
        x_train=x_train,\
        x_test=x_test,\
        y_train=y_train,\
        y_test=y_test)

Load data from file

In [60]:
file_data = np.load(os.path.join(PROJ_DIR, 'assets', 'data', 'rice.npz'))
x_train = file_data['x_train']
x_test = file_data['x_test']
y_train = file_data['y_train']
y_test = file_data['y_test']

Train a MLP model

In [80]:
import torch

MODEL_NEURONS = 100
MODEL_EPOCHS= 2000
MODEL_LR = 1.0e-2
MODEL_LABEL_NUM = len(np.unique(y_train))

class MLP(torch.nn.Module):
    def __init__(self, n_neurons):
        super(MLP, self).__init__()
        self.fc1 = torch.nn.Linear(x_train.shape[1], n_neurons)
        self.ac1 = torch.nn.ReLU()
        self.fc2 = torch.nn.Linear(n_neurons, 40)
        self.ac2 = torch.nn.ReLU()
        self.fc3 = torch.nn.Linear(40, 1)
        self.ac3 = torch.nn.Sigmoid()
    
    def forward(self, x):
        x = self.fc1(x)
        x = self.ac1(x)
        x = torch.nn.functional.dropout(x, p=0.9)
        x = self.fc2(x)
        x = self.ac2(x)
        x = torch.nn.functional.dropout(x, p=0.9)
        logits = self.fc3(x)
        x = self.ac3(logits)
        return x

x_train_tensor = torch.tensor(x_train).float()
y_train_tensor = torch.tensor(y_train).float().unsqueeze(1)
x_test_tensor = torch.tensor(x_test).float()
y_test_tensor = torch.tensor(y_test).float()

class_weights = y_train_tensor.mean(dim=0)
print(class_weights)

network = MLP(MODEL_NEURONS)
loss = torch.nn.BCELoss()#weight=class_weights)
optimizer = torch.optim.Adam(network.parameters(), lr=MODEL_LR, weight_decay=1e-1)

for epoch in range(MODEL_EPOCHS):
    optimizer.zero_grad()
    
    preds = network(x_train_tensor)
    loss_value = loss(preds, y_train_tensor)
    loss_value.backward()        
    optimizer.step()

    train_accuracy = ((preds>0.5).float() == y_train_tensor).float().mean() 

    test_preds = network.forward(x_test_tensor)        
    test_accuracy = ((test_preds>0.5).float() == y_test_tensor).float().mean() 
    print(f'Epoch {epoch}/{MODEL_EPOCHS} - Loss: {loss_value.item():.7f} - Train accuracy: {train_accuracy:.4f} - Test accuracy: {test_accuracy:.4f}')  
    #if test_accuracy > 0.75: # Undertrained
    #    break
    
print(test_accuracy.item())


tensor([0.5682])
Epoch 0/2000 - Loss: 0.7507640 - Train accuracy: 0.5210 - Test accuracy: 0.4864
Epoch 1/2000 - Loss: 0.6989411 - Train accuracy: 0.5482 - Test accuracy: 0.4898
Epoch 2/2000 - Loss: 0.6415009 - Train accuracy: 0.5948 - Test accuracy: 0.4917
Epoch 3/2000 - Loss: 0.6042660 - Train accuracy: 0.6480 - Test accuracy: 0.5249
Epoch 4/2000 - Loss: 0.5863429 - Train accuracy: 0.7031 - Test accuracy: 0.5256
Epoch 5/2000 - Loss: 0.5739564 - Train accuracy: 0.7231 - Test accuracy: 0.5279
Epoch 6/2000 - Loss: 0.5582350 - Train accuracy: 0.7651 - Test accuracy: 0.5318
Epoch 7/2000 - Loss: 0.5327837 - Train accuracy: 0.7720 - Test accuracy: 0.5314
Epoch 8/2000 - Loss: 0.5241659 - Train accuracy: 0.7923 - Test accuracy: 0.5328
Epoch 9/2000 - Loss: 0.5119877 - Train accuracy: 0.7959 - Test accuracy: 0.5282
Epoch 10/2000 - Loss: 0.5078915 - Train accuracy: 0.8031 - Test accuracy: 0.5323
Epoch 11/2000 - Loss: 0.5015355 - Train accuracy: 0.7946 - Test accuracy: 0.5286
Epoch 12/2000 - Loss:

In [18]:
# Save model
torch.save(network.state_dict(), os.path.join(PROJ_DIR,'assets','models','rice-mlp.pth'))