# Model training

In [1]:
import torch
from torch import nn
import pandas as pd

In [2]:
# Reproducibility: seed PyTorch's global RNG before any model is created, so
# weight initialisation is the same on every Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

# Setup device agnostic code: use the GPU when one is visible, otherwise the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cpu'

In [3]:
# Class names: the non-null `cogn_func` entries from sheet index 1 of the label workbook
classes = (
    pd.read_excel("./../data/text_labels.xlsx", sheet_name=1)["cogn_func"]
    .dropna()
    .to_numpy()
)

In [4]:
# Training features: read the CSV and hand its values to PyTorch as float32
X_train = torch.from_numpy(
    pd.read_csv("./../data/features/in_features_train.csv").to_numpy()
).float()
X_train.shape

torch.Size([71, 70])

In [5]:
# Sanity check: show the dtype of the training features after the cast to torch.float
X_train.dtype

torch.float32

In [6]:
# Training targets: the `cogn_func` column of the label CSV as an int64 tensor
y_train = torch.from_numpy(
    pd.read_csv("./../data/features/out_features_train.csv")["cogn_func"].to_numpy()
).long()
y_train.shape

torch.Size([71])

In [7]:
# Test features: read the CSV and hand its values to PyTorch as float32
X_test = torch.from_numpy(
    pd.read_csv("./../data/features/in_features_test.csv").to_numpy()
).float()
X_test.shape

torch.Size([18, 70])

In [8]:
# Test targets: the `cogn_func` column of the label CSV as an int64 tensor
y_test = torch.from_numpy(
    pd.read_csv("./../data/features/out_features_test.csv")["cogn_func"].to_numpy()
).long()
y_test.shape

torch.Size([18])

## Support Vector Machine (SVM)

In [9]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, KFold

# RBF-kernel SVM whose C and gamma are tuned by an exhaustive grid search.
svm = SVC(kernel='rbf')
p_grid = {"C": [1, 10, 100], "gamma": [0.01, 0.1]}

# Shuffled 5-fold split. The fixed random_state pins the fold assignment, so the
# cross-validation scores (and therefore the selected hyper-parameters) are the
# same on every run instead of depending on NumPy's global RNG state.
cv = KFold(n_splits=5, shuffle=True, random_state=42)

# Candidates are ranked by mean cross-validated accuracy.
clf = GridSearchCV(estimator=svm, param_grid=p_grid, cv=cv, scoring="accuracy")

In [10]:
# Run the 5-fold grid search over `p_grid` on the training split.
# The torch tensors are handed to scikit-learn as-is (no explicit NumPy conversion here).
clf.fit(X_train, y_train)

In [11]:
# Predict test-set labels with the fitted grid search.
# NOTE(review): the saved output assigns class 6 to every test sample, i.e. the
# classifier has collapsed to a single class — investigate before trusting the score.
clf.predict(X_test)

array([6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6], dtype=int64)

In [12]:
# Score the fitted grid search on the held-out split (scoring="accuracy")
svm_accuracy = clf.score(X_test, y_test)
print("SVM Accuracy: ", svm_accuracy)

SVM Accuracy:  0.16666666666666666


In [31]:
import joblib
from pathlib import Path


# Output folder for serialized models; created on first use, reused afterwards
# (see https://docs.python.org/3/library/pathlib.html#pathlib.Path.mkdir)
MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

# Destination file for the fitted grid search
MODEL_NAME = "svm.pk"
MODEL_SAVE_PATH = MODEL_PATH.joinpath(MODEL_NAME)

# Persist the whole fitted GridSearchCV object; the value returned by
# joblib.dump is left as the last expression so it is shown as the cell output
joblib.dump(clf, MODEL_SAVE_PATH)

['models\\svm.pk']

## Linear model (MLP)

In [13]:
# Network dimensions: one input per feature column, one output per class label
in_features = X_train.shape[1]
out_features = len(classes)
in_features, out_features

(70, 8)

In [14]:
from modules.model_builder import BaseModel

# Instantiate the project's BaseModel and move it to the selected device.
# NOTE(review): the middle argument (10) is presumably the hidden-layer width —
# confirm against modules/model_builder.
model_0 = BaseModel(in_features, 10, out_features).to(device)

In [15]:
from modules.engine import train

# Cross-entropy loss on the model outputs; the optimizer is passed as a class,
# not an instance (presumably instantiated inside `train` — confirm).
loss_fn = nn.CrossEntropyLoss()
optimizer_class = torch.optim.SGD

# NOTE(review): 100 and 0.1 are presumably the epoch count and learning rate —
# confirm against modules.engine.train. The test split is passed in as well;
# verify that it is only used for monitoring, not for parameter updates.
train(model_0, X_train, X_test, y_train, y_test,
      loss_fn, optimizer_class, 100, 0.1, device)

In [16]:
# Score the test set with model_0: evaluation mode, no autograd tracking.
# The predicted class is the index of the largest logit in each row.
model_0.eval()
with torch.inference_mode():
    y_logits = model_0(X_test.to(device))
    y_preds = torch.argmax(y_logits, dim=1).cpu()
y_preds

tensor([6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6])

In [17]:
from sklearn.metrics import accuracy_score

# Fraction of test samples whose predicted class matches the label
mlp_accuracy = accuracy_score(y_test, y_preds)
print("MLP Accuracy: ", mlp_accuracy)

MLP Accuracy:  0.16666666666666666


In [18]:
# Output folder for serialized models; created on first use, reused afterwards
# (see https://docs.python.org/3/library/pathlib.html#pathlib.Path.mkdir)
MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

# Destination file for the model_0 weights
MODEL_NAME = "mlp.pth"
MODEL_SAVE_PATH = MODEL_PATH.joinpath(MODEL_NAME)

# Store only the state_dict (the learned parameters), not the module object itself
print(f"Saving model to: {MODEL_SAVE_PATH}")
torch.save(model_0.state_dict(), MODEL_SAVE_PATH)

Saving model to: models\mlp.pth


## Convolutional model (CNN)

In [19]:
# Insert a singleton channel axis: (samples, features) -> (samples, 1, features).
# The rank check makes the cell idempotent: re-running it no longer stacks
# extra axes onto tensors that were already reshaped.
if X_train.dim() == 2:
    X_train = X_train.unsqueeze(1)
if X_test.dim() == 2:
    X_test = X_test.unsqueeze(1)

In [20]:
# Everything after the first (sample) axis: channel count and per-channel length
in_channels, in_length = X_train.shape[1:]
in_channels, in_length

(1, 70)

In [21]:
from modules.model_builder import ConvModel

# Instantiate the project's ConvModel and move it to the selected device.
# NOTE(review): 10 is presumably a hidden/filter size, and the "18" in the saved
# output appears to be printed from inside ConvModel — confirm against
# modules/model_builder.
model_1 = ConvModel(in_channels, in_length, 10, out_features).to(device)

18


In [22]:
# Train the conv model with the same loss, optimizer class and numeric settings
# that were used for model_0.
train(model_1, X_train, X_test, y_train, y_test,
      loss_fn, optimizer_class, 100, 0.1, device)

In [23]:
# Score the test set with model_1: evaluation mode, no autograd tracking.
# The predicted class is the index of the largest logit in each row.
model_1.eval()
with torch.inference_mode():
    y_logits = model_1(X_test.to(device))
    y_preds = y_logits.argmax(dim=1).to("cpu")
# Only the last expression of a cell is displayed, so the stray bare `y_preds`
# that used to precede this line was a no-op and has been dropped.
# The raw logits are shown to inspect how (little) they vary across samples.
y_logits

tensor([[ 0.0178,  0.4222,  0.4227, -0.1193, -1.3163,  0.1340,  0.7693, -0.6866],
        [ 0.0155,  0.4228,  0.4235, -0.1177, -1.3175,  0.1347,  0.7730, -0.6851],
        [ 0.0172,  0.4230,  0.4227, -0.1189, -1.3178,  0.1333,  0.7699, -0.6866],
        [ 0.0166,  0.4225,  0.4233, -0.1187, -1.3174,  0.1336,  0.7708, -0.6864],
        [ 0.0177,  0.4223,  0.4224, -0.1191, -1.3166,  0.1334,  0.7695, -0.6858],
        [ 0.0166,  0.4226,  0.4227, -0.1188, -1.3171,  0.1336,  0.7712, -0.6850],
        [ 0.0154,  0.4230,  0.4240, -0.1183, -1.3181,  0.1355,  0.7734, -0.6861],
        [ 0.0168,  0.4228,  0.4228, -0.1200, -1.3173,  0.1336,  0.7714, -0.6862],
        [ 0.0175,  0.4227,  0.4238, -0.1189, -1.3171,  0.1341,  0.7697, -0.6869],
        [ 0.0175,  0.4223,  0.4230, -0.1194, -1.3170,  0.1331,  0.7706, -0.6859],
        [ 0.0167,  0.4235,  0.4231, -0.1183, -1.3179,  0.1346,  0.7709, -0.6855],
        [ 0.0168,  0.4224,  0.4225, -0.1186, -1.3174,  0.1348,  0.7709, -0.6881],
        [ 0.0176

In [24]:
# Predicted class per test sample from model_1.
# NOTE(review): the saved output is class 6 for every sample, the same collapse
# seen in the earlier saved prediction outputs — investigate.
y_preds

tensor([6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6])

In [25]:
# Fraction of test samples whose predicted class matches the label
cnn_accuracy = accuracy_score(y_test, y_preds)
print("CNN Accuracy: ", cnn_accuracy)

CNN Accuracy:  0.16666666666666666


In [26]:
# Output folder for serialized models; created on first use, reused afterwards
# (see https://docs.python.org/3/library/pathlib.html#pathlib.Path.mkdir)
MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

# Destination file for the model_1 weights
MODEL_NAME = "cnn.pth"
MODEL_SAVE_PATH = MODEL_PATH.joinpath(MODEL_NAME)

# Store only the state_dict (the learned parameters), not the module object itself
print(f"Saving model to: {MODEL_SAVE_PATH}")
torch.save(model_1.state_dict(), MODEL_SAVE_PATH)

Saving model to: models\cnn.pth
