In [5]:
import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
import pandas as pd
import os

In [6]:
# Read the feature matrix (X) and label vector (y) from the raw-data directory
path = "../data/raw"
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary Python objects,
# which can execute code if the .npy files are not trusted. Presumably it is needed
# because one of the arrays was saved with object dtype -- confirm, and drop the
# flag if both arrays are plain numeric.
X, y = (
    np.load(os.path.join(path, filename), allow_pickle=True)
    for filename in ('mnist_features.npy', 'mnist_labels.npy')
)

In [7]:
# Hold out 20% of the samples as a test set; the fixed random_state keeps the
# split identical from run to run
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)

In [8]:
# Standardize the features: the scaling statistics are learned from the training
# split only and then applied unchanged to both splits, so nothing about the test
# set leaks into the preprocessing
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [10]:
# Build the training tensors.
# Labels are cast to integers on the NumPy side first, then stored as int64 tensors
y_train_torch = torch.from_numpy(y_train.astype(int)).long()
# Features become float32 tensors
X_train_torch = torch.from_numpy(X_train).float()

In [11]:
# Create a DataLoader that yields (features, labels) mini-batches of 32 samples
dataset = TensorDataset(X_train_torch, y_train_torch)
# Reshuffle the samples at the start of every epoch so SGD does not see the exact
# same batches in the exact same order on each pass. The dedicated, seeded
# generator keeps that shuffling order reproducible across kernel restarts.
data_loader = DataLoader(
    dataset,
    batch_size=32,
    shuffle=True,
    generator=torch.Generator().manual_seed(0),
)

In [12]:
# Define a simple neural network: 784 input features -> 128 hidden units (ReLU)
# -> 10 output scores.
# Seed torch right before the layers are created so the initial weights -- and
# therefore the reported results -- are the same on every Restart & Run All.
torch.manual_seed(0)
model = nn.Sequential(
    nn.Linear(784, 128),
    nn.ReLU(),
    nn.Linear(128, 10)
)

In [13]:
# Set up training: stochastic gradient descent over all model parameters with a
# learning rate of 0.01, minimising a cross-entropy loss
optimizer = torch.optim.SGD(params=model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()

In [14]:
# Train the model: 10 full passes over the training batches
for epoch in range(10):
    for batch_features, batch_targets in data_loader:
        # Clear the gradients left over from the previous step; backward()
        # accumulates into .grad rather than overwriting it
        optimizer.zero_grad()

        # Forward pass and loss for this mini-batch
        loss = criterion(model(batch_features), batch_targets)

        # Backward pass, then a single parameter update
        loss.backward()
        optimizer.step()

In [15]:
# Build the test tensors the same way as the training tensors.
# Labels are cast to integers on the NumPy side first, then stored as int64 tensors
y_test_torch = torch.from_numpy(y_test.astype(int)).long()
# Features become float32 tensors
X_test_torch = torch.from_numpy(X_test).float()

In [16]:
# Make predictions on the test set.
# Evaluation needs no gradients, so run the forward pass under no_grad(): this
# avoids building an autograd graph for the whole test set in a single batch.
with torch.no_grad():
    outputs = model(X_test_torch)
# torch.max over dim 1 returns (values, indices); the index of the largest score
# in each row is the predicted class
_, y_pred_torch = torch.max(outputs, 1)

In [17]:
# Report the fraction of test samples whose predicted class equals the true label
print(accuracy_score(y_true=y_test_torch, y_pred=y_pred_torch))

0.9606428571428571
