In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder


In [2]:

# Simple Python-Only MLP Implementation
class SimpleMLP:
    """Two-layer perceptron (forward pass only) implemented with NumPy.

    Both layers apply a sigmoid activation. Weights are drawn from the global
    ``np.random`` state (standard normal scaled by 0.1); biases start at zero.
    No training method is provided.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create randomly initialised weights and zero biases.

        Args:
            input_size: Number of input features per sample.
            hidden_size: Number of units in the hidden layer.
            output_size: Number of output units.
        """
        # Draw order (weights1, then weights2) is kept so a given np.random
        # seed produces the same parameters as before.
        self.weights1 = np.random.randn(input_size, hidden_size) * 0.1
        self.weights2 = np.random.randn(hidden_size, output_size) * 0.1
        self.bias1 = np.zeros((1, hidden_size))
        self.bias2 = np.zeros((1, output_size))

    def sigmoid(self, x):
        """Return the element-wise logistic function ``1 / (1 + exp(-x))``.

        Evaluated as ``exp(-log(1 + exp(-x)))`` via ``np.logaddexp`` so that
        large negative inputs do not overflow ``np.exp`` and emit a
        RuntimeWarning; the result saturates cleanly at 0 and 1 instead.

        Args:
            x: NumPy scalar or array of pre-activations.

        Returns:
            Values in [0, 1] with the same shape as ``x``.
        """
        return np.exp(-np.logaddexp(0, -x))

    def forward(self, x):
        """Run the network on a batch of samples.

        Args:
            x: Array of shape ``(n_samples, input_size)``.

        Returns:
            Array of shape ``(n_samples, output_size)`` of sigmoid activations.
        """
        hidden = self.sigmoid(np.dot(x, self.weights1) + self.bias1)
        output = self.sigmoid(np.dot(hidden, self.weights2) + self.bias2)
        return output


In [3]:

# High-Level PyTorch MLP Implementation
class TorchMLP(nn.Module):
    """Feed-forward network: Linear -> ReLU -> Linear.

    No activation is applied after the second linear layer, so ``forward``
    returns its raw outputs.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Build the two linear layers and the ReLU between them."""
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_size, out_features=hidden_size)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(in_features=hidden_size, out_features=output_size)

    def forward(self, x):
        """Map a batch of inputs to a batch of ``output_size`` raw outputs."""
        hidden_activations = self.relu(self.fc1(x))
        return self.fc2(hidden_activations)


In [4]:

# Load the Ecoli dataset into a DataFrame
# NOTE(review): this is an absolute, machine-specific path; point it at your own
# copy of ecoli.data (ideally relative to the notebook) before running.
ecoli_data_path = 'C:\\Users\\fhitl\\Documents\\Deep_Neural_Networks_Assignments\\datasets\\ecoli.data'  # File path
column_names = ['sequence_name', 'mcg', 'gvh', 'lip', 'chg', 'aac', 'alm1', 'alm2', 'class']
# Fields are split on runs of whitespace and column labels come from `names`.
# sep=r'\s+' is the supported spelling of delim_whitespace=True, which pandas
# deprecated in 2.2.
ecoli_df = pd.read_csv(ecoli_data_path, sep=r'\s+', names=column_names)


In [5]:

# Keep only the rows whose 'class' value is 'cp' or 'im'
filtered_ecoli_df = ecoli_df.loc[lambda frame: frame['class'].isin(['cp', 'im'])]


In [6]:

# Labels are the 'class' column; features are every column except
# 'sequence_name' and 'class'
labels = filtered_ecoli_df['class']
features = filtered_ecoli_df.drop(columns=['sequence_name', 'class'])


In [7]:

# Map the class strings to integer codes; the fitted encoder is kept so the
# codes can be mapped back to class names later
label_encoder = LabelEncoder()
label_encoder.fit(labels)
encoded_labels = label_encoder.transform(labels)


In [8]:

# Standardize every feature column with StandardScaler
# NOTE(review): the scaler is fit on all filtered rows, i.e. before any
# train/test split has been made.
scaler = StandardScaler()
scaler.fit(features)
normalized_features = scaler.transform(features)


In [9]:

# Split data into training and testing sets, then standardize.
# The split is made on the raw features and the scaler is re-fit on the training
# rows only, so test-set means/variances do not leak into what the models train
# on. random_state pins the shuffle, so the same rows land in each partition as
# when the pre-normalized array was split.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    features, encoded_labels, test_size=0.2, random_state=42
)
scaler = StandardScaler().fit(X_train_raw)
X_train = scaler.transform(X_train_raw)
X_test = scaler.transform(X_test_raw)


In [10]:

# NumPy copies of the four splits, used by the NumPy-based SimpleMLP
X_train_np = np.array(X_train)
X_test_np = np.array(X_test)
y_train_np = np.array(y_train)
y_test_np = np.array(y_test)


In [11]:

# Tensor copies of the splits for the PyTorch model:
# features as float32, labels as int64 (torch.long)
X_train_torch, X_test_torch = (
    torch.tensor(split, dtype=torch.float32) for split in (X_train, X_test)
)
y_train_torch, y_test_torch = (
    torch.tensor(split, dtype=torch.long) for split in (y_train, y_test)
)


In [12]:

# Seed both RNGs so weight initialisation and batch shuffling are repeatable
# across Restart & Run All.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Take the input width from the data instead of hard-coding it.
n_features = X_train_np.shape[1]

# Initialize and use SimpleMLP
py_mlp = SimpleMLP(input_size=n_features, hidden_size=10, output_size=2)
py_output = py_mlp.forward(X_train_np[0:1])  # Forward pass with first training example
print("Output from Python-only MLP:", py_output)

# Initialize and train TorchMLP
torch_mlp = TorchMLP(input_size=n_features, hidden_size=10, output_size=2)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(torch_mlp.parameters(), lr=0.001)

train_dataset = TensorDataset(X_train_torch, y_train_torch)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(TensorDataset(X_test_torch, y_test_torch), batch_size=32, shuffle=False)

# Training loop for TorchMLP
# Loop variables are named batch_* so they do not overwrite the notebook-level
# `labels` Series that the label-encoding cell reads.
torch_mlp.train()
for epoch in range(50):  # Number of epochs
    for batch_inputs, batch_labels in train_loader:
        optimizer.zero_grad()
        outputs = torch_mlp(batch_inputs)
        loss = criterion(outputs, batch_labels)
        loss.backward()
        optimizer.step()

# Evaluate TorchMLP
torch_mlp.eval()
correct = 0
total = 0
with torch.no_grad():
    for batch_inputs, batch_labels in test_loader:
        outputs = torch_mlp(batch_inputs)
        # Predicted class = index of the largest output per row
        predicted = outputs.argmax(dim=1)
        total += batch_labels.size(0)
        correct += (predicted == batch_labels).sum().item()

print(f'Accuracy of the PyTorch model on the test set: {100 * correct / total}%')


Output from Python-only MLP: [[0.46846946 0.53255427]]
Accuracy of the PyTorch model on the test set: 97.72727272727273%
