In [26]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from torch.utils.data import DataLoader, TensorDataset

In [27]:
import pandas as pd
import pickle
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Load dataset
df = pd.read_csv("train.csv")

# Convert categorical labels to numeric (avoid SettingWithCopyWarning)
df.loc[df["label"] == 'C', "label"] = 0
df.loc[df["label"] == 'L', "label"] = 1

# Convert label column to integer type
df["label"] = df["label"].astype(int)

# Separate features and labels
y = df["label"]
X = df.drop("label", axis=1)

# Standardize features
sc = StandardScaler()

# Save the fitted scaler
with open("scaler.pkl", "rb") as f:
    sc=pickle.load(f)

X_scaled = sc.fit_transform(X)  # Fit & transform data

# Convert back to DataFrame for readability (keeping column names)
X_scaled_df = pd.DataFrame(X_scaled, columns=X.columns)

# Train-test split (80-20)
X_train, X_test, y_train, y_test = train_test_split(X_scaled_df, y, test_size=0.2, random_state=42)

print("Data Preprocessing Complete ✅")
print(f"Training samples: {X_train.shape[0]}, Test samples: {X_test.shape[0]}")

Data Preprocessing Complete ✅
Training samples: 12297, Test samples: 3075


In [28]:
X_train

Unnamed: 0,nose_x,nose_y,nose_z,nose_v,left_shoulder_x,left_shoulder_y,left_shoulder_z,left_shoulder_v,right_shoulder_x,right_shoulder_y,...,left_wrist_z,left_wrist_v,left_hip_x,left_hip_y,left_hip_z,left_hip_v,right_hip_x,right_hip_y,right_hip_z,right_hip_v
3233,-1.919143,-1.295797,0.857179,-0.709586,-2.309488,-0.495078,1.338898,-0.620790,-1.246940,-1.043267,...,1.330360,-1.609495,-0.743641,-0.093699,1.486180,-0.163932,0.273689,-0.380155,-1.487437,0.011400
5805,0.995914,-0.552773,0.382319,0.380360,0.466396,-0.039833,-1.409718,0.449428,1.331572,-0.016238,...,0.733255,0.512982,-1.341652,0.090261,-1.473218,0.510739,-0.667229,0.036238,1.473398,0.649469
12032,-0.260298,0.272264,0.538037,-2.259551,-1.199591,-0.126518,0.723282,-0.323885,-0.402866,-0.155374,...,1.498227,-1.516609,0.680396,-1.155684,0.950423,0.271546,1.385362,-1.113761,-0.948822,0.078025
2596,0.903430,-0.516779,-0.408072,0.431039,1.335731,-0.021533,0.000483,0.523074,0.304042,0.209320,...,-0.398728,0.755533,1.650925,0.430837,-0.113217,0.484369,0.835093,0.708785,0.115250,0.671388
15242,-0.725599,0.530774,0.254474,0.431336,-0.141695,1.000073,0.438491,0.517628,-1.329610,1.068714,...,0.330906,0.654004,-0.430269,-0.096791,0.021810,0.570221,-1.554040,-0.196228,-0.023292,0.783330
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5191,0.349411,0.547661,-0.187356,0.198490,-0.843283,0.542822,0.669795,-0.407235,0.062778,0.516714,...,0.945745,-1.520791,-1.109940,1.210873,1.202100,-0.235670,-0.373317,1.228776,-1.202106,-0.193448
13418,-0.582530,-1.263704,-0.791844,0.430011,0.185609,-1.177678,0.065465,0.521573,-1.172449,-1.000408,...,-1.454779,0.754662,-0.218887,-0.783886,-0.083649,0.494289,-1.265490,-0.838350,0.085728,0.621258
5390,-0.692849,0.723913,1.031027,0.430609,-0.087602,1.009549,0.934723,0.480866,-1.145662,0.984474,...,0.720374,0.721521,-0.407651,-0.357950,-0.034233,0.544624,-1.492215,-0.549411,0.032045,0.716917
860,-1.031667,-0.924628,0.715276,-0.396001,-1.840519,-0.226579,1.275513,-0.109047,-0.757398,-0.619684,...,1.256730,-1.592267,-0.173548,0.229631,1.558917,-0.000471,0.888414,-0.191484,-1.561336,0.307888


In [29]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Convert to PyTorch tensors
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32).to(device) # Convert X_train to a NumPy array using .values
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32).to(device) # Convert X_test to a NumPy array using .values
y_train_tensor = torch.tensor(y_train.values, dtype=torch.long).to(device) # Convert y_train to a NumPy array using .values
y_test_tensor = torch.tensor(y_test.values, dtype=torch.long).to(device) # Convert y_test to a NumPy array using .values

In [30]:
# Create DataLoaders
batch_size = 32
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

In [31]:
# Define Deep Learning Models
class SimpleNN(nn.Module):
    def __init__(self, input_size, output_size):
        super(SimpleNN, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(128, output_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x

class DeepNN(nn.Module):
    def __init__(self, input_size, output_size):
        super(DeepNN, self).__init__()
        self.fc1 = nn.Linear(input_size, 256)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(256, 128)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(128, output_size)

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        return x

In [32]:
# Initialize Models
input_size = X_train.shape[1]
output_size = len(np.unique(y_train))  # Dynamically detect output classes

In [33]:

# Training parameters
epochs = 20
criterion = nn.CrossEntropyLoss()
final_results = []

import pickle

def train_and_evaluate(model, model_name):
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Training Loop
    model.train()
    for epoch in range(epochs):
        for batch_X, batch_y in train_loader:
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()

        if (epoch + 1) % 5 == 0:
            print(f"{model_name} - Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}")

    # Save Model as .pkl
    model_path = f"{model_name}.pkl"
    with open(model_path, "wb") as f:
        pickle.dump(model.state_dict(), f)
    print(f"✅ Model {model_name} saved as {model_path}")

    # Evaluation
    model.eval()
    with torch.no_grad():
        y_pred_tensor = model(X_test_tensor)
        y_pred = torch.argmax(y_pred_tensor, dim=1).cpu().numpy()

    # Compute Metrics
    p_score = precision_score(y_test, y_pred, average="macro")
    a_score = accuracy_score(y_test, y_pred)
    r_score = recall_score(y_test, y_pred, average="macro")
    f1_score_result = f1_score(y_test, y_pred, average="macro")
    cm = confusion_matrix(y_test, y_pred)

    # Store results in a list
    final_results.append({
        "Model": model_name,
        "Precision": round(p_score, 3),
        "Accuracy": round(a_score, 3),
        "Recall": round(r_score, 3),
        "F1 Score": round(f1_score_result, 3),
        "Confusion Matrix": cm
    })


In [34]:
models = {
    "SimpleNN": SimpleNN(input_size, output_size).to(device),
    "DeepNN": DeepNN(input_size, output_size).to(device),
}

In [35]:
# Train all models
for name, model in models.items():
    print(f"\nTraining {name}...")
    train_and_evaluate(model, name)

# Sort results by F1 score (using the 'F1 Score' key)
final_results.sort(key=lambda k: k['F1 Score'], reverse=True)  # Access by key instead of index

# Convert to DataFrame for display
results_df = pd.DataFrame(final_results, columns=["Model", "Precision", "Accuracy", "Recall", "F1 Score", "Confusion Matrix"])  # Use actual column names
print("\nFinal Model Performance:")
print(results_df)


Training SimpleNN...
SimpleNN - Epoch [5/20], Loss: 0.0006
SimpleNN - Epoch [10/20], Loss: 0.0000
SimpleNN - Epoch [15/20], Loss: 0.0009
SimpleNN - Epoch [20/20], Loss: 0.0001
✅ Model SimpleNN saved as SimpleNN.pkl

Training DeepNN...
DeepNN - Epoch [5/20], Loss: 0.0002
DeepNN - Epoch [10/20], Loss: 0.0005
DeepNN - Epoch [15/20], Loss: 0.0003
DeepNN - Epoch [20/20], Loss: 0.0005
✅ Model DeepNN saved as DeepNN.pkl

Final Model Performance:
      Model  Precision  Accuracy  Recall  F1 Score         Confusion Matrix
0  SimpleNN      0.998     0.998   0.998     0.998   [[1658, 4], [3, 1410]]
1    DeepNN      0.996     0.996   0.996     0.996  [[1651, 11], [1, 1412]]


In [36]:
results_df

Unnamed: 0,Model,Precision,Accuracy,Recall,F1 Score,Confusion Matrix
0,SimpleNN,0.998,0.998,0.998,0.998,"[[1658, 4], [3, 1410]]"
1,DeepNN,0.996,0.996,0.996,0.996,"[[1651, 11], [1, 1412]]"
