In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd


In [2]:
from pathlib import Path
import os

current_directory = os.getcwd()

train = pd.read_csv("dataset/sign_mnist_train.csv")
test = pd.read_csv("dataset/sign_mnist_test.csv")

label_train = train["label"]
label_test = test["label"]
del train["label"]
del test["label"]

from sklearn.preprocessing import LabelBinarizer
lb = LabelBinarizer()

#converting labels to binary for more efficient training
label_train = lb.fit_transform(label_train)
label_test = lb.fit_transform(label_test)

image_train = train.values
image_test = test.values


# dividing pixel values to keeop them between 0 and 1 for efficient training
image_train = image_train.astype(float) / 255
image_test = image_test.astype(float) / 225

image_train = image_train.reshape(-1, 28, 28, 1)
image_test = image_test.reshape(-1, 28, 28, 1)


In [3]:
import torch
import torch.nn as nn

In [4]:
import torch
import torch.nn as nn

class SignLanguageCNN(nn.Module):
    def __init__(self):
        super(SignLanguageCNN, self).__init__()
        # first layer
        self.conv1 = nn.Conv2d(1, 75, kernel_size=3, stride=1, padding=1)  # input channels=1, output channels=75
        self.bn1 = nn.BatchNorm2d(75)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2, padding=1)
        # second layer
        self.conv2 = nn.Conv2d(75, 50, kernel_size=3, stride=1, padding=1)  # input channels=75, output channels=50
        self.dropout = nn.Dropout(0.2)
        self.bn2 = nn.BatchNorm2d(50)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2, padding=1)
        # third layer
        self.conv3 = nn.Conv2d(50, 25, kernel_size=3, stride=1, padding=1)  # input channels=50, output channels=25
        self.bn3 = nn.BatchNorm2d(25)
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2, padding=1)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(25*4*4, 512)  # 4x4 is the spatial dimension after three max-pooling layers
        self.dropout2 = nn.Dropout(0.3)
        self.fc2 = nn.Linear(512, 24)  # output layer

    def forward(self, x):
        x = self.pool1(nn.functional.relu(self.bn1(self.conv1(x))))
        x = self.dropout(x)
        x = self.pool2(nn.functional.relu(self.bn2(self.conv2(x))))
        x = self.pool3(nn.functional.relu(self.bn3(self.conv3(x))))
        x = self.flatten(x)
        x = nn.functional.relu(self.fc1(x))
        x = self.dropout2(x)
        x = nn.functional.softmax(self.fc2(x), dim=1)
        return x


In [7]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

# Assuming you have already defined and loaded your model, data, and other variables

# Define the number of epochs and batch size
epochs = 10
batch_size = 64

# Convert data to PyTorch tensors and float type
image_train = torch.tensor(image_train).float()
label_train = torch.tensor(label_train).long()  # Assuming labels are integers (class indices)
image_test = torch.tensor(image_test).float()
label_test = torch.tensor(label_test).long()    # Assuming labels are integers (class indices)

# Create DataLoader for training and testing datasets
train_dataset = TensorDataset(image_train, label_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_dataset = TensorDataset(image_test, label_test)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Initialize the model
model = SignLanguageCNN()

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
for epoch in range(epochs):
    model.train()  # Set the model to training mode
    running_loss = 0.0
    for inputs, labels in train_loader:
        optimizer.zero_grad()  # Zero the gradients
        outputs = model(inputs)  # Forward pass
        loss = criterion(outputs, labels)  # Calculate the loss
        loss.backward()  # Backward pass
        optimizer.step()  # Update weights
        running_loss += loss.item() * inputs.size(0)
    
    # Print average training loss for the epoch
    print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_dataset)}")

# Save the trained model
torch.save(model.state_dict(), 'trained_model.pth')
print("Model saved successfully!")

# Optionally, evaluate the model on the test dataset
model.eval()  # Set the model to evaluation mode
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Accuracy on test set: {100 * correct / total}%")


  image_train = torch.tensor(image_train).float()
  label_train = torch.tensor(label_train).float()
  image_test = torch.tensor(image_test).float()
  label_test = torch.tensor(label_test).float()


RuntimeError: Given groups=1, weight of size [75, 1, 3, 3], expected input[64, 28, 28, 1] to have 1 channels, but got 28 channels instead

In [11]:
print("image_train shape:", image_train.size())
print("label_train shape:", label_train.size())


image_train shape: torch.Size([27455, 28, 28, 1])
label_train shape: torch.Size([27455, 24])
