In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import transforms, datasets
import cv2
import numpy as np

In [None]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Shared preprocessing: stretch every image to 128x128 and convert it to a float
# tensor scaled to [0, 1]. No mean/std normalisation is applied here.
transform = transforms.Compose(
    [transforms.Resize((128, 128)), transforms.ToTensor()]
)

# Training split: one sub-folder per class, reshuffled on every pass.
train_dataset = datasets.ImageFolder("dataset/train", transform=transform)
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)

# Validation split: same folder layout, iterated in a fixed order.
val_dataset = datasets.ImageFolder("dataset/val", transform=transform)
val_loader = DataLoader(dataset=val_dataset, batch_size=32, shuffle=False)

# Show the folder-name -> label-index mapping. ImageFolder numbers the class
# folders in sorted (alphabetical) order, so folders named paper/rock/scissors/
# unknown would print {'paper': 0, 'rock': 1, 'scissors': 2, 'unknown': 3}.
print(train_dataset.class_to_idx)


In [None]:
# Define the CNN model
class RockPaperScissorsCNN(nn.Module):
    """Small convolutional classifier for hand-gesture images.

    Architecture: two ``3x3 conv -> ReLU -> 2x2 max-pool`` stages
    (3 -> 32 -> 64 channels), then a 128-unit hidden layer and a final linear
    layer that emits raw (un-normalised) class logits.

    Args:
        num_classes: Number of output logits. Defaults to 4
            (rock, paper, scissors, unknown).
        input_size: Spatial size of the input images, either a single int for
            square images or a ``(height, width)`` pair. Defaults to 128, which
            reproduces the original ``64 * 32 * 32`` flattened feature size.

    Raises:
        ValueError: If ``num_classes`` is below 1 or either input dimension is
            smaller than 4 pixels (nothing would be left after two poolings).
    """

    def __init__(self, num_classes=4, input_size=128):
        super().__init__()
        if isinstance(input_size, int):
            height = width = input_size
        else:
            height, width = input_size
        if num_classes < 1:
            raise ValueError(f"num_classes must be >= 1, got {num_classes}")
        if height < 4 or width < 4:
            raise ValueError(f"input_size must be at least 4x4, got {height}x{width}")

        # Layer names and creation order are unchanged so existing checkpoints
        # and seeded initialisation keep working.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # The padded 3x3 convs keep the spatial size; each pool halves it
        # (rounding down), so two pools leave (height // 4) x (width // 4).
        self.fc1 = nn.Linear(64 * (height // 4) * (width // 4), 128)
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        """Map a ``(batch, 3, H, W)`` image tensor to ``(batch, num_classes)`` logits."""
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        # flatten (unlike view) also copes with non-contiguous activations,
        # e.g. when the input uses the channels_last memory format.
        x = torch.flatten(x, start_dim=1)
        x = F.relu(self.fc1(x))
        return self.fc2(x)

# Use the GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Build the network, then move its parameters onto the selected device.
model = RockPaperScissorsCNN()
model = model.to(device)

# Cross-entropy on the raw logits, optimised with Adam at a 1e-3 learning rate.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

In [None]:
# Placeholder for dataset (to be replaced with real dataset when ready)
def load_data(num_classes=4, batch_size=32):
    """Build a DataLoader of synthetic images for smoke-testing the training loop.

    The images are random noise from ``torchvision.datasets.FakeData``, so a
    model trained on them learns nothing useful; this only checks that the
    pipeline runs end to end.

    Args:
        num_classes: Number of distinct labels to generate. Must match the
            model's output size; defaults to 4. FakeData's own default is 10,
            which would produce labels outside the 4-logit range and make
            ``CrossEntropyLoss`` fail.
        batch_size: Samples per batch. Defaults to 32.

    Returns:
        torch.utils.data.DataLoader: shuffled loader yielding
        ``(images, labels)`` batches of 128x128 image tensors.
    """
    transform = transforms.Compose([
        transforms.Resize((128, 128)),
        transforms.ToTensor(),
    ])
    train_data = datasets.FakeData(num_classes=num_classes, transform=transform)  # Placeholder
    train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, shuffle=True)
    return train_loader

# Training function
def train_model(model, train_loader, criterion, optimizer, epochs=10):
    """Run a plain supervised training loop and report the mean loss per epoch.

    Args:
        model: ``nn.Module`` to train; switched to training mode and updated
            in place.
        train_loader: Iterable yielding ``(images, labels)`` tensor batches.
        criterion: Loss callable applied as ``criterion(outputs, labels)``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        epochs: Number of full passes over ``train_loader``. Defaults to 10.

    Returns:
        list[float]: Mean batch loss of each epoch, in order.

    Raises:
        ValueError: If ``train_loader`` yields no batches.
    """
    # Move batches to wherever the model lives rather than relying on a
    # notebook-global `device`, so the function works on a fresh kernel.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else None

    epoch_losses = []
    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        num_batches = 0
        for images, labels in train_loader:
            if target_device is not None:
                images, labels = images.to(target_device), labels.to(target_device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1

        # Count batches ourselves: this also supports loaders without __len__
        # and avoids an opaque ZeroDivisionError on an empty loader.
        if num_batches == 0:
            raise ValueError("train_loader yielded no batches; nothing to train on")

        mean_loss = running_loss / num_batches
        epoch_losses.append(mean_loss)
        print(f"Epoch {epoch+1}/{epochs}, Loss: {mean_loss:.4f}")

    return epoch_losses

# Start training on synthetic placeholder data. The fake loader gets its own
# name so it does not overwrite the real `train_loader` built from
# dataset/train; once the real dataset is ready, pass `train_loader` instead.
placeholder_loader = load_data()
train_model(model, placeholder_loader, criterion, optimizer, epochs=10)

In [None]:
# Real-time Hand Gesture Recognition
def predict_live(model, threshold=0.7, classes=None, camera_index=0):
    """Classify webcam frames in real time and overlay the prediction.

    Opens a camera, runs every frame through ``model`` and draws the predicted
    label and confidence on the frame until 'q' is pressed or the camera stops
    delivering frames.

    Args:
        model: Trained classifier returning one logit per class for a
            ``(1, 3, 128, 128)`` input.
        threshold: Minimum softmax confidence required to show the predicted
            class; below it the label "Unknown" is shown. Defaults to 0.7.
        classes: Display names indexed by class id. Defaults to
            ``["Paper", "Rock", "Scissors", "Unknown"]``, i.e. the alphabetical
            order in which ``ImageFolder`` numbers class folders. This assumes
            the folders are named paper/rock/scissors/unknown; pass an explicit
            list if the training labels differ.
        camera_index: OpenCV capture device index. Defaults to 0.
    """
    if classes is None:
        classes = ["Paper", "Rock", "Scissors", "Unknown"]

    model.eval()
    # Run inference on the model's own device instead of a notebook global.
    first_param = next(model.parameters(), None)
    target_device = first_param.device if first_param is not None else torch.device("cpu")

    # OpenCV frames are NumPy arrays, which Resize does not accept, so convert
    # to a PIL image first; after that the pipeline matches training.
    transform = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize((128, 128)),
        transforms.ToTensor(),
    ])

    cap = cv2.VideoCapture(camera_index)
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            # Region of interest is the whole frame - crop here if needed.
            # OpenCV delivers BGR; the training images were loaded as RGB.
            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            img_tensor = transform(rgb_frame).unsqueeze(0).to(target_device)  # Add batch dimension

            with torch.no_grad():
                output = model(img_tensor)
                probabilities = F.softmax(output, dim=1)
                confidence, predicted_class = torch.max(probabilities, 1)

            # Display prediction if confidence is above threshold
            label = classes[predicted_class.item()] if confidence.item() >= threshold else "Unknown"
            cv2.putText(frame, f"Prediction: {label} ({confidence.item():.2f})", (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
            cv2.imshow("Rock Paper Scissors", frame)

            # Exit if 'q' is pressed
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the window, even after an error.
        cap.release()
        cv2.destroyAllWindows()

# Run live prediction (Uncomment after training the model)
# predict_live(model)