In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score, recall_score, f1_score, classification_report
import cv2
import numpy as np
import os
import glob
from tqdm import tqdm

# Set device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

Using device: cpu


In [2]:
# Simple configuration
video_dir = './SportsData/'
frame_size = (128, 128)  
sequence_length = 10      
frame_skip = 2       
batch_size = 16       
num_epochs = 10        
learning_rate = 0.001


In [3]:
class SimpleBasketballDataset(Dataset):
    def __init__(self, video_dir, frame_size, sequence_length, frame_skip):
        self.video_dir = video_dir
        self.frame_size = frame_size
        self.sequence_length = sequence_length
        self.frame_skip = frame_skip
        
        # Get video files
        self.video_files = []
        for ext in ['*.mp4', '*.avi', '*.mov']:
            self.video_files.extend(glob.glob(os.path.join(video_dir, ext)))
        
        # Get labels from filenames
        self.labels = []
        valid_videos = []
        
        for video_path in self.video_files:
            filename = os.path.basename(video_path).lower()
            if 'hit' in filename:
                self.labels.append(1)
                valid_videos.append(video_path)
            elif 'miss' in filename:
                self.labels.append(0)
                valid_videos.append(video_path)
        
        self.video_files = valid_videos
        print(f"Found {len(self.video_files)} videos")
        print(f"Hit: {sum(self.labels)}, Miss: {len(self.labels) - sum(self.labels)}")
    
    def __len__(self):
        return len(self.video_files)
    
    def __getitem__(self, idx):
        video_path = self.video_files[idx]
        label = self.labels[idx]
        frames = self.extract_frames(video_path)
        return frames, label
    
    def extract_frames(self, video_path):
        cap = cv2.VideoCapture(video_path)
        frames = []
        frame_count = 0
        
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            
            if frame_count % self.frame_skip == 0:
                frame = cv2.resize(frame, self.frame_size)
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                frames.append(frame)
            
            frame_count += 1
        
        cap.release()
        
        # Handle sequence length
        if len(frames) >= self.sequence_length:
            frames = frames[:self.sequence_length]
        else:
            while len(frames) < self.sequence_length:
                frames.append(frames[-1] if frames else np.zeros((*self.frame_size, 3)))
        
        # Convert to tensor
        frames = np.array(frames, dtype=np.float32) / 255.0
        frames = np.transpose(frames, (3, 0, 1, 2))  # (C, T, H, W)
        
        return torch.FloatTensor(frames)

In [4]:
# Create dataset
dataset = SimpleBasketballDataset(video_dir, frame_size, sequence_length, frame_skip)

# Simple train/test split
total_size = len(dataset)
train_size = int(0.7 * total_size)
test_size = total_size - train_size

train_dataset, test_dataset = torch.utils.data.random_split(dataset, [train_size, test_size])

print(f"Training samples: {len(train_dataset)}")
print(f"Testing samples: {len(test_dataset)}")

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

Found 90 videos
Hit: 71, Miss: 19
Training samples: 62
Testing samples: 28


In [5]:
class Simple3DCNN(nn.Module):
    def __init__(self, input_shape=(3, 8, 112, 112)):
        super(Simple3DCNN, self).__init__()
        
        # 3D convolutions
        self.conv1 = nn.Conv3d(3, 32, kernel_size=(3, 3, 3), padding=1)
        self.pool1 = nn.MaxPool3d(kernel_size=(1, 2, 2))
        
        self.conv2 = nn.Conv3d(32, 64, kernel_size=(3, 3, 3), padding=1)
        self.pool2 = nn.MaxPool3d(kernel_size=(2, 2, 2))
        
        self.conv3 = nn.Conv3d(64, 128, kernel_size=(3, 3, 3), padding=1)
        self.pool3 = nn.MaxPool3d(kernel_size=(2, 2, 2))
        
        # Calculate the size after convolutions
        self.feature_size = self._get_conv_output_size(input_shape)
        
        # Fully connected layers
        self.fc1 = nn.Linear(self.feature_size, 256)
        self.fc2 = nn.Linear(256, 2)  # 2 classes: Hit or Miss
        
    def _get_conv_output_size(self, input_shape):
        # Create a dummy input to calculate output size
        dummy_input = torch.zeros(1, *input_shape)
        with torch.no_grad():
            x = F.relu(self.conv1(dummy_input))
            x = self.pool1(x)
            x = F.relu(self.conv2(x))
            x = self.pool2(x)
            x = F.relu(self.conv3(x))
            x = self.pool3(x)
            return x.view(1, -1).size(1)
        
    def forward(self, x):
        # 3D convolutions
        x = F.relu(self.conv1(x))
        x = self.pool1(x)
        
        x = F.relu(self.conv2(x))
        x = self.pool2(x)
        
        x = F.relu(self.conv3(x))
        x = self.pool3(x)
        
        # Flatten and fully connected
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        
        return x


In [6]:
input_shape = (3, sequence_length, frame_size[0], frame_size[1])
model = Simple3DCNN(input_shape).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [7]:
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    correct = 0
    total = 0
    
    for batch_idx, (data, targets) in enumerate(tqdm(train_loader, desc=f'Epoch {epoch+1}')):
        data, targets = data.to(device), targets.to(device)
        
        # Forward pass
        optimizer.zero_grad()
        outputs = model(data)
        loss = criterion(outputs, targets)
        
        # Backward pass
        loss.backward()
        optimizer.step()
        
        # Statistics
        total_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += targets.size(0)
        correct += (predicted == targets).sum().item()
    
    # Print epoch results
    avg_loss = total_loss / len(train_loader)
    accuracy = 100. * correct / total
    print(f'Epoch {epoch+1}: Loss = {avg_loss:.4f}, Accuracy = {accuracy:.2f}%')


Epoch 1:   0%|          | 0/4 [00:00<?, ?it/s]

Epoch 1: 100%|██████████| 4/4 [00:40<00:00, 10.10s/it]


Epoch 1: Loss = 1.6704, Accuracy = 80.65%


Epoch 2: 100%|██████████| 4/4 [00:36<00:00,  9.23s/it]


Epoch 2: Loss = 0.5748, Accuracy = 80.65%


Epoch 3: 100%|██████████| 4/4 [00:36<00:00,  9.18s/it]


Epoch 3: Loss = 0.5416, Accuracy = 80.65%


Epoch 4: 100%|██████████| 4/4 [00:38<00:00,  9.63s/it]


Epoch 4: Loss = 0.4878, Accuracy = 80.65%


Epoch 5: 100%|██████████| 4/4 [00:38<00:00,  9.56s/it]


Epoch 5: Loss = 0.4948, Accuracy = 80.65%


Epoch 6: 100%|██████████| 4/4 [00:37<00:00,  9.50s/it]


Epoch 6: Loss = 0.4835, Accuracy = 80.65%


Epoch 7: 100%|██████████| 4/4 [00:39<00:00,  9.92s/it]


Epoch 7: Loss = 0.4781, Accuracy = 80.65%


Epoch 8: 100%|██████████| 4/4 [00:41<00:00, 10.29s/it]


Epoch 8: Loss = 0.4653, Accuracy = 80.65%


Epoch 9: 100%|██████████| 4/4 [00:41<00:00, 10.46s/it]


Epoch 9: Loss = 0.4844, Accuracy = 80.65%


Epoch 10: 100%|██████████| 4/4 [00:43<00:00, 10.91s/it]

Epoch 10: Loss = 0.4702, Accuracy = 80.65%





In [8]:
model.eval()
test_correct = 0
test_total = 0
all_predictions = []
all_targets = []

with torch.no_grad():
    for data, targets in tqdm(test_loader):
        data, targets = data.to(device), targets.to(device)
        
        outputs = model(data)
        _, predicted = torch.max(outputs.data, 1)
        
        test_total += targets.size(0)
        test_correct += (predicted == targets).sum().item()
        
        all_predictions.extend(predicted.cpu().numpy())
        all_targets.extend(targets.cpu().numpy())

# Calculate metrics
test_accuracy = 100. * test_correct / test_total
accuracy = accuracy_score(all_targets, all_predictions)
recall = recall_score(all_targets, all_predictions, average='macro')  # or 'micro', 'weighted'
f1 = f1_score(all_targets, all_predictions, average='macro')  # or 'micro', 'weighted'

# Print results
print(f'Test Accuracy (manual): {test_accuracy:.2f}%')
print(f'Accuracy (sklearn): {accuracy:.4f}')
print(f'Recall (macro): {recall:.4f}')
print(f'F1 Score (macro): {f1:.4f}')

# Optional: Print detailed classification report
print('\nDetailed Classification Report:')
print(classification_report(all_targets, all_predictions))

  0%|          | 0/28 [00:00<?, ?it/s]

100%|██████████| 28/28 [00:26<00:00,  1.07it/s]


Test Accuracy (manual): 75.00%
Accuracy (sklearn): 0.7500
Recall (macro): 0.5000
F1 Score (macro): 0.4286

Detailed Classification Report:
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         7
           1       0.75      1.00      0.86        21

    accuracy                           0.75        28
   macro avg       0.38      0.50      0.43        28
weighted avg       0.56      0.75      0.64        28



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
