In [1]:
# Install PyTorch and Torchvision
!pip install torch torchvision

# Install Roboflow for dataset download
!pip install roboflow

# Install pycocotools for handling COCO datasets
!pip install pycocotools





[notice] A new release of pip is available: 23.2.1 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip






[notice] A new release of pip is available: 23.2.1 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip





[notice] A new release of pip is available: 23.2.1 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip





In [2]:
import os
import torch
import torchvision
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms as T
from torchvision.models.detection import FasterRCNN
from torchvision.models import mobilenet_v2
from torchvision.ops import MultiScaleRoIAlign
from PIL import Image
from pycocotools.coco import COCO
from roboflow import Roboflow
import time


In [None]:
# Replace 'your_roboflow_api_key' with your actual Roboflow API key
rf = Roboflow(api_key="upR0TpbLVIlvsGJwoHe5")
project = rf.workspace("cpen-355").project("basketball-scorekeeper-uij6y")
version = project.version(2)
dataset = version.download("coco")


loading Roboflow workspace...
loading Roboflow project...


Downloading Dataset Version Zip in Basketball-Scorekeeper-2 to coco:: 100%|██████████| 119415/119415 [00:11<00:00, 10508.57it/s]





Extracting Dataset Version Zip to Basketball-Scorekeeper-2 in coco:: 100%|██████████| 681/681 [00:00<00:00, 1188.17it/s]


In [7]:
data_dir = dataset.location  # Path to the dataset directory
train_dir = os.path.join(data_dir, 'train')
val_dir = os.path.join(data_dir, 'valid')

train_annotations = os.path.join(train_dir, '_annotations.coco.json')
val_annotations = os.path.join(val_dir, '_annotations.coco.json')


In [4]:
class CocoDetectionDataset(Dataset):
    def __init__(self, root, annFile, transforms=None):
        self.root = root
        self.coco = COCO(annFile)
        self.ids = list(sorted(self.coco.imgs.keys()))
        self.transforms = transforms

        # Create a mapping from category IDs to labels starting from 1
        self.catid2label = {cat_id: idx + 1 for idx, cat_id in enumerate(sorted(self.coco.getCatIds()))}
        self.label2catid = {v: k for k, v in self.catid2label.items()}

    def __getitem__(self, index):
        # Load image and annotations
        coco = self.coco
        img_id = self.ids[index]
        ann_ids = coco.getAnnIds(imgIds=img_id)
        anns = coco.loadAnns(ann_ids)

        # Load image
        img_info = coco.loadImgs(img_id)[0]
        path = img_info['file_name']
        img_path = os.path.join(self.root, path)
        img = Image.open(img_path).convert('RGB')

        # Prepare target
        num_objs = len(anns)
        boxes = []
        labels = []
        areas = []
        iscrowd = []

        for i in range(num_objs):
            ann = anns[i]
            bbox = ann['bbox']
            # COCO format bbox: [x_min, y_min, width, height]
            x_min = bbox[0]
            y_min = bbox[1]
            width = bbox[2]
            height = bbox[3]
            x_max = x_min + width
            y_max = y_min + height
            boxes.append([x_min, y_min, x_max, y_max])
            labels.append(self.catid2label[ann['category_id']])
            areas.append(width * height)
            iscrowd.append(ann.get('iscrowd', 0))

        # Convert everything into torch tensors
        if len(boxes) == 0:
            # Handle images without annotations
            boxes = torch.zeros((0, 4), dtype=torch.float32)
            labels = torch.zeros((0,), dtype=torch.int64)
            areas = torch.zeros((0,), dtype=torch.float32)
            iscrowd = torch.zeros((0,), dtype=torch.int64)
        else:
            boxes = torch.as_tensor(boxes, dtype=torch.float32)
            labels = torch.as_tensor(labels, dtype=torch.int64)
            areas = torch.as_tensor(areas, dtype=torch.float32)
            iscrowd = torch.as_tensor(iscrowd, dtype=torch.int64)

        image_id = torch.tensor([img_id])

        target = {}
        target['boxes'] = boxes
        target['labels'] = labels
        target['area'] = areas
        target['iscrowd'] = iscrowd
        target['image_id'] = image_id

        if self.transforms is not None:
            img = self.transforms(img)

        return img, target

    def __len__(self):
        return len(self.ids)


In [5]:
transform = T.Compose([
    T.ToTensor(),
    # Add other transforms like normalization, data augmentation, etc., if needed
])


In [8]:
# Create datasets
train_dataset = CocoDetectionDataset(root=train_dir, annFile=train_annotations, transforms=transform)
val_dataset = CocoDetectionDataset(root=val_dir, annFile=val_annotations, transforms=transform)

# Define the collate function
def collate_fn(batch):
    return tuple(zip(*batch))

# Create data loaders
train_loader = DataLoader(
    train_dataset,
    batch_size=4,  # Adjust batch size if necessary
    shuffle=True,
    num_workers=4,  # Adjust based on your environment
    collate_fn=collate_fn,
    pin_memory=True
)

val_loader = DataLoader(
    val_dataset,
    batch_size=4,
    shuffle=False,
    num_workers=4,
    collate_fn=collate_fn,
    pin_memory=True
)


loading annotations into memory...
Done (t=0.01s)
creating index...
index created!
loading annotations into memory...
Done (t=0.00s)
creating index...
index created!


In [9]:
import torchvision
from torchvision.models.detection import FasterRCNN
from torchvision.models import mobilenet_v2
from torchvision.ops import MultiScaleRoIAlign

# Number of classes (background + your categories)
num_classes = len(train_dataset.catid2label) + 1  # +1 for background class

# Load a pre-trained MobileNetV2 model and extract the features
backbone = mobilenet_v2(weights='DEFAULT').features
backbone.out_channels = 1280  # MobileNetV2's last layer output channels

# Define the anchor generator with appropriate sizes and aspect ratios
anchor_generator = torchvision.models.detection.rpn.AnchorGenerator(
    sizes=((32, 64, 128, 256, 512),),
    aspect_ratios=((0.5, 1.0, 2.0),)
)

# Define the RoI pooler
roi_pooler = MultiScaleRoIAlign(
    featmap_names=['0'],  # The feature map key to use (since MobileNetV2 has a single feature map)
    output_size=7,        # Size of the pooled region
    sampling_ratio=2
)

# Create the Faster R-CNN model using the MobileNetV2 backbone
model = FasterRCNN(
    backbone=backbone,
    num_classes=num_classes,
    rpn_anchor_generator=anchor_generator,
    box_roi_pool=roi_pooler
)

# Move the model to the appropriate device
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model.to(device)

# Optionally, freeze the backbone layers to reduce training time
for param in model.backbone.parameters():
    param.requires_grad = False


Downloading: "https://download.pytorch.org/models/mobilenet_v2-7ebf99e0.pth" to C:\Users\vikto/.cache\torch\hub\checkpoints\mobilenet_v2-7ebf99e0.pth
100%|██████████| 13.6M/13.6M [00:01<00:00, 10.4MB/s]


In [10]:
# Only include parameters that require gradients
params = [p for p in model.parameters() if p.requires_grad]

# Define the optimizer
optimizer = torch.optim.SGD(params, lr=0.005, momentum=0.9, weight_decay=0.0005)

# Define the learning rate scheduler
lr_scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)


In [11]:
from torch.cuda.amp import autocast, GradScaler

# Initialize the GradScaler
scaler = GradScaler()


`torch.cuda.amp.GradScaler(args...)` is deprecated. Please use `torch.amp.GradScaler('cuda', args...)` instead.
torch.cuda.amp.GradScaler is enabled, but CUDA is not available.  Disabling.


In [None]:
import os
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import json
from sklearn.model_selection import train_test_split
import shutil
import glob
import numpy as np
from PIL import Image

# Paths
dataset_path = "basketball-scorekeeper-uij6y"  # Path to the downloaded dataset
annotations_file = os.path.join(dataset_path, "annotations.json")
images_path = os.path.join(dataset_path, "images")

# Parameters
input_shape = (128, 128, 3)  # Resize images to 128x128 with 3 channels
batch_size = 32
epochs = 10
num_classes = 2  # Assuming two classes: 'score' and 'no-score'

# Step 1: Load COCO Annotations
def load_coco_annotations(annotations_file, images_path):
    with open(annotations_file, 'r') as f:
        annotations = json.load(f)
    
    images = {}
    for img in annotations['images']:
        images[img['id']] = os.path.join(images_path, img['file_name'])
    
    labels = {}
    for annotation in annotations['annotations']:
        image_id = annotation['image_id']
        category_id = annotation['category_id']
        if image_id not in labels:
            labels[image_id] = []
        labels[image_id].append(category_id)
    
    return images, labels

# Step 2: Prepare Data
def prepare_data(images, labels):
    X, y = [], []
    for image_id, image_path in images.items():
        if image_id in labels:
            img = Image.open(image_path).resize(input_shape[:2])
            X.append(np.array(img))
            y.append(labels[image_id][0])  # Assuming single label per image for simplicity
    
    X = np.array(X) / 255.0  # Normalize to [0, 1]
    y = tf.keras.utils.to_categorical(y, num_classes=num_classes)
    return X, y

images, labels = load_coco_annotations(annotations_file, images_path)
X, y = prepare_data(images, labels)

# Step 3: Split Data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Build the CNN Model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')
])

# Step 5: Compile the Model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Step 6: Train the Model
history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=epochs,
    batch_size=batch_size
)

# Step 7: Evaluate the Model
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=2)
print(f"Test Accuracy: {test_accuracy:.2f}")

# Step 8: Save the Model
model.save("basketball_scorekeeper_model.h5")
