In [None]:
# Imports
import os, zipfile, shutil
from pathlib import Path
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models, transforms
from PIL import Image
print('Imports loaded')

In [None]:
# Seed the RNGs used by this notebook and pick the compute device
SEED = int(os.environ.get('PROJECT_SEED', 42))  # PROJECT_SEED env var overrides the default
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

np.random.seed(SEED)
torch.manual_seed(SEED)
if device.type == 'cuda':
    # Also seed the CUDA generator when a GPU is available.
    torch.cuda.manual_seed(SEED)

print(f'Using device: {device}')

In [None]:
# Configuration (portable): env vars -> ./data -> auto-detect
# Demo mode is on unless DEMO_MODE is set to something other than '1' (use '0' for full runs).
DEMO_MODE = os.getenv('DEMO_MODE', '1') == '1'
DATA_DIR = os.getenv('PROJECT_DATA_DIR', './data')

# Optional explicit locations; when set they take priority over auto-detection.
TRAIN_CSV_PATH = os.getenv('TRAIN_CSV_PATH')
TEST_CSV_PATH = os.getenv('TEST_CSV_PATH')
ZIP_FILE_PATH = os.getenv('ZIP_FILE_PATH')
EXTRACT_DIR = os.getenv('EXTRACT_DIR', os.path.join(DATA_DIR, 'images'))

# Hyper-parameters; batch size and epoch count shrink in demo mode.
CONFIG = dict(
    batch_size=4 if DEMO_MODE else 8,
    num_workers=0,
    epochs=3 if DEMO_MODE else 50,
    learning_rate=1e-4,
    weight_decay=1e-4,
    num_classes=61,
    image_size=224,
    bbox_loss_weight=0.01,
)
print('CONFIG loaded, DEMO_MODE=', DEMO_MODE)

In [None]:
# Helper: extract zip and find csvs/images
def extract_zip_dataset(zip_path, extract_to='./data'):
    """Unpack a ZIP archive into ``extract_to`` and return that directory.

    Args:
        zip_path: Path of the archive to read.
        extract_to: Destination directory; created (with parents) if missing.

    Returns:
        pathlib.Path of the destination directory.
    """
    destination = Path(extract_to)
    destination.mkdir(parents=True, exist_ok=True)
    with zipfile.ZipFile(zip_path, 'r') as archive:
        archive.extractall(path=destination)
    return destination

def find_csv_and_images(root_dir):
    """Locate the train/test CSV files and the image folder below ``root_dir``.

    CSV files are matched by the substrings ``'train'`` / ``'test'`` in their
    lower-cased file name; the first match in sorted path order wins. The image
    folder is the directory (``root_dir`` itself or any descendant) that directly
    contains the most ``.jpg``/``.jpeg``/``.png`` files, ties going to the first
    directory in sorted path order.

    Args:
        root_dir: Directory to search recursively.

    Returns:
        dict with keys ``train_csv`` and ``test_csv`` (path strings or ``None``)
        and ``image_dir`` (path string; falls back to ``root_dir`` when no
        directory contains an image).
    """
    root = Path(root_dir)
    image_suffixes = {'.jpg', '.jpeg', '.png'}

    # Sort the matches: directory enumeration order is filesystem-dependent, and
    # "first match wins" should give the same answer on every machine.
    train_csv = None
    test_csv = None
    for csv_path in sorted(root.rglob('*.csv')):
        name = csv_path.name.lower()
        if train_csv is None and 'train' in name:
            train_csv = str(csv_path)
        if test_csv is None and 'test' in name:
            test_csv = str(csv_path)

    def _count_images(directory):
        """Number of image files directly inside ``directory`` (0 if not a readable dir)."""
        try:
            if not directory.is_dir():
                return 0
            return sum(1 for entry in directory.iterdir()
                       if entry.suffix.lower() in image_suffixes)
        except OSError:
            # Unreadable directories are skipped on purpose (best-effort search).
            return 0

    # images dir: choose the folder containing the most jpg/png files
    best_dir = None
    best_count = 0
    for candidate in [root] + sorted(root.rglob('*')):
        count = _count_images(candidate)
        if count > best_count:
            best_dir, best_count = candidate, count

    image_dir = str(best_dir) if best_dir is not None else str(root)
    return dict(train_csv=train_csv, test_csv=test_csv, image_dir=image_dir)

print('Helper functions defined')

In [None]:
# Dataset class (same API as original but portable)
class CowStallDataset(Dataset):
    """Map-style dataset yielding ``(image, label, bbox)`` for each dataframe row.

    Column layout read by ``__getitem__``:
      * column 0 (by position): image file name, joined onto ``image_dir``;
      * column 5 (by position): integer class label;
      * columns ``box_position_1`` .. ``box_position_4`` (by name): bounding box.

    Args:
        df: DataFrame with one row per sample; its index is reset on construction.
        image_dir: Directory the image file names are relative to.
        transform: Optional callable applied to the float32 HxWx3 RGB array.
    """
    def __init__(self, df, image_dir, transform=None):
        self.df = df.reset_index(drop=True)
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the dataframe."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load sample ``idx``.

        Returns:
            Tuple ``(img, label, bbox)``: the image (float32 in [0, 1], resized to
            ``CONFIG['image_size']`` square, then passed through ``transform`` if
            given), the label as a ``torch.long`` scalar tensor, and the box as a
            float32 tensor of four values.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        row = self.df.iloc[idx]
        # Use .iloc for positional access: integer keys on a string-labelled Series
        # (`row[0]`) rely on a deprecated pandas fallback and are slated to become
        # label lookups.
        fname = str(row.iloc[0])
        img_path = os.path.join(self.image_dir, fname)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals failure by returning None rather than raising.
            raise FileNotFoundError(f'Image not found: {img_path}')
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (CONFIG['image_size'], CONFIG['image_size']))
        img = img.astype('float32') / 255.0
        if self.transform is not None:
            img = self.transform(img)
        label = torch.tensor(int(row.iloc[5]), dtype=torch.long)
        bbox = row[['box_position_1','box_position_2','box_position_3','box_position_4']].values.astype('float32')
        # Turn the last two values into sums with the first two -- presumably
        # (x, y, w, h) -> (x1, y1, x2, y2); confirm against the dataset description.
        bbox[2] = bbox[0] + bbox[2]
        bbox[3] = bbox[1] + bbox[3]
        # NOTE(review): the image is resized above but the box is left in the CSV's
        # coordinate system -- confirm whether the targets should be rescaled too.
        bbox = torch.as_tensor(bbox, dtype=torch.float32)
        return img, label, bbox

print('Dataset class defined')

In [None]:
# Transforms (accept numpy arrays as input)
IMAGE_SIZE = CONFIG['image_size']


def _build_transform():
    """Build one pipeline: tensor conversion, resize to IMAGE_SIZE, per-channel normalisation."""
    steps = [
        transforms.ToTensor(),
        transforms.Resize((IMAGE_SIZE, IMAGE_SIZE)),
        transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),
    ]
    return transforms.Compose(steps)


# Train and test currently use the same steps; each gets its own Compose instance.
train_transform = _build_transform()
test_transform = _build_transform()
print('Transforms prepared')

In [None]:
# Load data: prefer env paths, then ./data, then auto-detect inside zip if provided
print('\n--- Loading dataset (portable) ---')
# Ensure data dir exists
Path(DATA_DIR).mkdir(parents=True, exist_ok=True)
# Initialise IMAGE_DIR up front so the `IMAGE_DIR or ...` fallbacks below never read an
# undefined name on a fresh kernel; an IMAGE_DIR env var, if set, takes precedence.
IMAGE_DIR = os.environ.get('IMAGE_DIR')
# If explicit CSV env vars were not set, look inside ./data or inside ZIP
if not TRAIN_CSV_PATH or not TEST_CSV_PATH:
    # try common filenames in DATA_DIR
    for name in ['train.csv','Train.csv']:
        candidate = os.path.join(DATA_DIR, name)
        if os.path.exists(candidate):
            TRAIN_CSV_PATH = TRAIN_CSV_PATH or candidate
            break
    for name in ['test.csv','Test.csv']:
        candidate = os.path.join(DATA_DIR, name)
        if os.path.exists(candidate):
            TEST_CSV_PATH = TEST_CSV_PATH or candidate
            break
# If ZIP provided and CSVs still not found, extract and search
if (not TRAIN_CSV_PATH or not TEST_CSV_PATH) and ZIP_FILE_PATH and os.path.exists(ZIP_FILE_PATH):
    print('Extracting ZIP to', EXTRACT_DIR)
    extracted = extract_zip_dataset(ZIP_FILE_PATH, EXTRACT_DIR)
    info = find_csv_and_images(extracted)
    TRAIN_CSV_PATH = TRAIN_CSV_PATH or info.get('train_csv')
    TEST_CSV_PATH = TEST_CSV_PATH or info.get('test_csv')
    IMAGE_DIR = IMAGE_DIR or info.get('image_dir')

# If still missing, try searching workspace root as last resort
if (not TRAIN_CSV_PATH or not TEST_CSV_PATH):
    root_info = find_csv_and_images('.')
    TRAIN_CSV_PATH = TRAIN_CSV_PATH or root_info.get('train_csv')
    TEST_CSV_PATH = TEST_CSV_PATH or root_info.get('test_csv')
    IMAGE_DIR = IMAGE_DIR or root_info.get('image_dir')

# If IMAGE_DIR still None, fall back to EXTRACT_DIR or DATA_DIR
if IMAGE_DIR is None:
    if os.path.exists(EXTRACT_DIR):
        IMAGE_DIR = EXTRACT_DIR
    elif os.path.exists(DATA_DIR):
        IMAGE_DIR = DATA_DIR

print('Resolved paths:')
print('  TRAIN_CSV_PATH =', TRAIN_CSV_PATH)
print('  TEST_CSV_PATH  =', TEST_CSV_PATH)
print('  IMAGE_DIR      =', IMAGE_DIR)

# Final existence checks and load
if not TRAIN_CSV_PATH or not TEST_CSV_PATH or not IMAGE_DIR:
    raise FileNotFoundError('Train/Test CSV or image dir not found. Place files into ./data or set environment variables.')
# A resolved path can still point at nothing (e.g. a mistyped env var); fail here with
# the offending path instead of later inside pandas or the first training batch.
missing_paths = [p for p in (TRAIN_CSV_PATH, TEST_CSV_PATH, IMAGE_DIR) if not os.path.exists(p)]
if missing_paths:
    raise FileNotFoundError('Resolved path(s) do not exist: ' + ', '.join(str(p) for p in missing_paths))

# Missing cells are replaced with 0 in both frames.
df_train = pd.read_csv(TRAIN_CSV_PATH).fillna(0)
df_test = pd.read_csv(TEST_CSV_PATH).fillna(0)
print('Loaded dataframes: train=', len(df_train), ' test=', len(df_test))
# DEMO: if DEMO_MODE, sample small subset to speed up runs
if DEMO_MODE:
    df_train = df_train.sample(n=min(32, len(df_train)), random_state=SEED).reset_index(drop=True)
    df_test = df_test.sample(n=min(16, len(df_test)), random_state=SEED).reset_index(drop=True)
    print('DEMO_MODE: sampled train=', len(df_train), ' test=', len(df_test))

In [None]:
# Create datasets and dataloaders
train_dataset = CowStallDataset(df_train, IMAGE_DIR, transform=train_transform)
test_dataset = CowStallDataset(df_test, IMAGE_DIR, transform=test_transform)

# Both loaders share batch size and worker count; only the training loader shuffles.
_loader_kwargs = {
    'batch_size': CONFIG['batch_size'],
    'num_workers': CONFIG['num_workers'],
}
train_loader = DataLoader(train_dataset, shuffle=True, **_loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **_loader_kwargs)
print('DataLoaders:', len(train_loader), 'train batches,', len(test_loader), 'test batches')

In [None]:
# Simple model (ResNet50 backbone with two heads)
class ObjectDetectionModel(nn.Module):
    """ResNet-50 feature extractor feeding two linear heads.

    The backbone's final ``fc`` layer is replaced with ``nn.Identity`` so it emits
    the pooled feature vector, which is passed to a classification head and a
    bounding-box regression head.

    Args:
        num_classes: Output width of the classification head.
        num_bbox: Output width of the box regression head.
    """
    def __init__(self, num_classes=61, num_bbox=4):
        super().__init__()
        # `pretrained=True` is deprecated in torchvision >= 0.13 in favour of the
        # `weights=` argument; IMAGENET1K_V1 is the checkpoint `pretrained=True`
        # selected. Older torchvision has no weights enum, so keep the legacy call.
        weights_enum = getattr(models, 'ResNet50_Weights', None)
        if weights_enum is not None:
            self.backbone = models.resnet50(weights=weights_enum.IMAGENET1K_V1)
        else:
            self.backbone = models.resnet50(pretrained=True)
        nf = self.backbone.fc.in_features
        self.backbone.fc = nn.Identity()
        self.classifier = nn.Linear(nf, num_classes)
        self.bbox_regressor = nn.Linear(nf, num_bbox)

    def forward(self, x):
        """Return ``(logits, bbox)`` computed from the shared backbone features of ``x``."""
        feats = self.backbone(x)
        logits = self.classifier(feats)
        bbox = self.bbox_regressor(feats)
        return logits, bbox

# Build the network with the configured class count, then move it to the chosen device.
model = ObjectDetectionModel(num_classes=CONFIG['num_classes'])
model = model.to(device)
print('Model ready, params:', sum(param.numel() for param in model.parameters()))

In [None]:
# Training setup
criterion_cls = nn.CrossEntropyLoss()
criterion_bbox = nn.MSELoss()
optimizer = optim.Adam([p for p in model.parameters() if p.requires_grad], lr=CONFIG['learning_rate'], weight_decay=CONFIG['weight_decay'])
# Multiply the learning rate by 0.1 every 20 epochs.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)

EPOCHS = CONFIG['epochs']
# sqrt has an infinite derivative at 0, so an exactly-zero MSE would produce NaN
# gradients; a tiny epsilon inside the root keeps the backward pass finite.
RMSE_EPS = 1e-8
for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0
    for images, labels, bboxes in train_loader:
        images = images.to(device)
        labels = labels.to(device)
        bboxes = bboxes.to(device)
        optimizer.zero_grad()
        logits, bbox_preds = model(images)
        # Total loss = cross-entropy on the class logits + weighted RMSE on the boxes.
        loss1 = criterion_cls(logits, labels)
        loss2 = torch.sqrt(criterion_bbox(bbox_preds, bboxes) + RMSE_EPS) * CONFIG['bbox_loss_weight']
        loss = loss1 + loss2
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    scheduler.step()
    # max(1, ...) avoids a ZeroDivisionError when the loader yields no batches.
    print(f'Epoch {epoch+1}/{EPOCHS} - loss: {running_loss/max(1, len(train_loader)):.4f}')
    # NOTE(review): in demo mode the loop stops after the first epoch, so the
    # demo value of CONFIG['epochs'] is never reached -- confirm this is intended.
    if DEMO_MODE and epoch==0:
        break

In [None]:
# Save a small checkpoint: model weights plus the CONFIG dict used for this run
checkpoint = {'model_state_dict': model.state_dict(), 'config': CONFIG}
os.makedirs('./models', exist_ok=True)  # create the output folder on first run
torch.save(checkpoint, './models/portable_checkpoint.pt')
print('Saved checkpoint to ./models/portable_checkpoint.pt')

---
Notes:
- For full training, set `DEMO_MODE=0` and adjust `CONFIG['epochs']`.
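- To run on another machine, set the `PROJECT_DATA_DIR` environment variable, or point `TRAIN_CSV_PATH`/`TEST_CSV_PATH` directly at the CSV files; `ZIP_FILE_PATH` (and optionally `EXTRACT_DIR`) can supply a dataset archive to extract and search.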
- See `README.md` in the project root for instructions on dataset placement and recommended Git LFS usage.