In [1]:
import zipfile
import pandas as pd
import os
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split

In [2]:
# Unzipping the dataset
with zipfile.ZipFile('/content/train_SOaYf6m.zip', 'r') as zip_ref:
    zip_ref.extractall('data')

In [3]:
# Loading the training data and testing data
train_df = pd.read_csv('/content/data/train.csv')
test_df = pd.read_csv('/content/data/test.csv')

In [4]:
# Splitting the training data into training and validation sets
train_df, val_df = train_test_split(train_df, test_size=0.2, random_state=42)

In [5]:
# Defining image path
image_path = '/content/data/images'

In [6]:
# Custom Dataset class for loading images
class VehicleDataset(Dataset):
    def __init__(self, dataframe, image_dir, transform=None):
        self.dataframe = dataframe
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, idx):
        img_name = os.path.join(self.image_dir, self.dataframe.iloc[idx, 0])
        image = Image.open(img_name).convert('RGB')
        label = self.dataframe.iloc[idx, 1] if 'emergency_or_not' in self.dataframe.columns else -1

        if self.transform:
            image = self.transform(image)

        return image, label

In [7]:
# Defining transformations for the training data with data augmentation
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ToTensor(),
])

# Defining transformations for the testing data
test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

In [None]:
# Define transformations for the images
#transform = transforms.Compose([
#    transforms.Resize((128, 128)),
#    transforms.ToTensor(),
#])

In [8]:
# Creating datasets and dataloaders
train_dataset = VehicleDataset(train_df, image_path, transform=train_transform)
val_dataset = VehicleDataset(val_df, image_path, transform=train_transform)
test_dataset = VehicleDataset(test_df, image_path, transform=test_transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [9]:
# Loading pre-trained ResNet18 model and modify the final layer
model = models.resnet18(pretrained=True)
num_ftrs = model.fc.in_features
# 2 classes: emergency and non-emergency
model.fc = nn.Linear(num_ftrs, 2)


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 172MB/s]


In [10]:
# Defining loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)


In [11]:
# Training the model with validation
num_epochs = 10
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}")

    # Validation step
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in val_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)

    val_accuracy = correct / total
    print(f"Validation Loss: {val_loss/len(val_loader)}, Validation Accuracy: {val_accuracy}")

Epoch 1/10, Loss: 0.3668429055472925
Validation Loss: 0.31993619013916363, Validation Accuracy: 0.8727272727272727
Epoch 2/10, Loss: 0.2304074876010418
Validation Loss: 1.061462489041415, Validation Accuracy: 0.806060606060606
Epoch 3/10, Loss: 0.2788249823663916
Validation Loss: 0.41898030042648315, Validation Accuracy: 0.8242424242424242
Epoch 4/10, Loss: 0.1471666841812077
Validation Loss: 0.29302086477929895, Validation Accuracy: 0.8727272727272727
Epoch 5/10, Loss: 0.1050182053198417
Validation Loss: 0.19386424191973425, Validation Accuracy: 0.9181818181818182
Epoch 6/10, Loss: 0.09631109955011025
Validation Loss: 0.33945635841651395, Validation Accuracy: 0.8848484848484849
Epoch 7/10, Loss: 0.094530002435758
Validation Loss: 0.33549097044901416, Validation Accuracy: 0.9303030303030303
Epoch 8/10, Loss: 0.12634280723120486
Validation Loss: 0.7297993898391724, Validation Accuracy: 0.8090909090909091
Epoch 9/10, Loss: 0.26001822318704354
Validation Loss: 0.5610571584918282, Validati

In [12]:
# Predicting on the test data
model.eval()
predictions = []
with torch.no_grad():
    for images, _ in test_loader:
        images = images.to(device)
        outputs = model(images)
        _, preds = torch.max(outputs, 1)
        predictions.extend(preds.cpu().numpy())

In [13]:
# Creating the output predicted DataFrame
submission_df = pd.DataFrame({'image_names': test_df['image_names'], 'emergency_or_not': predictions})

# Saving the output to a CSV file
submission_df.to_csv('sample_submission.csv', index=False)

print("Classification completed and results are saved to sample_submission.csv")

Classification completed and results are saved to sample_submission.csv
