In [54]:
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
from sklearn.model_selection import train_test_split
import shutil

In [81]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
import matplotlib.pyplot as plt
from torch.optim import lr_scheduler

In [56]:
import flickrapi
import requests
import albumentations as A
from albumentations.pytorch import ToTensorV2

In [57]:
api_key = 'b0aa4ea7e6e6a87a1358a737c5e12973'
api_secret = '0c366913a2e6c98c'

In [58]:
flickr = flickrapi.FlickrAPI(api_key, api_secret, format='parsed-json')

In [59]:
base_directory = r'C:\Users\achin\SuspectTechnologies\Protests vs Parades'

In [60]:
def search_and_download_images(query, sub_directory):
    save_directory = os.path.join(base_directory, sub_directory)
    if not os.path.exists(save_directory):
        os.makedirs(save_directory)
    
    photos = flickr.photos.search(text=query, per_page=20, extras='url_o')
    for i, photo in enumerate(photos['photos']['photo']):
        url = photo.get('url_o')
        if url:
            response = requests.get(url)
            if response.status_code == 200:
                file_path = os.path.join(save_directory, f'{query}_{i}.jpg')
                with open(file_path, 'wb') as f:
                    f.write(response.content)
            else:
                print(f'Failed to download {url}')


In [61]:
search_and_download_images('public protests', 'protests')
search_and_download_images('parades', 'parades')

INFO:flickrapi.core:Calling {'method': 'flickr.photos.search', 'format': 'parsed-json', 'nojsoncallback': 1}
INFO:flickrapi.core:Calling {'method': 'flickr.photos.search', 'format': 'parsed-json', 'nojsoncallback': 1}


In [62]:
search_and_download_images('George Floyd protests', 'protests')
search_and_download_images('Championship parades', 'parades')

INFO:flickrapi.core:Calling {'method': 'flickr.photos.search', 'format': 'parsed-json', 'nojsoncallback': 1}
INFO:flickrapi.core:Calling {'method': 'flickr.photos.search', 'format': 'parsed-json', 'nojsoncallback': 1}


In [63]:
protests_dir = os.path.join(base_directory, 'protests')
parades_dir = os.path.join(base_directory, 'parades')
train_dir = os.path.join(base_directory, 'train')
val_dir = os.path.join(base_directory, 'val')
test_dir = os.path.join(base_directory, 'test') 


In [64]:
for directory in [train_dir, val_dir, test_dir]:
    os.makedirs(os.path.join(directory, 'protests'), exist_ok=True)
    os.makedirs(os.path.join(directory, 'parades'), exist_ok=True)

In [65]:
def split_and_copy_files(src_dir, train_dst, val_dst, test_dst, split_ratios = (0.7, 0.2, 0.1)):
    files = [f for f in os.listdir(src_dir) if os.path.isfile(os.path.join(src_dir, f))]
    train_files, temp_files = train_test_split(files, test_size=(1 - split_ratios[0]), random_state=42)
    val_files, test_files = train_test_split(temp_files, test_size=(split_ratios[2] / (split_ratios[1] + split_ratios[2])), random_state=42)

    for file in train_files:
        shutil.copy(os.path.join(src_dir, file), os.path.join(train_dst, file))
    for file in val_files:
        shutil.copy(os.path.join(src_dir, file), os.path.join(val_dst, file))
    for file in test_files:
        shutil.copy(os.path.join(src_dir, file), os.path.join(test_dst, file))
        

In [66]:
split_and_copy_files(protests_dir, os.path.join(train_dir, 'protests'), os.path.join(val_dir, 'protests'), os.path.join(test_dir, 'protests'))
split_and_copy_files(parades_dir, os.path.join(train_dir, 'parades'), os.path.join(val_dir, 'parades'), os.path.join(test_dir, 'parades'))

In [73]:
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(30),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'val': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    
}

In [68]:
data_dir = r'C:/Users/achin/SuspectTechnologies/Protests vs Parades'
image_datasets = {x: datasets.ImageFolder(os.path.join(data_dir, x), data_transforms[x]) for x in ['train', 'val', 'test']}
dataloaders = {x: torch.utils.data.DataLoader(image_datasets[x], batch_size=32, shuffle=True, num_workers=4) for x in ['train', 'val', 'test']}
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val', 'test']}
class_names = image_datasets['train'].classes
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [82]:
base_model = models.resnet18(pretrained=True)
num_ftrs = base_model.fc.in_features
base_model.fc = nn.Sequential(nn.Linear(num_ftrs, 512), nn.ReLU(), nn.Dropout(0.5), nn.Linear(512, 2))



In [85]:
class CustomLoss(nn.Module):
    def __init__(self, weight_decay=1e-4):
        super(CustomLoss, self).__init__()
        self.cross_entropy = nn.CrossEntropyLoss()
        self.weight_decay = weight_decay
    def forward(self, outputs, targets, model):
        ce_loss = self.cross_entropy(outputs, targets)
        l2_reg = torch.tensor(0.).to(outputs.device)
        for param in model.parameters():
            if param.requires_grad:
                l2_reg += torch.norm(param)
        loss = ce_loss + self.weight_decay * l2_reg
        return loss

In [89]:
criterion2 = CustomLoss(weight_decay=1e-4)
optimizer2 = optim.SGD(base_model.parameters(), lr=0.001, momentum=0.9)
exp_lr_scheduler = lr_scheduler.StepLR(optimizer2, step_size=7, gamma=0.1)

In [92]:
def train_model2(model, criterion, optimizer, scheduler, dataloaders, dataset_sizes, num_epochs=25):
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    best_model_wts = model.state_dict()
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()

            running_loss = 0.0
            running_corrects = 0

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)

                optimizer.zero_grad()

                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels, model)

                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            if phase == 'train':
                scheduler.step()

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = model.state_dict()

        print()

    print(f'Best val Acc: {best_acc:.4f}')

    model.load_state_dict(best_model_wts)
    return model

In [95]:
custom_model = train_model2(base_model, criterion2, optimizer2, exp_lr_scheduler, dataloaders, dataset_sizes, num_epochs=10)
torch.save(custom_model.state_dict(), 'custom_model.pth')
custom_model.load_state_dict(torch.load('custom_model.pth'))
custom_model.eval()

Epoch 0/9
----------
train Loss: 0.7945 Acc: 0.4398
val Loss: 0.7375 Acc: 0.5397

Epoch 1/9
----------
train Loss: 0.7309 Acc: 0.5663
val Loss: 0.7122 Acc: 0.5714

Epoch 2/9
----------
train Loss: 0.7000 Acc: 0.6024
val Loss: 0.6689 Acc: 0.5873

Epoch 3/9
----------
train Loss: 0.6522 Acc: 0.7410
val Loss: 0.6401 Acc: 0.8571

Epoch 4/9
----------
train Loss: 0.6627 Acc: 0.7169
val Loss: 0.6173 Acc: 0.8413

Epoch 5/9
----------
train Loss: 0.6146 Acc: 0.7349
val Loss: 0.5673 Acc: 0.7778

Epoch 6/9
----------
train Loss: 0.5706 Acc: 0.7831
val Loss: 0.5297 Acc: 0.8413

Epoch 7/9
----------
train Loss: 0.5572 Acc: 0.7831
val Loss: 0.5300 Acc: 0.8413

Epoch 8/9
----------
train Loss: 0.5464 Acc: 0.7892
val Loss: 0.5279 Acc: 0.8413

Epoch 9/9
----------
train Loss: 0.5574 Acc: 0.7651
val Loss: 0.5231 Acc: 0.8571

Best val Acc: 0.8571


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [97]:
#Evaluate model on the test set
corrects = 0
total = 0
custom_model.eval()

with torch.no_grad():
    for inputs, labels in dataloaders['test']:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = custom_model(inputs)
        _, preds = torch.max(outputs, 1)
        total += labels.size(0)
        corrects += torch.sum(preds == labels.data)
accuracy = corrects.double() / total
print(f'Accuracy on the test set: {accuracy:.4f}')

Accuracy on the test set: 0.8108


In [76]:
#Model definition
model_ft = models.resnet18(pretrained=True)
num_ftrs = model_ft.fc.in_features
model_ft.fc = nn.Linear(num_ftrs, 2)
model_ft = model_ft.to(device)
criterion = nn.CrossEntropyLoss()
optimizer_ft = optim.SGD(model_ft.parameters(), lr=0.001, momentum=0.9)

model_2 = models.resnet50(pretrained=True)
num_ftrs = model_2.fc.in_features
model_2.fc = nn.Linear(num_ftrs, 2)
model_2 = model_2.to(device)
for param in model_2.parameters():
    param.requires_grad = False
for param in model_ft.layer4.parameters():
    param.requires_grad = True
model_2.fc = nn.Sequential(nn.Dropout(0.5), nn.Linear(num_ftrs, 2))    
optimizer = optim.SGD(model_2.layer4.parameters(), lr = 0.001, momentum=0.9)



In [77]:
#training loop
def train_model(model, criterion, optimizer, num_epochs = 10):
    best_model_wts = model.state_dict()
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch}/{num_epochs - 1}')
        print('-' * 10)

        for phase in ['train', 'val']:
            if phase == 'train':
                model.train()
            else:
                model.eval()
            running_loss = 0.0
            running_corrects = 0
            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(device)
                labels = labels.to(device)
                optimizer.zero_grad()
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)
            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]
            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            if phase == 'val' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = model.state_dict()
        print()
    print(f'Best val Acc: {best_acc:.4f}')
    model.load_state_dict(best_model_wts)
    return model


In [79]:
#training the model
model_ft= train_model(model_ft, criterion, optimizer_ft, num_epochs=10)
torch.save(model_ft.state_dict(), 'model.pth')

Epoch 0/9
----------
train Loss: 0.7916 Acc: 0.5181
val Loss: 0.6488 Acc: 0.5714

Epoch 1/9
----------
train Loss: 0.7146 Acc: 0.5904
val Loss: 0.4863 Acc: 0.8571

Epoch 2/9
----------
train Loss: 0.6362 Acc: 0.5843
val Loss: 0.3859 Acc: 0.9206

Epoch 3/9
----------
train Loss: 0.4615 Acc: 0.7651
val Loss: 0.3473 Acc: 0.8730

Epoch 4/9
----------
train Loss: 0.3847 Acc: 0.8253
val Loss: 0.3043 Acc: 0.9048

Epoch 5/9
----------
train Loss: 0.3461 Acc: 0.8916
val Loss: 0.2369 Acc: 0.9524

Epoch 6/9
----------
train Loss: 0.2603 Acc: 0.9096
val Loss: 0.2171 Acc: 0.9524

Epoch 7/9
----------
train Loss: 0.2573 Acc: 0.8976
val Loss: 0.1817 Acc: 0.9524

Epoch 8/9
----------
train Loss: 0.2203 Acc: 0.9277
val Loss: 0.1648 Acc: 0.9683

Epoch 9/9
----------
train Loss: 0.1821 Acc: 0.9337
val Loss: 0.1739 Acc: 0.9683

Best val Acc: 0.9683


In [80]:
#Evaluate model on the test set
corrects = 0
total = 0
model_ft.eval()

with torch.no_grad():
    for inputs, labels in dataloaders['test']:
        inputs = inputs.to(device)
        labels = labels.to(device)
        outputs = model_ft(inputs)
        _, preds = torch.max(outputs, 1)
        total += labels.size(0)
        corrects += torch.sum(preds == labels.data)
accuracy = corrects.double() / total
print(f'Accuracy on the test set: {accuracy:.4f}')

Accuracy on the test set: 0.9189
