In [None]:
import os
import json
import numpy as np
from PIL import Image
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import torchvision.models as models
from torch.optim import Adam
from tqdm import tqdm

# Root directory of the processed dataset (relative path).
# The commented-out assignment below is an alternative location kept for reference.
# NOTE(review): Python does not expand "~" on its own -- confirm the utils helpers
# call os.path.expanduser before switching to that path.
# data_path = "~/scratch/CS_6476_project_code/data/processed_data/"
data_path = "../data/processed_data/"

# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell executes and edits to them are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

from utils import create_image_labels_list
from utils import split_data
from utils import create_data_loaders
from utils import compute_mean_and_std

In [None]:
# Single seed shared by the stochastic steps of this notebook.
seed = 42

# Seed the global NumPy and PyTorch generators too. Previously the seed was only
# handed to split_data, so anything drawing from the global generators without an
# explicit seed (e.g. weight initialisation or shuffling, if the helpers rely on
# them) could differ between runs.
np.random.seed(seed)
torch.manual_seed(seed)

# create image labels from annotations
# Only the first returned value is used here; the second one is discarded.
image_labels, _ = create_image_labels_list(data_path)
print(image_labels[0:3])

In [None]:
# Total number of (image, label) entries.
print(len(image_labels))

# Number of entries whose label (element at index 1) equals 1.
num_1s = sum(1 for entry in image_labels if entry[1] == 1)

print(num_1s)
# 53k 1s out of 83k images

In [None]:
# Partition the labelled images into train / validation / test subsets.
#############################################
# Tunable: how many training examples to use.
train_size = 10000
#############################################
# Sizes of the held-out subsets.
test_size = 2000
val_size = 2000
train_data, val_data, test_data = split_data(image_labels, train_size, val_size, test_size, seed)

# Number of output categories handed to the model builders later on.
# The commented-out alternative derives it from the length of the first label instead.
# num_categories = len(image_labels[0][1])
num_categories = 1
print(num_categories)

In [None]:
# Sanity check: print the size and the first record of each split
# (test, then validation, then train).
for subset in (test_data, val_data, train_data):
    print(len(subset))
    print(subset[0])

# Observation: the train_data points vary with train_size, while the test and
# val points stay constant regardless, as intended.

In [None]:
# Directory holding the image files. Joining with a trailing empty component
# produces the same "<data_path>images/" string as plain concatenation when
# data_path already ends in a separator, and still yields a valid path when it
# does not (plain "+" would silently glue the two names together).
image_root_dir = os.path.join(data_path, "images", "")

# calculate mean and std from train_data
# Statistics come from the training split only and are reused for normalisation below.
mean, std = compute_mean_and_std(image_root_dir, train_data)
print(f"Trainset mean = {mean}, std = {std}")

# create Torch DataLoaders
# Preprocessing pipeline: resize to 224x224, convert to a tensor, then normalise
# with the training-set statistics computed above.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=mean, std=std),
])

batch_size = 32
# The same transform and batch size are handed to the helper for all three splits.
train_loader, val_loader, test_loader = create_data_loaders(
    train_data, val_data, test_data, image_root_dir, batch_size, transform
)

In [None]:
from models import get_model

In [None]:
# Build the "vgg16" model through the project's get_model factory and show its layout.
# NOTE(review): tune_conv=True presumably keeps the convolutional layers trainable --
# confirm against the requires_grad listing printed further down.
model_vgg = get_model(
    "vgg16",
    tune_conv=True,
    num_categories=num_categories,
    dropout_rate=0.5,
)
print(model_vgg)

In [None]:
# Build the "resnet50" model through the project's get_model factory and show its layout.
# NOTE(review): tune_conv=True presumably keeps the convolutional layers trainable --
# confirm against the requires_grad listing printed further down.
model_resnet = get_model(
    "resnet50",
    tune_conv=True,
    num_categories=num_categories,
    dropout_rate=0.5,
)
print(model_resnet)

In [None]:
# List every named parameter of the VGG model together with its requires_grad flag.
for layer_name, weights in model_vgg.named_parameters():
    print(f"Layer: {layer_name}, Requires Grad: {weights.requires_grad}")

In [None]:
# List every named parameter of the ResNet model together with its requires_grad flag.
for layer_name, weights in model_resnet.named_parameters():
    print(f"Layer: {layer_name}, Requires Grad: {weights.requires_grad}")