In [1]:
from torchvision.models import resnet18
from torch import nn, save, max, no_grad, randperm
import torch.optim as optim
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader, SubsetRandomSampler
import torchvision.transforms as transforms
from google.colab import drive
from numpy import mean
import pandas as pd
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [None]:
#!unzip /content/drive/MyDrive/model_comparisons_for_baboon/IMAGES_BABOON_ARI.zip -d /content/drive/MyDrive/model_comparisons_for_baboon/IMAGES_BABOON_ARI


unzip:  cannot find or open /content/drive/MyDrive/model_comparisons_for_baboon/IMAGES_BABOON_ARI.zip, /content/drive/MyDrive/model_comparisons_for_baboon/IMAGES_BABOON_ARI.zip.zip or /content/drive/MyDrive/model_comparisons_for_baboon/IMAGES_BABOON_ARI.zip.ZIP.


In [2]:
def load_train_data(folder = "100", N_train_samples = 100):
  transform = transforms.Compose([
      transforms.Grayscale(),
      transforms.ToTensor(),
      transforms.Normalize((0.5,), (0.5,))
    ])
  ################# change folder here when changing the task ######################
  train_ds = ImageFolder(root = '/content/drive/MyDrive/model_comparisons_for_baboon/IMAGES_FOR_BABOON/Dataset_ari_steps1k/task_w_nw/'+folder+'/train', transform=transform)
  subsample_train_indices = randperm(len(train_ds))[:N_train_samples]
  train_dl = DataLoader(train_ds, batch_size=64, sampler=SubsetRandomSampler(subsample_train_indices))
  return train_dl

def load_test_data(folder = "100"):
  transform = transforms.Compose([
      transforms.Grayscale(),
      transforms.ToTensor(),
      transforms.Normalize((0.5,), (0.5,))
    ])
  ################# change folder here when changing the task ######################
  test_ds = ImageFolder(root='/content/drive/MyDrive/model_comparisons_for_baboon/IMAGES_FOR_BABOON/Dataset_ari_steps1k/task_w_nw/'+folder+'/train', transform=transform)
  test_dl = DataLoader(test_ds, batch_size=64)
  return test_ds

def fit_test_model(folder = "100", N_train_samples = 100):

  train_dl = load_train_data(folder = folder, N_train_samples = N_train_samples)
  test_dl = load_train_data(folder = folder)

  model = resnet18(num_classes=2)
  model.conv1 = nn.Conv2d(1, 64, kernel_size=(63, 63), stride=(2, 2), padding=1, bias=False)

  # Define the optimizer and loss function
  optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
  criterion = nn.CrossEntropyLoss()
  #print(train_dl)
  for _ in range(15):
      for inputs, labels in train_dl:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

  total = 0
  correct = 0
  model.eval()
  with no_grad(): # This is optional but saves memory
    for inputs, labels in test_dl:
        # Forward pass
        outputs = model(inputs)
        # Calculate predictions
        _, predicted = max(outputs, 1)
        # Calculate accuracy (optional)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

  test_acc = correct / total

  total = 0
  correct = 0
  model.eval()
  with no_grad(): # This is optional but saves memory
    for inputs, labels in train_dl:
        # Forward pass
        outputs = model(inputs)
        # Calculate predictions
        _, predicted = max(outputs, 1)
        # Calculate accuracy (optional)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

  train_acc = correct / total

  return ({'folder': folder, 'samples': N_train_samples, 'train': train_acc, 'test': test_acc})

In [3]:
def wrapper_model_fit(folder_list = ["100","200","300"], N_train_samples_list = [1240, 2479, 3719, 4958], iterations = 10):
  data = []

  for folder in folder_list:
    for N_train_samples in N_train_samples_list:
      print(str(N_train_samples)+" "+folder)
      acc_list = [fit_test_model(folder = folder, N_train_samples = N_train_samples) for i in range(iterations)]
      print(mean(acc_list))
      [data.append({'folder': folder, 'train_samples': N_train_samples, 'acc': acc}) for acc in acc_list]

  return data

In [4]:
load_test_data(folder = "100")

Dataset ImageFolder
    Number of datapoints: 99
    Root location: /content/drive/MyDrive/model_comparisons_for_baboon/IMAGES_FOR_BABOON/Dataset_ari_steps1k/task_w_nw/100/train
    StandardTransform
Transform: Compose(
               Grayscale(num_output_channels=1)
               ToTensor()
               Normalize(mean=(0.5,), std=(0.5,))
           )

In [8]:
N = 100
#[round(N*.1), round(N*.25), round(N*.5), round(N*.75), round(N*1)]
#data = wrapper_model_fit(folder_list = ["normal","original_oPE","50p_oPE"], N_train_samples_list = [round(N*0.5)], iterations = 10)
data = wrapper_model_fit(folder_list = ["100"], N_train_samples_list = [round(N)], iterations = 10)
df = pd.DataFrame.from_records(data)
df


100 100


TypeError: ignored