In [62]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
import os, glob
from datetime import datetime
import pandas as pd
from torch.utils.data import Dataset, DataLoader

### Question 2

CNN training: Train a Convolutional Neural Network on the simulated data for each of
the nine simulation settings. The goal is to use the CNN to predict the cancer status 𝑦𝑖
based on the simulated images 𝑋𝑖
. Additionally, generate a test set of 1000 subjects using
the same data generation process and evaluate the CNN's performance in terms of
classification accuracy. You are free to build a CNN with arbitrary hyperparameter
setting. Conduct at least 10 independent experiments for each setting by generating new
datasets each time, and report the hyperparameters for the CNN, the mean and standard
deviation of the classification accuracy achieved by your CNN model.


#### MODEL 2

In [46]:
model2 = torch.nn.Sequential()
model2.add_module('conv1', torch.nn.Conv2d(in_channels=1, out_channels=2, kernel_size = 3, padding = 1))
model2.add_module('relu1', torch.nn.ReLU())
model2.add_module('pool1', torch.nn.MaxPool2d(kernel_size = 2))


model2.add_module('conv2', torch.nn.Conv2d(in_channels=2, out_channels=4, kernel_size = 3, padding = 1))
model2.add_module('relu2', torch.nn.ReLU())
model2.add_module('pool2', torch.nn.MaxPool2d(kernel_size = 2))


model2.add_module('conv3', torch.nn.Conv2d(in_channels=4, out_channels=8, kernel_size = 3, padding = 1))
model2.add_module('relu3', torch.nn.ReLU())
model2.add_module('pool3', torch.nn.MaxPool2d(kernel_size = 2))


model2.add_module('Flatten', torch.nn.Flatten())

model2.add_module('fc1', torch.nn.Linear(128, 10))
model2.add_module('relu7', torch.nn.ReLU())
model2.add_module('fc2', torch.nn.Linear(10, 1))

model2.add_module('sigmoid', torch.nn.Sigmoid())


In [14]:
def simulateData(n, mu_c, mu_n):

    y = np.random.choice([0, 1], size = n, p = [0.5, 0.5])
    m_i = np.random.poisson(lam = mu_c, size = n) * y + np.random.poisson(lam = mu_n, size = n) * (1 - y)

    simulated_data = np.zeros([n, 32, 32])
    for i in range(n):
        random_indices = np.random.choice(32 * 32, m_i[i], replace = False)
        row_indices, col_indices = np.unravel_index(random_indices, (32, 32))
        Bi = np.zeros([32, 32])
        Bi[row_indices, col_indices] = 1
        epsilon_i = np.random.normal(loc = 0, scale = np.sqrt(0.04), size = (32, 32))
        simulated_data[i] = Bi + epsilon_i

    return y, simulated_data

class dataSetPytorch(Dataset):
    def __init__(self, x, y):
        self.x = torch.from_numpy(x.reshape([-1, 1, 32, 32])).float()
        self.y = torch.from_numpy(y)
    def __len__(self):
        return len(self.x)
    def __getitem__(self, idx):
        return self.x[idx], self.y[idx]

def makeTestLoader(numExperiments = 10):
    n_test = 1000
    mu_n = [5, 5, 5, 5, 5, 5, 5, 5, 5]
    mu_c = [10, 10, 10, 20, 20, 20, 30, 30, 30]
    
    dataLoader_experiment_data = []
    for experiment in range(numExperiments):
        dataLoader_settings = []
        for setting in range(9):

            y, simulated_data = simulateData(n = n_test,
                                             mu_c = mu_c[setting],
                                             mu_n = mu_n[setting])

            datasetSetting = dataSetPytorch(simulated_data, y)
            dataLoader = DataLoader(datasetSetting, batch_size=25, shuffle = True)
            dataLoader_settings.append(dataLoader)
            
        dataLoader_experiment_data.append(dataLoader_settings)
        
    return dataLoader_experiment_data
    

In [7]:
dataLoader_all_experiments_test = makeTestLoader(numExperiments = 10)

### Test the models

In [50]:
def modelTest(model, test_dataloader):
    accuracy_test = 0
    model.eval()
    with torch.no_grad():
        for x_batch, y_batch in test_dataloader:
            pred = model(x_batch)[:, 0]
            is_correct = ((pred >= 0.5).float() == y_batch).float()
            accuracy_test += is_correct.sum()   
    accuracy_test /= len(test_dataloader.dataset)
    print(f'Test Accuracy: {accuracy_test:4f}')
    return accuracy_test.numpy()

Note: In the print statment its supposed to be  print("Experiment:", experiment, "Setting:", setting + 1) but forgot to change it when i ran the long experiment ..

In [53]:
numExperiments = 10
numSettings = 9
accuracyMatrix = np.zeros([numSettings, numExperiments])
for experiment in range(numExperiments):
    dataLoader_individual_experiment_test = dataLoader_all_experiments_test[experiment]
    
    for setting in range(numSettings):
        print("Setting:", setting + 1, "Experiment:", experiment)
        folderPath = 'setting' + str(setting + 1)

        n = len(dataLoader_individual_experiment_test[setting].dataset)
        print("N:", n)
        
        targetModelName = "Setting" + str(setting + 1) + "_Experiment" + str(experiment)
        print(targetModelName)
        modelPath = os.path.join(".", folderPath)  # Constructing the path using os.path.join

        # List all files in the modelPath directory that match the targetModelName pattern
        modelString = [file for file in glob.glob(os.path.join(modelPath, f"*{targetModelName}*"))][0]
        print(matching_files)
        
        modelWeightParams = torch.load(modelString)
        model2.load_state_dict(modelWeightParams)
        
        accuracy_test = modelTest(model2, dataLoader_individual_experiment_test[setting])
        accuracyMatrix[setting, experiment] = accuracy_test
        



Setting: 1 Experiment: 0
N: 1000
Setting1_Experiment0
.\setting9\modelSetting9_Experiment9_23_02_2024_epoch_81.pth
Test Accuracy: 0.604000
Setting: 2 Experiment: 0
N: 1000
Setting2_Experiment0
.\setting9\modelSetting9_Experiment9_23_02_2024_epoch_81.pth
Test Accuracy: 0.729000
Setting: 3 Experiment: 0
N: 1000
Setting3_Experiment0
.\setting9\modelSetting9_Experiment9_23_02_2024_epoch_81.pth
Test Accuracy: 0.754000
Setting: 4 Experiment: 0
N: 1000
Setting4_Experiment0
.\setting9\modelSetting9_Experiment9_23_02_2024_epoch_81.pth
Test Accuracy: 0.968000
Setting: 5 Experiment: 0
N: 1000
Setting5_Experiment0
.\setting9\modelSetting9_Experiment9_23_02_2024_epoch_81.pth
Test Accuracy: 0.978000
Setting: 6 Experiment: 0
N: 1000
Setting6_Experiment0
.\setting9\modelSetting9_Experiment9_23_02_2024_epoch_81.pth
Test Accuracy: 0.957000
Setting: 7 Experiment: 0
N: 1000
Setting7_Experiment0
.\setting9\modelSetting9_Experiment9_23_02_2024_epoch_81.pth
Test Accuracy: 0.988000
Setting: 8 Experiment: 0
N:

Test Accuracy: 0.964000
Setting: 7 Experiment: 6
N: 1000
Setting7_Experiment6
.\setting9\modelSetting9_Experiment9_23_02_2024_epoch_81.pth
Test Accuracy: 0.471000
Setting: 8 Experiment: 6
N: 1000
Setting8_Experiment6
.\setting9\modelSetting9_Experiment9_23_02_2024_epoch_81.pth
Test Accuracy: 0.482000
Setting: 9 Experiment: 6
N: 1000
Setting9_Experiment6
.\setting9\modelSetting9_Experiment9_23_02_2024_epoch_81.pth
Test Accuracy: 0.517000
Setting: 1 Experiment: 7
N: 1000
Setting1_Experiment7
.\setting9\modelSetting9_Experiment9_23_02_2024_epoch_81.pth
Test Accuracy: 0.714000
Setting: 2 Experiment: 7
N: 1000
Setting2_Experiment7
.\setting9\modelSetting9_Experiment9_23_02_2024_epoch_81.pth
Test Accuracy: 0.518000
Setting: 3 Experiment: 7
N: 1000
Setting3_Experiment7
.\setting9\modelSetting9_Experiment9_23_02_2024_epoch_81.pth
Test Accuracy: 0.483000
Setting: 4 Experiment: 7
N: 1000
Setting4_Experiment7
.\setting9\modelSetting9_Experiment9_23_02_2024_epoch_81.pth
Test Accuracy: 0.951000
Set

In [54]:
accuracyMatrix

array([[0.60399997, 0.73199999, 0.68300003, 0.73199999, 0.72399998,
        0.71200001, 0.46700001, 0.71399999, 0.47      , 0.76599997],
       [0.72899997, 0.73100001, 0.76200002, 0.77100003, 0.75199997,
        0.77200001, 0.75599998, 0.51800001, 0.74400002, 0.76800001],
       [0.75400001, 0.79799998, 0.75199997, 0.76999998, 0.792     ,
        0.49900001, 0.51099998, 0.48300001, 0.75199997, 0.53799999],
       [0.96799999, 0.94599998, 0.95200002, 0.94199997, 0.94099998,
        0.954     , 0.94800001, 0.95099998, 0.94099998, 0.96200001],
       [0.97799999, 0.96700001, 0.96499997, 0.95700002, 0.96100003,
        0.49700001, 0.51499999, 0.96600002, 0.958     , 0.954     ],
       [0.95700002, 0.95700002, 0.96700001, 0.958     , 0.96899998,
        0.96600002, 0.96399999, 0.98299998, 0.96399999, 0.95999998],
       [0.98799998, 0.991     , 0.98900002, 0.995     , 0.98900002,
        0.98299998, 0.47099999, 0.99900001, 0.99000001, 0.50199997],
       [0.995     , 0.98799998, 0.9900000

In [60]:
n = [200, 500, 1000, 200, 500, 1000, 200, 500, 1000]
mu_n = [5, 5, 5, 5, 5, 5, 5, 5, 5]
mu_c = [10, 10, 10, 20, 20, 20, 30, 30, 30]
mean_accuracy = accuracyMatrix.mean(axis = 1)
std_accuracy = accuracyMatrix.std(axis = 1)

In [63]:
# Creating DataFrame
data = {
    'n': n,
    'mu_n': mu_n,
    'mu_c': mu_c,
    'mean_accuracy': mean_accuracy,
    'std_accuracy': std_accuracy
}

DataSettignsdf = pd.DataFrame(data)
display(DataSettignsdf)

Unnamed: 0,n,mu_n,mu_c,mean_accuracy,std_accuracy
0,200,5,10,0.6604,0.104083
1,500,5,10,0.7303,0.072259
2,1000,5,10,0.6649,0.129779
3,200,5,20,0.9505,0.008559
4,500,5,20,0.8718,0.183053
5,1000,5,20,0.9645,0.007393
6,200,5,30,0.8897,0.201759
7,500,5,30,0.8923,0.198259
8,1000,5,30,0.9457,0.142924
