# ResNet18 and ResNet50

In [1]:
from utils import *
import torch
import torch.nn as nn
from tqdm import tqdm
from torchvision import models
from sklearn.metrics import f1_score
import wandb

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
epochs = 150
lr = 1e-3
batch_size = 16
model_size = 50
model_name = f'resnet{model_size}'
# model_name = 'densenet169'

wandb.init(project="Digital Medicine Case 2", entity="tommytyc")
wandb.config = {
    "learning_rate": lr,
    "epochs": epochs,
    "batch_size": batch_size,
    "model_name": model_name,
}

train_df = pd.read_csv("./Data/train_label.csv")
# valid_df = pd.read_csv("./Data/valid_label.csv")
test_df = pd.read_csv("./Data/test_label.csv")
train_set = Covid_Dataset(train_df, 'train')
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
# valid_set = Covid_Dataset(valid_df, 'valid')
# valid_loader = DataLoader(valid_set, batch_size=1, shuffle=True)
test_set = Covid_Dataset(test_df, 'test')
test_loader = DataLoader(test_set, batch_size=1)

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mtommytyc[0m (use `wandb login --relogin` to force relogin)


## ResNet

In [2]:
class ResNet(nn.Module):
    def __init__(self, model_size=18, pretrained=True):
        super(ResNet, self).__init__()
        if model_size == 18:
            last_dim = 512
        elif model_size == 50 or model_size == 101 or model_size == 152:
            last_dim = 2048
        
        self.model_size = model_size
        pretrained_model = models.__dict__[f"resnet{model_size}"](pretrained=pretrained)
        
        self.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = pretrained_model._modules["bn1"]
        self.relu = pretrained_model._modules["relu"]
        self.maxpool = pretrained_model._modules["maxpool"]
        
        self.layer1 = pretrained_model._modules["layer1"]
        self.layer2 = pretrained_model._modules["layer2"]
        self.layer3 = pretrained_model._modules["layer3"]
        self.layer4 = pretrained_model._modules["layer4"]
        
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(last_dim, 3)
        
        del pretrained_model

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

## DenseNet

In [3]:
# from collections import OrderedDict
# model = models.densenet169(pretrained=True)
# model.features[0] = nn.Sequential(OrderedDict([
#             ('conv0', nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)),
#             ('norm0', nn.BatchNorm2d(64)),
#             ('relu0', nn.ReLU(inplace=True)),
#             ('pool0', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),
#         ]))
# model.classifier = nn.Linear(1664, 3)

## Training

In [4]:
model = ResNet(model_size=model_size, pretrained=True)
ce_loss = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=lr, momentum=0.9, weight_decay=1e-4)
model.to(device)
ce_loss.to(device)
best_acc = 0
for epoch in tqdm(range(epochs)):
    model.train()
    for images, labels in train_loader:
        images = images.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = ce_loss(outputs, labels)
        loss.backward()
        optimizer.step()
    # model.eval()
    # with torch.no_grad():
    #     valid_preds, valid_label = [], []
    #     for images, labels in valid_loader:
    #         images = images.to(device)
    #         labels = labels.to(device)
    #         outputs = model(images)
    #         valid_preds.append(outputs.argmax(dim=1).cpu().numpy())
    #         valid_label.append(labels.detach().cpu().numpy())
    #     acc = f1_score(valid_preds, valid_label, average='macro')
    #     wandb.log({"loss": loss, 'valid_acc': acc}, step=epoch)
    #     if acc > best_acc:
    #         best_acc = acc
    #         torch.save(model.state_dict(), f"./Models/{model_name}_covid_model.pt")
    if epoch % 5 == 0:
        # print(f"Epoch: {epoch}, loss: {loss}, acc: {best_acc}")
        print(f"Epoch: {epoch}, loss: {loss}")

  1%|          | 1/150 [01:18<3:16:06, 78.97s/it]

Epoch: 0, loss: 1.012873888015747


  4%|▍         | 6/150 [07:55<3:10:26, 79.35s/it]

Epoch: 5, loss: 0.8146169185638428


  7%|▋         | 11/150 [14:32<3:04:03, 79.45s/it]

Epoch: 10, loss: 1.2352805137634277


 11%|█         | 16/150 [21:09<2:57:23, 79.43s/it]

Epoch: 15, loss: 0.7852510809898376


 14%|█▍        | 21/150 [27:46<2:50:38, 79.37s/it]

Epoch: 20, loss: 0.2811877131462097


 17%|█▋        | 26/150 [34:23<2:44:07, 79.42s/it]

Epoch: 25, loss: 0.47667866945266724


 21%|██        | 31/150 [41:00<2:37:32, 79.43s/it]

Epoch: 30, loss: 0.051106590777635574


 24%|██▍       | 36/150 [47:37<2:30:44, 79.33s/it]

Epoch: 35, loss: 0.22887012362480164


 27%|██▋       | 41/150 [54:14<2:24:06, 79.32s/it]

Epoch: 40, loss: 0.049387432634830475


 31%|███       | 46/150 [1:00:51<2:17:37, 79.40s/it]

Epoch: 45, loss: 0.02291695401072502


 34%|███▍      | 51/150 [1:07:29<2:11:18, 79.58s/it]

Epoch: 50, loss: 0.010366552509367466


 37%|███▋      | 56/150 [1:14:06<2:04:36, 79.54s/it]

Epoch: 55, loss: 0.09647729992866516


 41%|████      | 61/150 [1:20:42<1:57:38, 79.31s/it]

Epoch: 60, loss: 0.18295983970165253


 44%|████▍     | 66/150 [1:27:19<1:51:06, 79.37s/it]

Epoch: 65, loss: 0.04920356348156929


 47%|████▋     | 71/150 [1:33:57<1:44:36, 79.45s/it]

Epoch: 70, loss: 0.005516589619219303


 51%|█████     | 76/150 [1:40:34<1:37:57, 79.43s/it]

Epoch: 75, loss: 0.01956999860703945


 54%|█████▍    | 81/150 [1:47:11<1:31:21, 79.44s/it]

Epoch: 80, loss: 0.017079487442970276


 57%|█████▋    | 86/150 [1:53:49<1:24:48, 79.51s/it]

Epoch: 85, loss: 0.04956820234656334


 61%|██████    | 91/150 [2:00:26<1:18:03, 79.37s/it]

Epoch: 90, loss: 0.010915916413068771


 64%|██████▍   | 96/150 [2:07:03<1:11:32, 79.50s/it]

Epoch: 95, loss: 0.0004917970509268343


 67%|██████▋   | 101/150 [2:13:41<1:04:54, 79.48s/it]

Epoch: 100, loss: 0.008476155810058117


 71%|███████   | 106/150 [2:20:18<58:14, 79.42s/it]

Epoch: 105, loss: 0.03160415217280388


 74%|███████▍  | 111/150 [2:26:55<51:35, 79.37s/it]

Epoch: 110, loss: 0.017468256875872612


 77%|███████▋  | 116/150 [2:33:32<45:00, 79.42s/it]

Epoch: 115, loss: 0.0002989819913636893


 81%|████████  | 121/150 [2:40:09<38:24, 79.46s/it]

Epoch: 120, loss: 0.0007909522391855717


 84%|████████▍ | 126/150 [2:46:46<31:47, 79.47s/it]

Epoch: 125, loss: 0.009264652617275715


 87%|████████▋ | 131/150 [2:53:24<25:10, 79.50s/it]

Epoch: 130, loss: 0.0017399350181221962


 91%|█████████ | 136/150 [3:00:02<18:33, 79.53s/it]

Epoch: 135, loss: 0.07705352455377579


 94%|█████████▍| 141/150 [3:06:39<11:55, 79.54s/it]

Epoch: 140, loss: 0.002609513932839036


 97%|█████████▋| 146/150 [3:13:17<05:18, 79.55s/it]

Epoch: 145, loss: 0.008381438441574574


100%|██████████| 150/150 [3:18:35<00:00, 79.44s/it]


## Predicting

In [5]:
torch.save(model.state_dict(), f"./Models/{model_name}_last_covid_model.pt")

In [15]:
cat_transform = {
    0: "Atypical",
    1: "Negative", 
    2: "Typical"
}

model = ResNet(model_size=model_size, pretrained=True)
# model = models.densenet169(pretrained=True)
# model.features[0] = nn.Sequential(OrderedDict([
#             ('conv0', nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)),
#             ('norm0', nn.BatchNorm2d(64)),
#             ('relu0', nn.ReLU(inplace=True)),
#             ('pool0', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),
#         ]))
# model.classifier = nn.Linear(1664, 3)
model.load_state_dict(torch.load(f"./Models/{model_name}_last_covid_model.pt"))

model.to(device)
model.eval()
out_df = pd.DataFrame(columns=["FileID", "Type"])
cnt = 0
with torch.no_grad():
    for images, filename in test_loader:
        images = images.float().to(device)
        outputs = model(images)
        # _, preds = torch.max(outputs, 1)
        # out_df.loc[cnt] = [filename[0], cat_transform[preds.item()]]
        out_df.loc[cnt] = [filename[0], torch.sigmoid(outputs.cpu()).numpy()]
        cnt += 1

In [46]:
out_df.sort_values(by="FileID", inplace=True)
out_df = out_df.reset_index(drop=True)
out_df
# out_df.to_csv(f"./Results/{model_name}_covid_model.csv", index=False)

Unnamed: 0,FileID,Type
0,014cc6362544,"[[0.6239632, 0.9429339, 0.04088789]]"
1,014f6b975233,"[[0.9950551, 0.007974871, 0.5676814]]"
2,04d7e099eafa,"[[0.65850955, 0.99963915, 0.00023509748]]"
3,05a7f46635c7,"[[0.19512738, 0.9978173, 0.011924788]]"
4,05e002867b16,"[[0.16254319, 0.47659382, 0.8787334]]"
...,...,...
145,f6a9cee1149d,"[[0.98779535, 5.2951422e-05, 0.99647397]]"
146,f75463651b85,"[[0.16292557, 0.986323, 0.07755546]]"
147,f7f391fe9a3c,"[[0.9459657, 0.80476886, 0.01229056]]"
148,fb31eeb4f056,"[[4.1511106e-05, 0.99862826, 0.9743319]]"


In [41]:
# relu_prob = pd.read_csv("Results/relu_prob.csv")
# relu_prob

Unnamed: 0,ID,Atypical,Negative,Typical
0,014cc6362544,0.493702,0.500094,0.534156
1,014f6b975233,0.514952,0.604787,0.354928
2,04d7e099eafa,0.469840,0.677867,0.339246
3,05a7f46635c7,0.455261,0.592920,0.434772
4,05e002867b16,0.483056,0.511940,0.510766
...,...,...,...,...
145,f6a9cee1149d,0.522470,0.375124,0.638731
146,f75463651b85,0.572599,0.675057,0.276502
147,f7f391fe9a3c,0.552043,0.524315,0.484677
148,fb31eeb4f056,0.337982,0.822020,0.245641


In [38]:
from collections import OrderedDict
model = models.densenet169(pretrained=True)
model.features[0] = nn.Sequential(OrderedDict([
            ('conv0', nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)),
            ('norm0', nn.BatchNorm2d(64)),
            ('relu0', nn.ReLU(inplace=True)),
            ('pool0', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),
        ]))
model.classifier = nn.Linear(1664, 3)
model.load_state_dict(torch.load(f"./Models/densenet169_last_covid_model.pt"))

model.to(device)
model.eval()
dense_out_df = pd.DataFrame(columns=["FileID", "Type"])
cnt = 0
with torch.no_grad():
    for images, filename in test_loader:
        images = images.float().to(device)
        outputs = model(images)
        # _, preds = torch.max(outputs, 1)
        # out_df.loc[cnt] = [filename[0], cat_transform[preds.item()]]
        dense_out_df.loc[cnt] = [filename[0], torch.sigmoid(outputs.cpu()).numpy()]
        cnt += 1

In [47]:
dense_out_df.sort_values(by="FileID", inplace=True)
dense_out_df = out_df.reset_index(drop=True)
dense_out_df

Unnamed: 0,FileID,Type
0,014cc6362544,"[[0.6239632, 0.9429339, 0.04088789]]"
1,014f6b975233,"[[0.9950551, 0.007974871, 0.5676814]]"
2,04d7e099eafa,"[[0.65850955, 0.99963915, 0.00023509748]]"
3,05a7f46635c7,"[[0.19512738, 0.9978173, 0.011924788]]"
4,05e002867b16,"[[0.16254319, 0.47659382, 0.8787334]]"
...,...,...
145,f6a9cee1149d,"[[0.98779535, 5.2951422e-05, 0.99647397]]"
146,f75463651b85,"[[0.16292557, 0.986323, 0.07755546]]"
147,f7f391fe9a3c,"[[0.9459657, 0.80476886, 0.01229056]]"
148,fb31eeb4f056,"[[4.1511106e-05, 0.99862826, 0.9743319]]"


In [48]:
res_dense = pd.DataFrame(columns=["FileID", "Atypical", "Negative", "Typical"])
for i in range(len(out_df)):
    res_dense.loc[i] = [out_df.loc[i, "FileID"], out_df.loc[i]["Type"][0][0] + dense_out_df.loc[i]["Type"][0][0], out_df.loc[i]["Type"][0][1] + dense_out_df.loc[i]["Type"][0][1], out_df.loc[i]["Type"][0][2] + dense_out_df.loc[i]["Type"][0][2]]
res_dense["Type"] = res_dense[["Atypical", "Negative", "Typical"]].idxmax(axis=1)
res_dense = res_dense[["FileID", "Type"]]
res_dense.to_csv(f"./Results/res_dense_voting.csv", index=False)
res_dense

Unnamed: 0,FileID,Type
0,014cc6362544,Negative
1,014f6b975233,Atypical
2,04d7e099eafa,Negative
3,05a7f46635c7,Negative
4,05e002867b16,Typical
...,...,...
145,f6a9cee1149d,Typical
146,f75463651b85,Negative
147,f7f391fe9a3c,Atypical
148,fb31eeb4f056,Negative


In [43]:
# for idx, row in relu_prob.iterrows():
#     relu_prob.at[idx, "Atypical"] = row["Atypical"] + out_df.loc[idx]["Type"][0][0] + dense_out_df.loc[idx]["Type"][0][0]
#     relu_prob.at[idx, "Negative"] = row["Negative"] + out_df.loc[idx]["Type"][0][1] + dense_out_df.loc[idx]["Type"][0][1]
#     relu_prob.at[idx, "Typical"] = row["Typical"] + out_df.loc[idx]["Type"][0][2] + dense_out_df.loc[idx]["Type"][0][2]
# relu_prob["Type"] = relu_prob[["Atypical", "Negative", "Typical"]].idxmax(axis=1)
# relu_prob.rename(columns={"ID": "FileID"}, inplace=True)
# relu_prob = relu_prob[["FileID", "Type"]]
# relu_prob.to_csv(f"./Results/soft_voting.csv", index=False)
# relu_prob

Unnamed: 0,FileID,Type
0,014cc6362544,Negative
1,014f6b975233,Atypical
2,04d7e099eafa,Negative
3,05a7f46635c7,Negative
4,05e002867b16,Typical
...,...,...
145,f6a9cee1149d,Typical
146,f75463651b85,Negative
147,f7f391fe9a3c,Atypical
148,fb31eeb4f056,Negative
