In [1]:
import time
import data_prep
import os
import torch.optim as optim
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
from torchvision import datasets, transforms, models
from collections import OrderedDict

from torch.utils.data import DataLoader
from torch.utils.data import WeightedRandomSampler

from torchensemble import NeuralForestClassifier, VotingClassifier
from torchensemble.utils.logging import set_logger

In [2]:
# Move the working directory from the notebook's folder up two levels (the
# project root in the saved run) so the data paths built from os.getcwd() in
# the following cell resolve.
# The starting directory is captured only once per kernel: a bare
# os.chdir('../..') climbs two MORE levels every time this cell is re-run,
# silently breaking every path derived from the working directory afterwards.
if 'NOTEBOOK_DIR' not in globals():
    NOTEBOOK_DIR = os.getcwd()
print("Current Path : ", NOTEBOOK_DIR)
os.chdir(os.path.abspath(os.path.join(NOTEBOOK_DIR, os.pardir, os.pardir)))

Current Path :  /Users/yaminigotimukul/DataScience/Berekley/Semesters/Spring_2024/repo/alivio/src/04_models


In [3]:
# All locations are derived from the kernel's current working directory,
# so this cell must run after the working directory has been set.
ROOT_DIR = os.getcwd()
ROOT_DATA_DIR = os.path.join(ROOT_DIR, 'data', 'xview_building_damage')
TRAIN_DATA_DIR = os.path.join(ROOT_DATA_DIR, 'train')

# Challenge layout: <data root>/challenge/{train,hold,test}
CHALLENGE_DIR = os.path.join(ROOT_DATA_DIR, 'challenge')
TRAIN_DIR, HOLD_DIR, TEST_DIR = (
    os.path.join(CHALLENGE_DIR, split_name)
    for split_name in ('train', 'hold', 'test')
)

# Echo the main locations so a wrong working directory is spotted early.
print("Root Project Path : ", ROOT_DIR)
print("Root Data Path : ", ROOT_DATA_DIR)
print("Train Data Path : ", TRAIN_DATA_DIR)

Root Project Path :  /Users/yaminigotimukul/DataScience/Berekley/Semesters/Spring_2024/repo/alivio
Root Data Path :  /Users/yaminigotimukul/DataScience/Berekley/Semesters/Spring_2024/repo/alivio/data/xview_building_damage
Train Data Path :  /Users/yaminigotimukul/DataScience/Berekley/Semesters/Spring_2024/repo/alivio/data/xview_building_damage/train


### Load Data

In [4]:
# Read the per-split metadata tables for hurricane-michael from
# <data root>/challenge/csv. The generator is consumed in order, so the three
# frames are read train -> hold -> test.
# NOTE(review): none of these frames is referenced by the other cells visible
# in this notebook.
train_meta_df, valid_meta_df, test_meta_df = (
    pd.read_csv(os.path.join(ROOT_DATA_DIR, 'challenge', 'csv', csv_name))
    for csv_name in (
        'hc_train_hurricane-michael.csv',
        'hc_hold_hurricane-michael.csv',
        'hc_test_hurricane-michael.csv',
    )
)

In [7]:
# Per-channel normalisation constants (the usual ImageNet statistics used with
# torchvision's pretrained backbones).
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

# Training pipeline: resize, random crop + flip augmentation, then tensor
# conversion and normalisation.
# NOTE(review): ColorJitter() is called with its defaults; per the torchvision
# docs all four jitter amounts default to 0, so this step appears to be a
# no-op — confirm whether real jitter was intended.
train_transform = transforms.Compose(
    [
        transforms.Resize(255),
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ColorJitter(),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ]
)

# Evaluation pipeline: deterministic resize + centre crop, same normalisation.
test_transform = transforms.Compose(
    [
        transforms.Resize(255),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ]
)

In [5]:
def get_img_class_dir(disaster_name, dataSplit, ROOT_DIR):
    """Build the image-folder path for one disaster and one data split.

    Args:
        disaster_name: Disaster identifier used as a directory name.
        dataSplit: Split directory name (the notebook passes 'train' / 'test').
        ROOT_DIR: Base directory that contains the split directories.

    Returns:
        ``<ROOT_DIR>/<dataSplit>/disaster/<disaster_name>/class/post`` joined
        with the platform's path separator. Nothing is checked on disk.
    """
    path_parts = (ROOT_DIR, dataSplit, 'disaster', disaster_name, 'class', 'post')
    return os.path.join(*path_parts)

In [12]:
# Build the loaders through the project helper. The two directory arguments
# are generated in order (train first, then test) and are followed by the
# matching transforms.
# NOTE(review): the second loader is built from the 'test' split, and the
# saved output labels it "Valid size". A 'hold' split also exists (HOLD_DIR).
# Confirm that validating on the test split is intended.
train_loader, test_loader, classes, class_to_idx = data_prep.prepare_loader(
    *(
        get_img_class_dir('hurricane-michael', split_name, CHALLENGE_DIR)
        for split_name in ('train', 'test')
    ),
    train_transform,
    test_transform,
)

print("Total Class: ", len(classes))

Train size:22686
Valid size:5657
Total Class:  5


In [18]:
# Pick the compute device by preference: CUDA, then Apple MPS, else CPU.
if torch.cuda.is_available():
    device = 'cuda'
elif torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'

print("device = ", device)

device =  mps


In [14]:
# Load DenseNet-161 with its ImageNet checkpoint.
# `pretrained=True` is deprecated since torchvision 0.13; the explicit weights
# enum below selects the same IMAGENET1K_V1 checkpoint without the deprecation
# warning. (This notebook already relies on torch.backends.mps, i.e. a
# torch/torchvision generation that ships the weights API.)
densenet = models.densenet161(weights=models.DenseNet161_Weights.IMAGENET1K_V1)
# Show the stock classification layer that a later cell replaces.
densenet.classifier



Linear(in_features=2208, out_features=1000, bias=True)

In [15]:
# Pass the backbone through the project helper. Judging by its name it
# disables gradient updates on the existing weights; the implementation is not
# visible in this notebook.
densenet = data_prep.freeze_parameters(densenet)
# NOTE(review): num_classes is not referenced by any other cell visible in this
# notebook; the classifier heads hard-code 16 outputs instead. Confirm which
# value is intended.
num_classes = 5

In [16]:
# Replace the stock DenseNet classifier with a two-layer head that ends in
# LogSoftmax. The same module object is also kept under the name `classifier`.
# NOTE(review): the head emits 16 scores although num_classes is 5 — confirm
# the intended output width before changing it, since the ensemble cell also
# assumes 16.
densenet.classifier = classifier = nn.Sequential(
    nn.Linear(2208, 1024),   # 2208 = in_features of the stock classifier
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(1024, 16),
    nn.LogSoftmax(dim=1),
)

densenet.classifier

Sequential(
  (0): Linear(in_features=2208, out_features=1024, bias=True)
  (1): ReLU()
  (2): Dropout(p=0.4, inplace=False)
  (3): Linear(in_features=1024, out_features=16, bias=True)
  (4): LogSoftmax(dim=1)
)

In [22]:
# Keep the model on the CPU for this run, regardless of the device picked by
# the auto-detection cell.
densenet.to('cpu') ### Just for testing

# NLLLoss consumes log-probabilities, matching the LogSoftmax layer that ends
# the classifier head.
criterion = nn.NLLLoss()
# Only the parameters of the replaced classifier head go to the optimizer.
optimizer = optim.Adam(densenet.classifier.parameters(), lr=0.003)
# Author TODO: turn this off.
# NOTE(review): the scheduler is created here but is not passed to
# data_prep.train in the training cell, so it only has an effect if something
# else calls scheduler.step() — confirm.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

In [20]:
# Number of epochs handed to every data_prep.train call in this notebook.
epoch = 5

In [23]:
# Train the DenseNet head through the project helper. It hands back the model
# plus two values bound to train_loss / test_loss (presumably per-epoch loss
# histories — the helper is not visible here).
# NOTE(review): in the saved run each epoch took roughly 90-230 minutes and the
# cell was interrupted during epoch 4, so that run never rebound these names.
densenet, train_loss, test_loss = data_prep.train(densenet, train_loader, test_loader, epoch, optimizer, criterion)

Epoch: 1/5
		Going for validation
	Train loss:1.050469.. 	Valid Loss:0.989894.. 	Accuracy: 64.4688
	Validation loss decreased (inf --> 0.989894).  Saving model ...
	Epoch:1 completed in 148m 33s
Epoch: 2/5
		Going for validation
	Train loss:1.018960.. 	Valid Loss:0.982728.. 	Accuracy: 64.4688
	Validation loss decreased (0.989894 --> 0.982728).  Saving model ...
	Epoch:2 completed in 90m 3s
Epoch: 3/5
		Going for validation
	Train loss:1.011605.. 	Valid Loss:0.979851.. 	Accuracy: 64.4511
	Validation loss decreased (0.982728 --> 0.979851).  Saving model ...
	Epoch:3 completed in 233m 55s
Epoch: 4/5


KeyboardInterrupt: 

In [None]:
# Hand both loss results of the DenseNet run to the project helper (by its
# name, an overfitting check; implementation not visible here).
# Requires the training cell above to have finished: train_loss / test_loss are
# only bound when data_prep.train returns.
data_prep.check_overfitted(train_loss, test_loss)

### ResNet-50

In [None]:
# Load ResNet-50 with its ImageNet checkpoint.
# `pretrained=True` is deprecated since torchvision 0.13; IMAGENET1K_V1 is the
# checkpoint that flag maps to, so the loaded weights are unchanged.
resnet = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
# Show the stock fully connected layer that a later cell replaces.
resnet.fc

In [None]:
# Pass the backbone through the project helper (by its name, it freezes the
# existing weights; implementation not visible in this notebook).
resnet = data_prep.freeze_parameters(resnet)

In [None]:
# Replace ResNet's final fully connected layer with a two-layer head that ends
# in LogSoftmax. The same module object is also kept under the name
# `classifier`.
# NOTE(review): 16 output scores are hard-coded here — confirm against the
# actual number of classes.
resnet.fc = classifier = nn.Sequential(
    nn.Linear(2048, 1024),
    nn.ReLU(),
    nn.Dropout(0.4),
    nn.Linear(1024, 16),
    nn.LogSoftmax(dim=1),
)

resnet.fc

In [None]:
# NOTE(review): this rebinds the notebook-wide `device`, so every later
# `.to(device)` call also targets the CPU, whatever the auto-detection cell
# selected.
device ='cpu'
resnet.to(device)
# Only the parameters of the replaced `fc` head go to the optimizer.
optimizer = optim.Adam(resnet.fc.parameters(), lr=0.003)
# Author TODO: turn this off.
# NOTE(review): the scheduler is not passed to data_prep.train in the training
# cell — confirm whether it is stepped anywhere.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

In [None]:
# Train the ResNet head with the same loaders and epoch count. `criterion` is
# not redefined in this section, so it must still be bound from the DenseNet
# section. train_loss / test_loss are overwritten with this run's results.
resnet, train_loss, test_loss = data_prep.train(resnet, train_loader, test_loader, epoch, optimizer, criterion)

In [None]:
# Hand the ResNet run's loss results to the project helper (by its name, an
# overfitting check; implementation not visible here).
data_prep.check_overfitted(train_loss, test_loss)

#### Inception Model v3

In [None]:
# Load Inception v3 with its ImageNet checkpoint.
# `pretrained=True` is deprecated since torchvision 0.13; IMAGENET1K_V1 is the
# checkpoint that flag maps to, so the loaded weights are unchanged.
# NOTE(review): torchvision documents Inception v3 as expecting 299x299 inputs,
# while the transforms in this notebook crop to 224 — confirm this is intended.
incept = models.inception_v3(weights=models.Inception_V3_Weights.IMAGENET1K_V1)

print(incept.fc)
# Switch off the auxiliary classifier so that training-mode forward passes
# yield only the main output.
incept.aux_logits = False
print(incept.aux_logits)

In [None]:
# Pass the backbone through the project helper (by its name, it freezes the
# existing weights; implementation not visible in this notebook).
incept = data_prep.freeze_parameters(incept)

In [None]:
# Main head: replaces Inception's final fully connected layer and ends in
# LogSoftmax.
# NOTE(review): 16 output scores are hard-coded here — confirm against the
# actual number of classes.
classifier = nn.Sequential(
  nn.Linear(in_features=2048, out_features=1024),
  nn.ReLU(),
  nn.Dropout(p=0.4),
  nn.Linear(in_features=1024, out_features=16),
  nn.LogSoftmax(dim=1)  
)

# Candidate head for the auxiliary classifier (currently unused, see the
# commented-out assignment below). torchvision's InceptionAux feeds 768
# features into its `fc` layer; the previous value of 786 would raise a shape
# mismatch as soon as this head was attached.
classifier2 = nn.Sequential(
  nn.Linear(in_features=768, out_features=512),
  nn.ReLU(),
  nn.Dropout(p=0.4),
  nn.Linear(in_features=512, out_features=16),
  nn.LogSoftmax(dim=1)  
)
    
incept.fc = classifier
#incept.AuxLogits.fc = classifier2

print(incept.fc)

In [None]:
# `device` is whatever was bound last; the ResNet section rebinds it to 'cpu'.
incept.to(device)
# Only the parameters of the replaced `fc` head go to the optimizer.
optimizer = optim.Adam(incept.fc.parameters(),lr=0.003)
# Author TODO: turn this off.
# NOTE(review): the scheduler is not passed to data_prep.train in the training
# cell — confirm whether it is stepped anywhere.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

In [None]:
# Train the Inception head with the same loaders and epoch count. `criterion`
# is not redefined in this section, so it must still be bound from the DenseNet
# section. train_loss / test_loss are overwritten with this run's results.
incept, train_loss, test_loss = data_prep.train(incept, train_loader, test_loader, epoch, optimizer, criterion)

In [None]:
# Hand the Inception run's loss results to the project helper (by its name, an
# overfitting check; implementation not visible here).
data_prep.check_overfitted(train_loss, test_loss)

#### Ensembling

In [None]:
class BuildingClassifierEnsemble(nn.Module):
    """Ensemble that sums the outputs of three models and re-scores the sum.

    The three member models are called on the same input, their outputs are
    added element-wise, and a single linear layer maps the sum to 16 scores.

    Args:
        modelA, modelB, modelC: Member models. They must accept the same input
            and return tensors of identical shape whose last dimension equals
            ``input``.
        input: Width of each member's output, i.e. ``in_features`` of the
            combining linear layer.
    """

    def __init__(self, modelA, modelB, modelC, input):
        super().__init__()
        self.modelA = modelA
        self.modelB = modelB
        self.modelC = modelC

        # Linear layer applied to the summed member outputs.
        self.fc1 = nn.Linear(input, 16)

    def forward(self, x):
        """Return per-class log-probabilities of shape ``(batch, 16)``.

        Log-probabilities (not plain softmax probabilities) are returned
        because this notebook trains the ensemble with ``nn.NLLLoss``, which
        expects log-probabilities. Feeding it softmax output produces a loss
        that is not a negative log-likelihood. It also keeps the ensemble's
        output consistent with the LogSoftmax heads of the member models.
        """
        summed = self.modelA(x) + self.modelB(x) + self.modelC(x)
        scores = self.fc1(summed)
        return torch.log_softmax(scores, dim=1)

In [None]:
# Combine the three models defined in the sections above. The trailing 16 is
# the width of each member's output, matching the 16-way heads attached to
# them.
model = BuildingClassifierEnsemble(densenet, resnet, incept, 16)

In [None]:
# `device` is whatever was bound last; the ResNet section rebinds it to 'cpu'.
model.to(device)
# All parameters of the ensemble (the three members plus fc1) go to the
# optimizer. Which of them actually receive gradients depends on what
# data_prep.freeze_parameters did to the members (not visible here).
optimizer = optim.Adam(model.parameters(),lr=0.003)
# Author TODO: turn this off.
# NOTE(review): the scheduler is not passed to data_prep.train in the training
# cell — confirm whether it is stepped anywhere.
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=3, gamma=0.1)

In [None]:
# Train the ensemble with the same loaders and epoch count. `criterion` is the
# nn.NLLLoss bound in the DenseNet section, which expects the model's forward
# pass to return log-probabilities.
model, train_loss, test_loss = data_prep.train(model, train_loader, test_loader, epoch, optimizer, criterion)