In [None]:
# Import necessary libraries:

import numpy as np
import pandas as pd
import torch
from torchvision import datasets, transforms, models
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, Dataset
from torch import nn
import torch.nn.functional as F
from torch import optim
from tqdm import tqdm
import pickle
from torch.optim.lr_scheduler import StepLR
import time
from PIL import Image

In [None]:
# Mount Google Drive at /content/drive so files stored there can be read and
# written from this runtime (the mount point is confirmed by the cell output).
# NOTE(review): `google.colab` is presumably only importable inside a Colab
# session — confirm before running this notebook elsewhere.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Print the name of CUDA device 0 to confirm which GPU is attached.
# NOTE(review): this call is expected to raise when no CUDA device is visible,
# which doubles as a fail-fast check before the `.cuda()` calls further down.
print(torch.cuda.get_device_name(0))

Tesla K80


In [None]:
# !cp "/content/drive/MyDrive/DS5500 Data/nasa_tropical_storm_competition_train_source.tar" -r "/content/nasa_tropical_storm_competition_train_source.tar"
# !cp "/content/drive/MyDrive/DS5500 Data/nasa_tropical_storm_competition_test_source.tar" -r "/content/nasa_tropical_storm_competition_test_source.tar"

In [None]:
# !tar -xvf "/content/nasa_tropical_storm_competition_train_source.tar" -C "/content/"
# !tar -xvf "/content/nasa_tropical_storm_competition_test_source.tar" -C "/content/"


In [None]:
def get_image_paths(data, image_dir, folder_name):
    """Return a copy of ``data`` with an ``image_path`` column added.

    Each path is built as ``<image_dir>/<folder_name>_<image_id>/image.jpg``.

    Parameters
    ----------
    data : pandas.DataFrame
        Metadata table. Must contain a string-valued ``image_id`` column.
    image_dir : str
        Directory that contains one sub-folder per image.
    folder_name : str
        Prefix shared by every per-image sub-folder name.

    Returns
    -------
    pandas.DataFrame
        A new frame whose columns are ordered ``image_id``, ``storm_id``
        (when present), ``image_path``, followed by every remaining column in
        its original relative order. The input frame is left untouched.

    Raises
    ------
    KeyError
        If ``data`` has no ``image_id`` column.
    """
    image_paths = image_dir + "/" + folder_name + "_" + data["image_id"] + "/" + "image.jpg"

    # assign() builds a new frame, so the caller's DataFrame is never mutated
    # (the previous in-place column write leaked into the argument).
    with_paths = data.assign(image_path=image_paths)

    # Order columns by name rather than by hard-coded position: this keeps the
    # layout stable when a column is missing and makes re-running the function
    # on its own output a no-op instead of a reshuffle.
    leading = [col for col in ("image_id", "storm_id") if col in with_paths.columns]
    leading.append("image_path")
    trailing = [col for col in with_paths.columns if col not in leading]
    return with_paths[leading + trailing]

In [None]:
class HurricaneImageDataset(Dataset):
    """Map-style dataset yielding ``(image, wind_speed)`` pairs.

    Parameters
    ----------
    metadata : pandas.DataFrame
        One row per sample; must provide ``image_path`` and ``wind_speed``
        columns. Rows are addressed by position, so the frame's index labels
        do not need to be a fresh 0..n-1 range.
    transforms : callable or None
        Applied to the opened PIL image before it is returned. When falsy the
        PIL image is returned as-is.
    """

    def __init__(self, metadata, transforms):
        self.metadata = metadata
        self.transforms = transforms

    def __len__(self):
        """Number of samples, i.e. number of metadata rows."""
        return len(self.metadata)

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair stored at position ``index``."""
        if torch.is_tensor(index):
            index = index.tolist()

        # Positional lookup (.iloc): a sampler hands out positions in
        # range(len(self)), which only coincide with index *labels* when the
        # frame has a default RangeIndex. Label lookup on a filtered frame
        # would raise KeyError or silently pick another row.
        image_path = self.metadata["image_path"].iloc[index]
        hurricane_image = Image.open(image_path)
        label = self.metadata["wind_speed"].iloc[index]

        if self.transforms:
            hurricane_image = self.transforms(hurricane_image)

        return hurricane_image, label

In [None]:
# Fixed seed so the train/validation split is identical after every
# "Restart & Run All"; TRAIN_FRACTION is the expected share of training rows.
SEED = 42
TRAIN_FRACTION = 0.8

# No trailing slash: every path below is built as "<data_dir>/<name>", and a
# trailing slash here produced "/content//..." paths.
data_dir = "/content"
train_metadata = pd.read_csv("{}/train.csv".format(data_dir))
test_metadata = pd.read_csv("{}/test.csv".format(data_dir))

# Row-level random split of the labelled data into training and validation.
# NOTE(review): rows sharing a storm_id can land on both sides of this split;
# consider splitting by storm if leakage between splits is a concern.
rng = np.random.default_rng(SEED)
msk = rng.random(len(train_metadata)) < TRAIN_FRACTION
train_metadata2 = train_metadata[msk].reset_index(drop=True)
valid_metadata = train_metadata[~msk].reset_index(drop=True)

train_folder_name = "nasa_tropical_storm_competition_train_source"
test_folder_name = "nasa_tropical_storm_competition_test_source"
train_image_dir = "{}/{}".format(data_dir, train_folder_name)
test_image_dir = "{}/{}".format(data_dir, test_folder_name)

# Validation images come from the training archive, so they share its folder.
train_metadata2 = get_image_paths(train_metadata2, train_image_dir, train_folder_name)
test_metadata = get_image_paths(test_metadata, test_image_dir, test_folder_name)
valid_metadata = get_image_paths(valid_metadata, train_image_dir, train_folder_name)

In [None]:
# Preview the validation split (rendered as a truncated DataFrame table).
valid_metadata

Unnamed: 0,image_id,storm_id,image_path,wind_speed,relative_time,ocean
0,abs_001,abs,/content//nasa_tropical_storm_competition_trai...,44,1800,2
1,abs_002,abs,/content//nasa_tropical_storm_competition_trai...,45,5400,2
2,abs_012,abs,/content//nasa_tropical_storm_competition_trai...,65,48599,2
3,abs_020,abs,/content//nasa_tropical_storm_competition_trai...,64,64800,2
4,abs_024,abs,/content//nasa_tropical_storm_competition_trai...,61,73800,2
...,...,...,...,...,...,...
14181,zzp_190,zzp,/content//nasa_tropical_storm_competition_trai...,87,491399,1
14182,zzp_193,zzp,/content//nasa_tropical_storm_competition_trai...,85,498599,1
14183,zzp_199,zzp,/content//nasa_tropical_storm_competition_trai...,85,511200,1
14184,zzp_202,zzp,/content//nasa_tropical_storm_competition_trai...,85,518400,1


In [None]:
# Spot-check the image ids next to their generated image paths in the training split.
train_metadata2[["image_id", "image_path"]]

Unnamed: 0,image_id,image_path
0,abs_000,/content//nasa_tropical_storm_competition_trai...
1,abs_003,/content//nasa_tropical_storm_competition_trai...
2,abs_004,/content//nasa_tropical_storm_competition_trai...
3,abs_005,/content//nasa_tropical_storm_competition_trai...
4,abs_006,/content//nasa_tropical_storm_competition_trai...
...,...,...
56066,zzp_221,/content//nasa_tropical_storm_competition_trai...
56067,zzp_222,/content//nasa_tropical_storm_competition_trai...
56068,zzp_223,/content//nasa_tropical_storm_competition_trai...
56069,zzp_224,/content//nasa_tropical_storm_competition_trai...


In [None]:
# Preprocessing pipeline handed to the train, validation and test datasets:
#   1. resize every image to 224x224,
#   2. reduce it to a single grayscale channel,
#   3. convert the PIL image to a tensor.
# Random-resized-crop, horizontal-flip and Normalize([0.5], [0.5]) steps are
# deliberately left disabled.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.Grayscale(num_output_channels=1),
        transforms.ToTensor(),
    ]
)

In [None]:
# Number of images per batch, shared by all three loaders.
BATCH_SIZE = 512

# One dataset per split; all of them use the same preprocessing pipeline.
trainset = HurricaneImageDataset(train_metadata2, transform)
testset = HurricaneImageDataset(test_metadata, transform)
validset = HurricaneImageDataset(valid_metadata, transform)

# Only the training loader is shuffled. Evaluation loaders keep metadata row
# order so that batch outputs line up with the rows of valid_metadata /
# test_metadata; shuffling them has no effect on the aggregated loss.
trainloader = DataLoader(trainset, batch_size=BATCH_SIZE, shuffle=True)
testloader = DataLoader(testset, batch_size=BATCH_SIZE, shuffle=False)
validloader = DataLoader(validset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
class BaselineCNN(nn.Module):
    """Four-stage convolutional regressor that emits one value per image.

    Each stage is a 5x5 convolution (stride 1, padding 2) followed by ReLU and
    a 2x2 max-pool; channel widths go 1 -> 16 -> 32 -> 64 -> 128. The pooled
    feature map is flattened, passed through dropout, a 32-unit ReLU layer,
    dropout again, and finally a single-unit linear output.

    ``fc1`` is sized for 128 * 14 * 14 features, which is what a 1x224x224
    input yields after the four pooling steps.
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor (spatial size preserved by padding=2).
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, stride=1, padding=2)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5, stride=1, padding=2)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2)
        self.conv4 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=5, stride=1, padding=2)
        # Shared down-sampling step: halves height and width after every conv.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # Regression head.
        self.fc1 = nn.Linear(in_features=128 * 14 * 14, out_features=32)
        self.output = nn.Linear(in_features=32, out_features=1)
        self.dropout = nn.Dropout(p=0.25)

    def forward(self, x):
        """Map a batch of shape (N, 1, 224, 224) to predictions of shape (N, 1)."""
        # conv -> ReLU -> pool, once per convolutional stage.
        for conv in (self.conv1, self.conv2, self.conv3, self.conv4):
            x = self.pool(F.relu(conv(x)))

        # Collapse everything but the batch dimension, then run the head.
        features = self.dropout(x.flatten(start_dim=1))
        hidden = self.dropout(F.relu(self.fc1(features)))
        return self.output(hidden)

In [None]:
# Build the baseline network, move its parameters to the GPU, and print the
# layer layout as a sanity check.
model = BaselineCNN()
model.cuda()
print(model)

BaselineCNN(
  (conv1): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (conv2): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (conv3): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (conv4): Conv2d(64, 128, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=25088, out_features=32, bias=True)
  (output): Linear(in_features=32, out_features=1, bias=True)
  (dropout): Dropout(p=0.25, inplace=False)
)


In [None]:
# Per-layer output shapes and parameter counts for a single 1x224x224 input.
# NOTE(review): this import sits mid-notebook; consider moving it to the
# import cell at the top so all dependencies are visible in one place.
from torchsummary import summary
summary(model.cuda(), (1, 224, 224))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 16, 224, 224]             416
         MaxPool2d-2         [-1, 16, 112, 112]               0
            Conv2d-3         [-1, 32, 112, 112]          12,832
         MaxPool2d-4           [-1, 32, 56, 56]               0
            Conv2d-5           [-1, 64, 56, 56]          51,264
         MaxPool2d-6           [-1, 64, 28, 28]               0
            Conv2d-7          [-1, 128, 28, 28]         204,928
         MaxPool2d-8          [-1, 128, 14, 14]               0
           Dropout-9                [-1, 25088]               0
           Linear-10                   [-1, 32]         802,848
          Dropout-11                   [-1, 32]               0
           Linear-12                    [-1, 1]              33
Total params: 1,072,321
Trainable params: 1,072,321
Non-trainable params: 0
---------------------------

In [None]:
# Loss function: mean squared error between the model output and the target.
criterion = nn.MSELoss()

# Optimizer: Adam over every model parameter with a learning rate of 1e-3.
optimizer = optim.Adam(model.parameters(), lr = 0.001)

In [None]:
n_epochs = 50
# np.inf instead of np.Inf: the capitalised alias was removed in NumPy 2.0.
valid_loss_min = np.inf
# NOTE: the "test_loss" list stores the per-epoch *validation* loss. The key
# name is kept unchanged so anything reading the saved history keeps working.
losses = {"train_loss": [], "test_loss": []}
# Where the best-so-far weights (lowest validation loss) are checkpointed.
best_model_path = "/content/drive/MyDrive/DS5500 Data/baselineCNN_best_model.pt"


def _run_epoch(loader, training):
    """Run one full pass over ``loader`` and return the mean per-sample loss.

    Parameters
    ----------
    loader : DataLoader
        Yields ``(images, wind_speed)`` batches.
    training : bool
        True  -> train mode, gradients enabled, optimizer stepped per batch.
        False -> eval mode, gradients disabled, weights left untouched.

    Returns
    -------
    float
        Sum of per-batch losses weighted by batch size, divided by the number
        of samples in ``loader.dataset``.
    """
    model.train(training)  # train(False) is equivalent to eval()
    running_loss = 0.0
    # Disabling autograd during evaluation avoids building graphs that are
    # never back-propagated, which otherwise wastes GPU memory.
    with torch.set_grad_enabled(training):
        for data, target in loader:
            data, target = data.cuda(), target.cuda()
            # (N,) integer labels -> (N, 1) floats to match the model output.
            target = target.float().unsqueeze(1)
            if training:
                optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, target)
            if training:
                loss.backward()
                optimizer.step()
            # criterion returns a batch mean; re-weight by batch size so a
            # smaller final batch does not skew the epoch average.
            running_loss += loss.item() * data.size(0)
    return running_loss / len(loader.dataset)


for epoch in range(1, n_epochs + 1):
    train_loss = _run_epoch(trainloader, training=True)
    valid_loss = _run_epoch(validloader, training=False)

    losses["train_loss"].append(train_loss)
    losses["test_loss"].append(valid_loss)

    print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
        epoch, train_loss, valid_loss))

    # Checkpoint whenever the validation loss matches or beats the best so far.
    if valid_loss <= valid_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
            valid_loss_min,
            valid_loss))
        torch.save(model.state_dict(), best_model_path)
        valid_loss_min = valid_loss

RuntimeError: ignored

In [None]:
# Persist the per-epoch loss history (the `losses` dict) as a pickle file
# under the mounted Drive folder.
with open("/content/drive/MyDrive/DS5500 Data/baseline_losses.pkl", "wb") as f:
  pickle.dump(losses, f)

In [None]:
# Display the recorded loss history.
losses