In [1]:
import h5py
import torch
import torchvision
from torchvision import transforms
import torchvision.models as models
from torch.utils.data import Dataset, DataLoader, random_split
import torch.nn as nn
import numpy as np
from PIL import Image
import math

In [2]:
torch.cuda.empty_cache()

In [3]:
# Dataset Creation

In [4]:
transform = transforms.Compose([transforms.ToTensor(), transforms.CenterCrop((267,267))])
transform2 = transforms.ToTensor()
file_path = '/home/jovyan/PlanetaryComputerExamples/MLWare Dataset/train.h5'

In [5]:
class dataset(Dataset):
    
    def __init__(self, file_path, transform=None):
        ds = h5py.File(file_path, "r")
        self.transform = transform
        self.x_data = ds['x']
        self.y_data = ds['y']
        self.y_data = np.reshape(self.y_data, (-1, 1))
        self.n_samples = self.y_data.shape[0]

    def __getitem__(self, index):
        return self.transform(self.x_data[index].astype('float32')), torch.from_numpy(self.y_data[index].astype('float32'))

    # we can call len(dataset) to return the size
    def __len__(self):
        return self.n_samples

In [6]:
cell_dataset = dataset(file_path, transform2)
train_dataset, test_dataset = random_split(cell_dataset, (10000, 3400))

In [7]:
# model

In [8]:
net = models.resnet34(pretrained=False)
net.fc = nn.Linear(512, 1)

In [9]:
# prep data for training

In [18]:
train_batch_size = 32
test_batch_size = 1
train_dataloader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=test_batch_size, shuffle=True)

In [11]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda', index=0)

In [12]:
# net = xresnet101(n_out=1, pretrained=False)
net = net.cuda() if device else net

In [50]:
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(net.parameters()),lr=0.00005,betas=(0.9,0.999),eps=1e-08,weight_decay=0,amsgrad=False)

In [48]:
dataiter = iter(train_dataloader)
data = dataiter.next()
features, labels = data
features, labels = features.to(device), labels.to(device)
print(features.shape, labels.shape)
out = net(features)
print(criterion(out, labels))
loss = torch.sqrt(criterion(out, labels))
loss

torch.Size([32, 3, 299, 299]) torch.Size([32, 1])
tensor(0.1437, device='cuda:0', grad_fn=<MseLossBackward0>)


tensor(0.3791, device='cuda:0', grad_fn=<SqrtBackward0>)

In [None]:
epochs = 100
train_loss = []
val_loss_history = []
max_iters = len(train_dataloader)
for epoch in range(epochs):
    running_loss = 0
    val_loss = 0
    for batch_idx, (x, y) in enumerate(train_dataloader):
        x, y = x.to(device), y.to(device)
        optimizer.zero_grad()
        output = net(x)
        loss = torch.sqrt(criterion(output, y))
        running_loss += loss.item() 
        loss.backward()
        optimizer.step()
        if (batch_idx) % 50 == 0:
            print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}' 
                   .format(epoch, epochs, batch_idx, max_iters, loss.item()))
    print(f'\ntrain-loss: {running_loss/max_iters:.4f}')
    train_loss.append(running_loss/max_iters)
    if epoch % 5 == 0:
        with torch.no_grad():
            net.eval()
            for data, target in (test_dataloader):
                data, target = data.to(device), target.to(device)
                out = net(data)
                loss_ = criterion(out, target)
                val_loss += loss_.item()
            val_loss = math.sqrt(val_loss/len(test_dataloader))
            val_loss_history.append(val_loss)
            print(f'validation loss: {loss_.item()/len(test_dataloader):.4f}\n')
    net.train()
    # torch.cuda.empty_cache()

In [52]:
torch.save(net.state_dict(), '/home/jovyan/PlanetaryComputerExamples/MLWare_After Competition/resnet34_weights.pt')

In [None]:
torch.save(net, '/home/jovyan/PlanetaryComputerExamples/MLWare_After Competition/resnet34_model.pt')

In [17]:
np.save('/home/jovyan/PlanetaryComputerExamples/MLWare_After Competition/train_loss.npy', train_loss)

In [18]:
np.save('/home/jovyan/PlanetaryComputerExamples/MLWare_After Competition/val_loss.npy', val_loss_history)

In [24]:
# making predictions on the test dataset

In [53]:
pred_file_path = '/home/jovyan/PlanetaryComputerExamples/MLWare Dataset/test.h5'
pred_transform = transforms.ToTensor()

In [54]:
class Pred_Dataset(Dataset):
    
    def __init__(self, file_path, transform=None):
        ds = h5py.File(file_path, "r")
        self.transform = transform
        self.x_data = ds['x']
        self.n_samples = self.x_data.shape[0]

    def __getitem__(self, index):
        return self.transform(self.x_data[index]).float()

    # we can call len(dataset) to return the size
    def __len__(self):
        return self.n_samples

In [56]:
pred_dataset = Pred_Dataset(pred_file_path, pred_transform)

In [57]:
from tqdm import tqdm
pred_dataloader = DataLoader(pred_dataset, batch_size=1, shuffle=False)
preds = []
print(len(pred_dataloader))

6600


In [58]:
for idx, pred_data in tqdm(enumerate(pred_dataloader)):
    pred_data = pred_data.to(device)
    prediction = net(pred_data).item()
    preds.append(prediction)

6600it [00:44, 147.50it/s]


In [39]:
round_preds = [round(num) for num in preds]
round_preds

[2,
 3,
 1,
 1,
 3,
 6,
 3,
 3,
 2,
 2,
 1,
 1,
 6,
 5,
 4,
 5,
 4,
 1,
 1,
 3,
 3,
 3,
 3,
 6,
 4,
 4,
 6,
 1,
 5,
 1,
 1,
 1,
 3,
 4,
 5,
 4,
 5,
 6,
 1,
 4,
 4,
 5,
 2,
 1,
 2,
 3,
 4,
 4,
 3,
 1,
 7,
 2,
 1,
 5,
 2,
 3,
 2,
 1,
 4,
 1,
 3,
 3,
 2,
 2,
 1,
 6,
 2,
 3,
 1,
 5,
 2,
 2,
 5,
 2,
 3,
 5,
 6,
 3,
 1,
 4,
 3,
 3,
 6,
 4,
 1,
 2,
 2,
 3,
 4,
 5,
 1,
 6,
 1,
 4,
 5,
 3,
 4,
 3,
 4,
 1,
 5,
 3,
 1,
 3,
 5,
 1,
 1,
 4,
 2,
 1,
 2,
 1,
 2,
 6,
 1,
 1,
 1,
 5,
 6,
 1,
 2,
 5,
 1,
 3,
 4,
 1,
 1,
 1,
 2,
 4,
 5,
 6,
 1,
 1,
 2,
 2,
 4,
 1,
 2,
 2,
 1,
 1,
 2,
 4,
 1,
 2,
 4,
 3,
 1,
 4,
 1,
 1,
 1,
 1,
 1,
 5,
 1,
 1,
 4,
 2,
 4,
 6,
 5,
 4,
 1,
 2,
 2,
 1,
 1,
 3,
 1,
 6,
 3,
 6,
 2,
 4,
 1,
 5,
 3,
 2,
 4,
 2,
 6,
 1,
 1,
 2,
 2,
 3,
 2,
 4,
 4,
 1,
 4,
 4,
 4,
 3,
 6,
 1,
 1,
 6,
 1,
 1,
 1,
 1,
 3,
 2,
 4,
 4,
 1,
 2,
 1,
 7,
 4,
 1,
 3,
 5,
 1,
 5,
 4,
 4,
 6,
 2,
 6,
 4,
 3,
 1,
 6,
 5,
 4,
 2,
 1,
 6,
 2,
 1,
 1,
 3,
 1,
 2,
 2,
 2,
 6,
 2,
 3,
 4,
 2,
 1,
 5,
 3,
 3,
 3,


In [42]:
id = []
for i in range(6600):
  id.append(i)

In [43]:
import pandas as pd
df = pd.DataFrame()
df['id'] = id
df['label'] = round_preds
df

Unnamed: 0,id,label
0,0,2
1,1,3
2,2,1
3,3,1
4,4,3
...,...,...
6595,6595,4
6596,6596,2
6597,6597,1
6598,6598,6


In [45]:
df.to_csv('/home/jovyan/PlanetaryComputerExamples/MLWare_After Competition/_prediction3.csv', index=False)