In [12]:
import pandas as pd
import os
import pyarrow.parquet as pq
from tqdm import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
from torchvision.models import resnet50, resnet18

In [13]:
# Open the training metadata parquet file and pull only its first record batch
# into pandas; `it` stays bound so further batches can be read with next(it).
meta_file = pq.ParquetFile('/home/washindeiru/primaryDrive/iceCube/icecube-neutrinos-in-deep-ice/train_meta.parquet')
it = meta_file.iter_batches()
train_meta = next(it).to_pandas()

In [14]:
# Preview the first rows (at most 20) of the loaded metadata batch.
train_meta.head(20)

Unnamed: 0,batch_id,event_id,first_pulse_index,last_pulse_index,azimuth,zenith
0,1,24,0,60,5.029555,2.087498
1,1,41,61,111,0.417742,1.549686
2,1,59,112,147,1.160466,2.401942
3,1,67,148,289,5.845952,0.759054
4,1,72,290,351,0.653719,0.939117
5,1,77,352,401,0.011372,1.295371
6,1,79,402,717,3.533397,2.479947
7,1,82,718,762,5.252108,0.672366
8,1,121,763,803,3.084929,2.06588
9,1,127,804,846,6.154335,1.371721


In [15]:
from Resnet import resnet101
import torch.nn as nn
# Instantiate the project's ResNet-101 with pretrained=False.
model = resnet101(pretrained=False)
# Replace the final fully connected layer with ReLU followed by a 2048 -> 2
# linear layer, so the network emits two values per sample for our task.
model.fc = nn.Sequential(
    nn.ReLU(),
    nn.Linear(2048, 2),
)

In [16]:
# Restore the saved weights. map_location="cpu" deserialises the tensors on the
# CPU: the model has not been moved to a device at this point, so this avoids
# staging a second copy of the weights on the GPU and keeps a checkpoint that
# was saved from CUDA loadable on a CPU-only machine.
# weights_only=True limits unpickling to tensors and plain containers.
model.load_state_dict(torch.load("./model_101.pth", map_location="cpu", weights_only=True))

<All keys matched successfully>

In [17]:
from sklearn.metrics import mean_squared_error
import numpy as np

def evaluation(dataloader):
    """Return the RMSE between the model's outputs and the labels.

    Uses the notebook-level ``model`` and ``device``. The model is switched to
    eval mode and is left in eval mode afterwards.

    Args:
        dataloader: Iterable of batches; each batch is a mapping with the keys
            ``'input_tensor'`` and ``'label'``.

    Returns:
        The root of sklearn's ``mean_squared_error`` over all samples.

    Raises:
        ValueError: If ``dataloader`` yields no batches.
    """
    batch_predictions = []
    batch_targets = []
    with torch.no_grad():
        model.eval()
        for data in dataloader:
            inputs = data['input_tensor'].float().to(device)
            values = data['label'].float().to(device)
            # The head is a regression layer, so the raw outputs ARE the
            # predictions. Taking argmax (a classification idiom) would turn
            # them into 0/1 indices and drop a dimension.
            # NOTE(review): assumes labels have the same shape as the model
            # output, as the MSE training loss expects - confirm in collate_fn.
            outputs = model(inputs)
            batch_predictions.append(outputs.float().cpu())
            batch_targets.append(values.cpu())

    if not batch_predictions:
        raise ValueError("evaluation() received an empty dataloader")

    # Concatenate once at the end instead of re-allocating a growing tensor
    # on every batch.
    predictions = torch.cat(batch_predictions, 0).numpy()
    actual = torch.cat(batch_targets, 0).numpy()
    rmse = np.sqrt(mean_squared_error(actual, predictions))
    return rmse

In [18]:
from torch import optim
# Run on the GPU when CUDA is available, otherwise on the CPU.
# (To force CPU execution, assign device = 'cpu' here instead.)
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# Metadata parquet file and the training-data directory handed to the dataset.
pqfile = '/home/washindeiru/primaryDrive/iceCube/icecube-neutrinos-in-deep-ice/train_meta.parquet'
path_batch = '/home/washindeiru/primaryDrive/iceCube/icecube-neutrinos-in-deep-ice/train/'

# Run settings.
# NOTE(review): batch_num is passed to IceCube_Dataloader; its exact meaning
# is defined there - confirm before changing it.
batch_num = 8
batch_size = 6
num_epochs = 1
lr = 1e-06

# Move the model to the chosen device, then build the loss and the optimizer
# over the model's parameters.
model = model.to(device)
loss_function = nn.MSELoss().to(device)
optimizer = optim.Adam(model.parameters(), lr=lr)

In [19]:
from torch.utils.data import DataLoader
from Icecube_Dataloader import IceCube_Dataloader, collate_fn

# Build the dataset once; `ice_dataset` and `train_dataset` are two names for
# the same object.
train_dataset = ice_dataset = IceCube_Dataloader(pqfile, path_batch, batch_num)

# Shuffled loader: batches are assembled by the project's collate_fn, with
# loading spread over 11 worker processes.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=collate_fn,
    num_workers=11,
)

In [20]:
# Show which device was selected (CUDA if available, otherwise CPU).
print(device)

cuda


In [21]:
# Settings bundle handed to train().
# 'num_epoch' carries the same value as 'num_epochs'; both keys are kept so
# that any code reading either spelling keeps working.
params = dict(
    num_epochs=num_epochs,
    optimizer=optimizer,
    loss_function=loss_function,
    train_dataloader=train_dataloader,
    device=device,
    num_epoch=num_epochs,
)


In [22]:
import gc
# Notebook-level logs filled in by train(): one loss value and one array of
# model outputs per optimisation step. Re-running this cell resets both.
train_losses, outputs_ = [], []

def train(model, params):
    """Run the training loop, logging the loss and outputs of every step.

    Args:
        model: Network to optimise. This function does not move it; only the
            batches are moved to ``params["device"]``.
        params: Mapping with the keys ``"loss_function"``,
            ``"train_dataloader"`` and ``"device"``. ``"optimizer"`` and
            ``"num_epochs"`` are used when present; if a key is missing the
            notebook-level ``optimizer`` / ``num_epochs`` is used instead,
            which is what this function always did before.

    Returns:
        Always ``0``.

    Side effects:
        Appends each step's loss to the notebook-level ``train_losses`` and
        each step's detached outputs (as a NumPy array) to ``outputs_``.
        Prints the loss every 100 steps. The model is put back into eval mode
        and cached GPU memory is released even if training raises.
    """
    loss_function = params["loss_function"]
    train_dataloader = params["train_dataloader"]
    device = params["device"]
    # Honour what the caller put into `params` instead of silently reaching
    # for the notebook globals of the same name.
    opt = params["optimizer"] if "optimizer" in params else optimizer
    n_epochs = params["num_epochs"] if "num_epochs" in params else num_epochs

    model.train()
    step = 0  # global step counter; keeps counting across epochs

    try:
        for epoch in range(n_epochs):
            for dat in tqdm(train_dataloader):
                inputs = dat['input_tensor'].to(device)
                labels = dat['label'].to(device)

                assert not torch.isnan(inputs).any(), "Inputs contain NaNs"
                assert not torch.isnan(labels).any(), "Labels contain NaNs"

                # Clear gradients BEFORE the forward/backward pass so that a
                # previous run interrupted between backward() and step()
                # cannot leak stale gradients into this update.
                opt.zero_grad()

                outputs = model(inputs)
                # Detach first so the device-to-host copy is not tracked by
                # autograd.
                outputs_.append(outputs.detach().cpu().numpy())

                train_loss = loss_function(outputs.float(), labels.float())
                loss_value = train_loss.item()
                train_losses.append(loss_value)

                if step % 100 == 0:
                    print(f"Train loss: {loss_value}")

                train_loss.backward()

                # Cap the global gradient norm at 1.0 before the update.
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                opt.step()
                torch.cuda.empty_cache()

                step += 1
    finally:
        # Runs on success and on failure (e.g. CUDA OOM), so the model never
        # stays in train mode and cached blocks are handed back to the driver.
        model.eval()
        gc.collect()
        torch.cuda.empty_cache()

    return 0


# Start training with the settings collected in `params`; as the last
# expression of the cell, train()'s return value is echoed by the notebook.
train(model, params)

  0%|          | 1/33334 [00:01<18:24:48,  1.99s/it]

Train loss: 1.8851845264434814


  0%|          | 102/33334 [00:10<45:46, 12.10it/s] 

Train loss: 2.735726833343506


  0%|          | 104/33334 [00:11<1:02:33,  8.85it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 82.00 MiB. GPU 0 has a total capacity of 7.77 GiB of which 55.56 MiB is free. Process 205185 has 958.00 MiB memory in use. Including non-PyTorch memory, this process has 6.27 GiB memory in use. Of the allocated memory 5.84 GiB is allocated by PyTorch, and 266.68 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
import matplotlib.pyplot as plt

# Plot the loss recorded for every training step in `train_losses`.
fig, ax = plt.subplots()
ax.plot(train_losses)
# Label the figure so it can be read on its own; the trailing semicolon keeps
# the notebook from echoing the return value of the last call.
ax.set(xlabel="Training step", ylabel="MSE loss", title="Training loss per step");

In [None]:
from datetime import datetime

# Capture the current local time once and derive a sortable
# YYYY-MM-DD_HH:MM:SS tag from it for naming output files.
now = datetime.now()
timestamp = datetime.strftime(now, "%Y-%m-%d_%H:%M:%S")

In [None]:
# torch.save does not create missing directories; make sure ./weights exists
# so a finished training run is not lost to a missing folder.
os.makedirs("./weights", exist_ok=True)
name = "./weights/model_101_"+timestamp+".pth"
torch.save(model.state_dict(), name)

In [None]:
import pickle

# open(..., 'wb') fails if the target directory is missing, so create
# ./results first.
os.makedirs("./results", exist_ok=True)
name = "./results/losses_"+timestamp+".pickle"
with open(name, 'wb') as f:
    # Persist the per-step training losses (a plain list of floats).
    pickle.dump(train_losses, f)