In [109]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import time
import tqdm
from tqdm.notebook import tqdm as notebooktqdm
import matplotlib.pyplot as plt
import pickle
from sklearn.model_selection import train_test_split
from PIL import Image
import timm
import os
# from google.colab import files

In [110]:
# work place
work_dir = './'
os.chdir(work_dir)

In [111]:
class YoutubeDataset(Dataset):
    def __init__(self, data, doc2vec):
        self.ids = list(data['video_id'])
        self.titles = doc2vec
        self.data = data
        self.image_encoder = timm.create_model('efficientnet_b1_pruned', features_only =True, pretrained=True)

    def __len__(self):
        return len(self.ids)

    def __getitem__(self, idx):
        video_id = self.ids[idx]
        
        image = Image.open( work_dir+'medium_15287/{}.jpg'.format(video_id))
        image = torch.FloatTensor(np.array(image)).permute(2, 0, 1).unsqueeze(dim=0)
        self.image_encoder.eval()
        feature_map = self.image_encoder(image)[-1].squeeze() # (320,6,10)
        
        title = self.titles[video_id]
        title = torch.FloatTensor(np.array(title, dtype=np.float16))
        
        meta = torch.FloatTensor(self.data[['period_day', 'subscriber_count']].to_numpy()[idx])
        
        y = np.log10(self.data['views'].to_numpy() + 1)
        y = np.expand_dims(y, axis=1)
        y = torch.FloatTensor(y[idx])
        
        return video_id, feature_map, title, meta, y

In [112]:
# from google.colab import drive
# drive.mount('/content/drive')

In [113]:
train_data, valid_data = train_test_split(pd.read_csv('./train.csv'), test_size = 0.1, random_state = 55)
test_data = pd.read_csv('./test.csv')
train_data = train_data[:1000]
valid_data = valid_data[:100]
test_data = test_data[:100]
print('Train Dataset Size : ',len(train_data))
print('Validation Dataset Size : ',len(valid_data))
print('Test Dataset Size : ',len(test_data))

Train Dataset Size :  1000
Validation Dataset Size :  100
Test Dataset Size :  100


In [114]:
# open doc2vec data and conver to dict
with open('./title_doc2vec_10', 'rb') as f:
    doc2vec = pickle.load(f)

data_dict=dict()
for row in doc2vec:
    vid=row[0]
    vec=row[1:]
    data_dict[vid]=vec

doc2vec = data_dict
print(len(doc2vec))

15287


In [115]:
#setting hyper parameters
batch_size = 64
epochs = 100
lr = 1e-4
save_every = 5
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [116]:
train_dataset = YoutubeDataset(train_data, doc2vec)
valid_dataset = YoutubeDataset(valid_data, doc2vec)
test_dataset = YoutubeDataset(test_data, doc2vec)

train_loader = DataLoader(train_dataset, batch_size = batch_size, shuffle=True)
valid_loader = DataLoader(valid_dataset, batch_size = 1)
test_loader = DataLoader(test_dataset, batch_size = 1)

In [117]:
class Model(nn.Module):
    def __init__(self):
        super(Model, self).__init__()
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.feature_map_channel = 320
        self.feature_map_h = 6
        self.feature_map_w = 10
        self.bottle_nect_out_channel = 10

        self.title_channel = 10

        self.img_title_concat_hidden = 50
        self.img_title_concat_out = 10

        self.final_concat_hidden = 20
        self.final_concat_out = 1
        
        
        self.squeeze_conv = nn.Conv2d(in_channels=self.feature_map_channel, out_channels=self.bottle_nect_out_channel, kernel_size=3, padding=1) 
        self.max_pool = nn.MaxPool2d(kernel_size=(self.feature_map_h, self.feature_map_w), stride=1)
        self.img_title_concat_fc1 = nn.Linear(self.bottle_nect_out_channel+self.title_channel, self.img_title_concat_hidden)
        self.img_title_concat_fc2 = nn.Linear(self.img_title_concat_hidden, self.img_title_concat_out)

        self.final_concat_fc1 = nn.Linear(self.img_title_concat_out+2, self.final_concat_hidden)
        self.final_concat_fc2 = nn.Linear(self.final_concat_hidden, self.final_concat_out)
 
        self.fc1 = nn.Linear(19212, 500)
        self.fc2 = nn.Linear(500, 1)
        self.sigmoid = nn.Sigmoid()
        self.relu = nn.ReLU()
    
    def forward(self, feature_map, title, meta):
        feature_map = feature_map.to(self.device)
        title = title.to(self.device)
        meta = meta.to(self.device)

        squeezed_feature_map = self.squeeze_conv(feature_map)
        squeezed_feature_map = self.relu(squeezed_feature_map)
        dense_feature_map = torch.squeeze(self.max_pool(squeezed_feature_map), dim = (2,3))

        img_title_feature = torch.cat([dense_feature_map, title], dim=1)
        img_title_feature = self.img_title_concat_fc1(img_title_feature)
        img_title_feature = self.relu(img_title_feature)
        img_title_feature = self.img_title_concat_fc2(img_title_feature)
        img_title_feature = self.relu(img_title_feature)

        whole_feature = torch.cat([img_title_feature, meta], dim=1)
        whole_feature = self.final_concat_fc1(whole_feature)
        whole_feature = self.relu(whole_feature)
        x = self.final_concat_fc2(whole_feature)

        # flatten_feature_map = torch.flatten(feature_map, start_dim=1)
        # x = torch.cat([flatten_feature_map, title, meta], dim=1)
        # x = self.sigmoid(self.fc1(x))
        # x = self.fc2(x)
        return x

    def train_(self, epochs, lr, train_loader, valid_loader, save_every):
        self.criterion = nn.MSELoss()
        self.optimizer = optim.Adam(self.parameters(), lr=lr)

        self.train_loss = []
        self.valid_loss = []

        best_mse = 1e100
        best_epoch = 1

        train_start = time.time()

        print("Model will be trained on {}\n".format(self.device))

        for epoch in range(1, epochs + 1):
            self.train()
            print("[Epoch {:3d} / {}]".format(epoch, epochs))

            epoch_start = time.time()
            epoch_loss = 0.0
            
            #training
            for batch_idx, batch_data in enumerate(tqdm.tqdm(train_loader, desc="Training")):
                batch_video_id, batch_image, batch_title, batch_meta, batch_target = batch_data
                batch_target = batch_target.to(self.device)
                
                self.optimizer.zero_grad()
                output = self.forward(batch_image, batch_title, batch_meta)
                loss = self.criterion(output, batch_target)
                loss.backward()
                self.optimizer.step()

                epoch_loss += loss.item()
                print('Epoch {}, mini-batch {}, loss {}'.format(epoch, batch_idx, loss.item()))

                # just for debuging
                break

            epoch_end = time.time()
            m, s = divmod(epoch_end - epoch_start, 60)

            epoch_loss /= len(train_loader)
            self.train_loss.append(epoch_loss)
            
            #validation
            with torch.no_grad():
                self.eval()
                true_y, pred_y = self.predict(valid_loader)                
                true_y = torch.FloatTensor(true_y).unsqueeze(dim=1)
                pred_y = torch.FloatTensor(pred_y)
                valid_loss = self.criterion(pred_y, true_y)
                self.valid_loss.append(valid_loss.item())

            print("Train MSE = {:.4f} | Valid MSE = {:.4f}".format(epoch_loss, valid_loss))
            print(f"Train Time: {m:.0f}m {s:.0f}s\n")

            valid_mse = valid_loss.item()
            if best_mse > valid_mse:
                print("=> Best Model Updated : Epoch = {}, Valid MSE = {:.4f}\n".format(epoch, valid_mse))
                best_mse = valid_mse
                best_epoch = epoch
                torch.save(self.state_dict(), "./best_model/best_model.pt")
            else:
                print()

            # save model for every ? epoch
            if (epoch % save_every) == 0:
                torch.save(self.state_dict(),"./model/epoch{}_train{:.4f}_valid{:.4f}.pt".format(epoch, epoch_loss, valid_mse))

        m, s = divmod(time.time() - train_start, 60)
        print("\nTraining Finished...!!")
        print("\nBest Valid MSE : %.2f at epoch %d" % (best_mse, best_epoch))
        print(f"Total Time: {m:.0f}m {s:.0f}s\nModel was trained on {self.device}!")

        torch.save(self.state_dict(),"./model/epoch{}_train{:.4f}_valid{:.4f}.pt".format(epoch, epoch_loss, valid_mse))
    
    def restore(self):
        with open("./best_model/best_model.pt", "rb") as f:
            state_dict = torch.load(f)
        self.load_state_dict(state_dict)

    def predict(self, dataloader):
        with torch.no_grad():
            self.eval()
            true_y = []
            pred_y = []
            for batch_video_id, batch_image, batch_title, batch_meta, batch_target in dataloader:
                pred = self.forward(batch_image, batch_title, batch_meta)
                true_y.append(batch_target.numpy())
                pred_y.append(pred.cpu().numpy())
            true_y = np.concatenate(true_y, axis=0).squeeze()
            pred_y = np.concatenate(pred_y, axis=0)
        return true_y, pred_y #numpy array

    def plot(self):
        plt.plot(np.array(self.train_loss), "b")
        plt.plot(np.array(self.valid_loss), "g")
        plt.savefig("./graph.png")
        plt.show()


In [118]:
model = Model()
model.to(model.device)
model.train_(epochs, lr, train_loader, valid_loader, save_every)

Model will be trained on cuda

[Epoch   1 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 1, mini-batch 0, loss 57528459264.0





Train MSE = 3595528704.0000 | Valid MSE = 95607988224.0000
Train Time: 0m 5s

=> Best Model Updated : Epoch = 1, Valid MSE = 95607988224.0000

[Epoch   2 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 2, mini-batch 0, loss 60566953984.0





Train MSE = 3785434624.0000 | Valid MSE = 90827309056.0000
Train Time: 0m 5s

=> Best Model Updated : Epoch = 2, Valid MSE = 90827309056.0000

[Epoch   3 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 3, mini-batch 0, loss 24353771520.0





Train MSE = 1522110720.0000 | Valid MSE = 87439892480.0000
Train Time: 0m 5s

=> Best Model Updated : Epoch = 3, Valid MSE = 87439892480.0000

[Epoch   4 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 4, mini-batch 0, loss 151762780160.0





Train MSE = 9485173760.0000 | Valid MSE = 84178231296.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 4, Valid MSE = 84178231296.0000

[Epoch   5 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 5, mini-batch 0, loss 45845307392.0





Train MSE = 2865331712.0000 | Valid MSE = 80835551232.0000
Train Time: 0m 5s

=> Best Model Updated : Epoch = 5, Valid MSE = 80835551232.0000

[Epoch   6 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 6, mini-batch 0, loss 77161553920.0





Train MSE = 4822597120.0000 | Valid MSE = 77350035456.0000
Train Time: 0m 5s

=> Best Model Updated : Epoch = 6, Valid MSE = 77350035456.0000

[Epoch   7 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 7, mini-batch 0, loss 92885458944.0





Train MSE = 5805341184.0000 | Valid MSE = 74145587200.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 7, Valid MSE = 74145587200.0000

[Epoch   8 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 8, mini-batch 0, loss 33807454208.0





Train MSE = 2112965888.0000 | Valid MSE = 71440760832.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 8, Valid MSE = 71440760832.0000

[Epoch   9 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 9, mini-batch 0, loss 121777561600.0





Train MSE = 7611097600.0000 | Valid MSE = 69110644736.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 9, Valid MSE = 69110644736.0000

[Epoch  10 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 10, mini-batch 0, loss 62880227328.0





Train MSE = 3930014208.0000 | Valid MSE = 66867470336.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 10, Valid MSE = 66867470336.0000

[Epoch  11 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 11, mini-batch 0, loss 67099590656.0





Train MSE = 4193724416.0000 | Valid MSE = 64950226944.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 11, Valid MSE = 64950226944.0000

[Epoch  12 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 12, mini-batch 0, loss 58815168512.0





Train MSE = 3675948032.0000 | Valid MSE = 63427387392.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 12, Valid MSE = 63427387392.0000

[Epoch  13 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 13, mini-batch 0, loss 50695446528.0





Train MSE = 3168465408.0000 | Valid MSE = 62035361792.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 13, Valid MSE = 62035361792.0000

[Epoch  14 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 14, mini-batch 0, loss 99808862208.0





Train MSE = 6238053888.0000 | Valid MSE = 60852871168.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 14, Valid MSE = 60852871168.0000

[Epoch  15 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 15, mini-batch 0, loss 50530148352.0





Train MSE = 3158134272.0000 | Valid MSE = 59661258752.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 15, Valid MSE = 59661258752.0000

[Epoch  16 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 16, mini-batch 0, loss 18489352192.0





Train MSE = 1155584512.0000 | Valid MSE = 58670108672.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 16, Valid MSE = 58670108672.0000

[Epoch  17 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 17, mini-batch 0, loss 28266713088.0





Train MSE = 1766669568.0000 | Valid MSE = 57779441664.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 17, Valid MSE = 57779441664.0000

[Epoch  18 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 18, mini-batch 0, loss 47894118400.0





Train MSE = 2993382400.0000 | Valid MSE = 56996282368.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 18, Valid MSE = 56996282368.0000

[Epoch  19 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 19, mini-batch 0, loss 44923183104.0





Train MSE = 2807698944.0000 | Valid MSE = 56251744256.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 19, Valid MSE = 56251744256.0000

[Epoch  20 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 20, mini-batch 0, loss 41276297216.0





Train MSE = 2579768576.0000 | Valid MSE = 55547908096.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 20, Valid MSE = 55547908096.0000

[Epoch  21 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 21, mini-batch 0, loss 40880648192.0





Train MSE = 2555040512.0000 | Valid MSE = 54892748800.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 21, Valid MSE = 54892748800.0000

[Epoch  22 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 22, mini-batch 0, loss 34714189824.0





Train MSE = 2169636864.0000 | Valid MSE = 54338338816.0000
Train Time: 0m 5s

=> Best Model Updated : Epoch = 22, Valid MSE = 54338338816.0000

[Epoch  23 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 23, mini-batch 0, loss 50867556352.0





Train MSE = 3179222272.0000 | Valid MSE = 53777227776.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 23, Valid MSE = 53777227776.0000

[Epoch  24 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 24, mini-batch 0, loss 57654149120.0





Train MSE = 3603384320.0000 | Valid MSE = 53218009088.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 24, Valid MSE = 53218009088.0000

[Epoch  25 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 25, mini-batch 0, loss 52989640704.0





Train MSE = 3311852544.0000 | Valid MSE = 52699844608.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 25, Valid MSE = 52699844608.0000

[Epoch  26 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 26, mini-batch 0, loss 43463729152.0





Train MSE = 2716483072.0000 | Valid MSE = 52227723264.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 26, Valid MSE = 52227723264.0000

[Epoch  27 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 27, mini-batch 0, loss 44942082048.0





Train MSE = 2808880128.0000 | Valid MSE = 51758727168.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 27, Valid MSE = 51758727168.0000

[Epoch  28 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 28, mini-batch 0, loss 17721520128.0





Train MSE = 1107595008.0000 | Valid MSE = 51336871936.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 28, Valid MSE = 51336871936.0000

[Epoch  29 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 29, mini-batch 0, loss 32274946048.0





Train MSE = 2017184128.0000 | Valid MSE = 50928861184.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 29, Valid MSE = 50928861184.0000

[Epoch  30 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 30, mini-batch 0, loss 20980332544.0





Train MSE = 1311270784.0000 | Valid MSE = 50596806656.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 30, Valid MSE = 50596806656.0000

[Epoch  31 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 31, mini-batch 0, loss 23721541632.0





Train MSE = 1482596352.0000 | Valid MSE = 50288205824.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 31, Valid MSE = 50288205824.0000

[Epoch  32 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 32, mini-batch 0, loss 113073618944.0





Train MSE = 7067101184.0000 | Valid MSE = 49975971840.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 32, Valid MSE = 49975971840.0000

[Epoch  33 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 33, mini-batch 0, loss 21747922944.0





Train MSE = 1359245184.0000 | Valid MSE = 49757368320.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 33, Valid MSE = 49757368320.0000

[Epoch  34 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 34, mini-batch 0, loss 39744753664.0





Train MSE = 2484047104.0000 | Valid MSE = 49544118272.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 34, Valid MSE = 49544118272.0000

[Epoch  35 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 35, mini-batch 0, loss 37916196864.0





Train MSE = 2369762304.0000 | Valid MSE = 49308180480.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 35, Valid MSE = 49308180480.0000

[Epoch  36 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 36, mini-batch 0, loss 74791976960.0





Train MSE = 4674498560.0000 | Valid MSE = 48970018816.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 36, Valid MSE = 48970018816.0000

[Epoch  37 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 37, mini-batch 0, loss 8772907008.0





Train MSE = 548306688.0000 | Valid MSE = 48671764480.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 37, Valid MSE = 48671764480.0000

[Epoch  38 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 38, mini-batch 0, loss 47613177856.0





Train MSE = 2975823616.0000 | Valid MSE = 48384626688.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 38, Valid MSE = 48384626688.0000

[Epoch  39 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 39, mini-batch 0, loss 21362479104.0





Train MSE = 1335154944.0000 | Valid MSE = 48136122368.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 39, Valid MSE = 48136122368.0000

[Epoch  40 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 40, mini-batch 0, loss 25165105152.0





Train MSE = 1572819072.0000 | Valid MSE = 47868334080.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 40, Valid MSE = 47868334080.0000

[Epoch  41 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 41, mini-batch 0, loss 22774069248.0





Train MSE = 1423379328.0000 | Valid MSE = 47631187968.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 41, Valid MSE = 47631187968.0000

[Epoch  42 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 42, mini-batch 0, loss 61259821056.0





Train MSE = 3828738816.0000 | Valid MSE = 47267614720.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 42, Valid MSE = 47267614720.0000

[Epoch  43 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 43, mini-batch 0, loss 70383828992.0





Train MSE = 4398989312.0000 | Valid MSE = 46593400832.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 43, Valid MSE = 46593400832.0000

[Epoch  44 / 100]


Training:   0%|          | 0/16 [00:06<?, ?it/s]

Epoch 44, mini-batch 0, loss 50055294976.0





Train MSE = 3128455936.0000 | Valid MSE = 45818818560.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 44, Valid MSE = 45818818560.0000

[Epoch  45 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 45, mini-batch 0, loss 25477437440.0





Train MSE = 1592339840.0000 | Valid MSE = 45122301952.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 45, Valid MSE = 45122301952.0000

[Epoch  46 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 46, mini-batch 0, loss 30541271040.0





Train MSE = 1908829440.0000 | Valid MSE = 44476104704.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 46, Valid MSE = 44476104704.0000

[Epoch  47 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 47, mini-batch 0, loss 17060378624.0





Train MSE = 1066273664.0000 | Valid MSE = 43972673536.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 47, Valid MSE = 43972673536.0000

[Epoch  48 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 48, mini-batch 0, loss 42769383424.0





Train MSE = 2673086464.0000 | Valid MSE = 43331928064.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 48, Valid MSE = 43331928064.0000

[Epoch  49 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 49, mini-batch 0, loss 35420442624.0





Train MSE = 2213777664.0000 | Valid MSE = 42722766848.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 49, Valid MSE = 42722766848.0000

[Epoch  50 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 50, mini-batch 0, loss 46127685632.0





Train MSE = 2882980352.0000 | Valid MSE = 42046799872.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 50, Valid MSE = 42046799872.0000

[Epoch  51 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 51, mini-batch 0, loss 19639525376.0





Train MSE = 1227470336.0000 | Valid MSE = 41439453184.0000
Train Time: 0m 5s

=> Best Model Updated : Epoch = 51, Valid MSE = 41439453184.0000

[Epoch  52 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 52, mini-batch 0, loss 18828253184.0





Train MSE = 1176765824.0000 | Valid MSE = 40882716672.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 52, Valid MSE = 40882716672.0000

[Epoch  53 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 53, mini-batch 0, loss 25610530816.0





Train MSE = 1600658176.0000 | Valid MSE = 40408301568.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 53, Valid MSE = 40408301568.0000

[Epoch  54 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 54, mini-batch 0, loss 31709773824.0





Train MSE = 1981860864.0000 | Valid MSE = 39936434176.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 54, Valid MSE = 39936434176.0000

[Epoch  55 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 55, mini-batch 0, loss 23484014592.0





Train MSE = 1467750912.0000 | Valid MSE = 39293628416.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 55, Valid MSE = 39293628416.0000

[Epoch  56 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 56, mini-batch 0, loss 18986575872.0





Train MSE = 1186660992.0000 | Valid MSE = 38606815232.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 56, Valid MSE = 38606815232.0000

[Epoch  57 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 57, mini-batch 0, loss 10659368960.0





Train MSE = 666210560.0000 | Valid MSE = 37944852480.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 57, Valid MSE = 37944852480.0000

[Epoch  58 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 58, mini-batch 0, loss 34866925568.0





Train MSE = 2179182848.0000 | Valid MSE = 37291794432.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 58, Valid MSE = 37291794432.0000

[Epoch  59 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 59, mini-batch 0, loss 58534227968.0





Train MSE = 3658389248.0000 | Valid MSE = 36667535360.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 59, Valid MSE = 36667535360.0000

[Epoch  60 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 60, mini-batch 0, loss 21140385792.0





Train MSE = 1321274112.0000 | Valid MSE = 36074295296.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 60, Valid MSE = 36074295296.0000

[Epoch  61 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 61, mini-batch 0, loss 22350536704.0





Train MSE = 1396908544.0000 | Valid MSE = 35473289216.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 61, Valid MSE = 35473289216.0000

[Epoch  62 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 62, mini-batch 0, loss 18841475072.0





Train MSE = 1177592192.0000 | Valid MSE = 34996826112.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 62, Valid MSE = 34996826112.0000

[Epoch  63 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 63, mini-batch 0, loss 23451148288.0





Train MSE = 1465696768.0000 | Valid MSE = 34453082112.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 63, Valid MSE = 34453082112.0000

[Epoch  64 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 64, mini-batch 0, loss 23890276352.0





Train MSE = 1493142272.0000 | Valid MSE = 34001324032.0000
Train Time: 0m 5s

=> Best Model Updated : Epoch = 64, Valid MSE = 34001324032.0000

[Epoch  65 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 65, mini-batch 0, loss 30189592576.0





Train MSE = 1886849536.0000 | Valid MSE = 33501616128.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 65, Valid MSE = 33501616128.0000

[Epoch  66 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 66, mini-batch 0, loss 22853906432.0





Train MSE = 1428369152.0000 | Valid MSE = 33025619968.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 66, Valid MSE = 33025619968.0000

[Epoch  67 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 67, mini-batch 0, loss 28348538880.0





Train MSE = 1771783680.0000 | Valid MSE = 32558866432.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 67, Valid MSE = 32558866432.0000

[Epoch  68 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 68, mini-batch 0, loss 40215576576.0





Train MSE = 2513473536.0000 | Valid MSE = 32098088960.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 68, Valid MSE = 32098088960.0000

[Epoch  69 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 69, mini-batch 0, loss 17972822016.0





Train MSE = 1123301376.0000 | Valid MSE = 31679375360.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 69, Valid MSE = 31679375360.0000

[Epoch  70 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 70, mini-batch 0, loss 24319569920.0





Train MSE = 1519973120.0000 | Valid MSE = 31264147456.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 70, Valid MSE = 31264147456.0000

[Epoch  71 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 71, mini-batch 0, loss 12935078912.0





Train MSE = 808442432.0000 | Valid MSE = 30877052928.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 71, Valid MSE = 30877052928.0000

[Epoch  72 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 72, mini-batch 0, loss 21022433280.0





Train MSE = 1313902080.0000 | Valid MSE = 30486065152.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 72, Valid MSE = 30486065152.0000

[Epoch  73 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 73, mini-batch 0, loss 12764362752.0





Train MSE = 797772672.0000 | Valid MSE = 30093993984.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 73, Valid MSE = 30093993984.0000

[Epoch  74 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 74, mini-batch 0, loss 24646410240.0





Train MSE = 1540400640.0000 | Valid MSE = 29649907712.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 74, Valid MSE = 29649907712.0000

[Epoch  75 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 75, mini-batch 0, loss 26340339712.0





Train MSE = 1646271232.0000 | Valid MSE = 29225975808.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 75, Valid MSE = 29225975808.0000

[Epoch  76 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 76, mini-batch 0, loss 23335393280.0





Train MSE = 1458462080.0000 | Valid MSE = 28778559488.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 76, Valid MSE = 28778559488.0000

[Epoch  77 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 77, mini-batch 0, loss 16466948096.0





Train MSE = 1029184256.0000 | Valid MSE = 28337571840.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 77, Valid MSE = 28337571840.0000

[Epoch  78 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 78, mini-batch 0, loss 9793587200.0





Train MSE = 612099200.0000 | Valid MSE = 27934619648.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 78, Valid MSE = 27934619648.0000

[Epoch  79 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 79, mini-batch 0, loss 7853743104.0





Train MSE = 490858944.0000 | Valid MSE = 27581763584.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 79, Valid MSE = 27581763584.0000

[Epoch  80 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 80, mini-batch 0, loss 28161259520.0





Train MSE = 1760078720.0000 | Valid MSE = 27206549504.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 80, Valid MSE = 27206549504.0000

[Epoch  81 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 81, mini-batch 0, loss 62894747648.0





Train MSE = 3930921728.0000 | Valid MSE = 26792878080.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 81, Valid MSE = 26792878080.0000

[Epoch  82 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 82, mini-batch 0, loss 8698259456.0





Train MSE = 543641216.0000 | Valid MSE = 26436190208.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 82, Valid MSE = 26436190208.0000

[Epoch  83 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 83, mini-batch 0, loss 8711664640.0





Train MSE = 544479040.0000 | Valid MSE = 26097682432.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 83, Valid MSE = 26097682432.0000

[Epoch  84 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 84, mini-batch 0, loss 25320785920.0





Train MSE = 1582549120.0000 | Valid MSE = 25772281856.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 84, Valid MSE = 25772281856.0000

[Epoch  85 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 85, mini-batch 0, loss 6960014336.0





Train MSE = 435000896.0000 | Valid MSE = 25466372096.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 85, Valid MSE = 25466372096.0000

[Epoch  86 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 86, mini-batch 0, loss 38129135616.0





Train MSE = 2383070976.0000 | Valid MSE = 25132249088.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 86, Valid MSE = 25132249088.0000

[Epoch  87 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 87, mini-batch 0, loss 10350567424.0





Train MSE = 646910464.0000 | Valid MSE = 24829556736.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 87, Valid MSE = 24829556736.0000

[Epoch  88 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 88, mini-batch 0, loss 50038034432.0





Train MSE = 3127377152.0000 | Valid MSE = 24482965504.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 88, Valid MSE = 24482965504.0000

[Epoch  89 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 89, mini-batch 0, loss 26709786624.0





Train MSE = 1669361664.0000 | Valid MSE = 24087107584.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 89, Valid MSE = 24087107584.0000

[Epoch  90 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 90, mini-batch 0, loss 71368597504.0





Train MSE = 4460537344.0000 | Valid MSE = 23704082432.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 90, Valid MSE = 23704082432.0000

[Epoch  91 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 91, mini-batch 0, loss 18830649344.0





Train MSE = 1176915584.0000 | Valid MSE = 23328165888.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 91, Valid MSE = 23328165888.0000

[Epoch  92 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 92, mini-batch 0, loss 66010275840.0





Train MSE = 4125642240.0000 | Valid MSE = 22965030912.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 92, Valid MSE = 22965030912.0000

[Epoch  93 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 93, mini-batch 0, loss 21665327104.0





Train MSE = 1354082944.0000 | Valid MSE = 22620846080.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 93, Valid MSE = 22620846080.0000

[Epoch  94 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 94, mini-batch 0, loss 7998611456.0





Train MSE = 499913216.0000 | Valid MSE = 22293809152.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 94, Valid MSE = 22293809152.0000

[Epoch  95 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 95, mini-batch 0, loss 9740959744.0





Train MSE = 608809984.0000 | Valid MSE = 22007035904.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 95, Valid MSE = 22007035904.0000

[Epoch  96 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 96, mini-batch 0, loss 7681838080.0





Train MSE = 480114880.0000 | Valid MSE = 21734797312.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 96, Valid MSE = 21734797312.0000

[Epoch  97 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 97, mini-batch 0, loss 31791013888.0





Train MSE = 1986938368.0000 | Valid MSE = 21450856448.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 97, Valid MSE = 21450856448.0000

[Epoch  98 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 98, mini-batch 0, loss 11205928960.0





Train MSE = 700370560.0000 | Valid MSE = 21179111424.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 98, Valid MSE = 21179111424.0000

[Epoch  99 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 99, mini-batch 0, loss 8168870912.0





Train MSE = 510554432.0000 | Valid MSE = 20906039296.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 99, Valid MSE = 20906039296.0000

[Epoch 100 / 100]


Training:   0%|          | 0/16 [00:05<?, ?it/s]

Epoch 100, mini-batch 0, loss 8625212416.0





Train MSE = 539075776.0000 | Valid MSE = 20645023744.0000
Train Time: 0m 6s

=> Best Model Updated : Epoch = 100, Valid MSE = 20645023744.0000


Training Finished...!!

Best Valid MSE : 20645023744.00 at epoch 100
Total Time: 17m 45s
Model was trained on cuda!
