In [17]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import time
import tqdm
from tqdm.notebook import tqdm as notebooktqdm
import matplotlib.pyplot as plt
import pickle
from sklearn.model_selection import train_test_split
from PIL import Image
import timm
import os
# from google.colab import files

In [18]:
# Working directory: every relative path used below (CSV files, thumbnails,
# checkpoints) is resolved against it.
work_dir = './'
os.chdir(path=work_dir)

In [19]:
class YoutubeDataset(Dataset):
    """Dataset yielding one video per item as model-ready tensors.

    Each item is the tuple ``(video_id, feature_map, title, meta, y)``:

    * ``video_id``    -- the id taken from ``data['video_id']``.
    * ``feature_map`` -- last feature map produced by the pretrained
      ``efficientnet_b1_pruned`` encoder for the video's thumbnail
      (expected to be (320, 6, 10) for the thumbnails used here, matching the
      sizes configured in ``Model`` -- TODO confirm for other image sizes).
    * ``title``       -- the ``doc2vec`` vector stored under ``video_id``.
    * ``meta``        -- ``[period_day, subscriber_count]`` for the row.
    * ``y``           -- ``log10(views + 1)`` for the row, shape ``(1,)``.

    Args:
        data: DataFrame with the columns ``video_id``, ``period_day``,
            ``subscriber_count`` and ``views``. Rows are addressed by
            position, not by index label.
        doc2vec: mapping from video id to its title vector.
    """

    def __init__(self, data, doc2vec):
        self.ids = list(data['video_id'])
        self.titles = doc2vec
        self.data = data
        self.image_encoder = timm.create_model('efficientnet_b1_pruned', features_only =True, pretrained=True)
        # The encoder is only used as a fixed feature extractor, so switch it to
        # inference mode once here instead of on every __getitem__ call.
        self.image_encoder.eval()

    def __len__(self):
        """Return the number of videos in this split."""
        return len(self.ids)

    def __getitem__(self, idx):
        """Build the tensors for the video at position ``idx``."""
        video_id = self.ids[idx]

        # Close the file handle deterministically and force three channels so
        # that grayscale / RGBA thumbnails do not break the permute below.
        with Image.open(work_dir+'medium_15287/{}.jpg'.format(video_id)) as img:
            pixels = np.array(img.convert('RGB'))
        # (H, W, 3) -> (1, 3, H, W)
        # NOTE(review): pixels are passed to the pretrained encoder in the raw
        # 0-255 range, without mean/std normalisation -- confirm this is intended.
        image = torch.as_tensor(pixels, dtype=torch.float32).permute(2, 0, 1).unsqueeze(dim=0)

        # No gradients are needed through the encoder; without no_grad every
        # sample would carry an autograd graph into the training loop.
        with torch.no_grad():
            feature_map = self.image_encoder(image)[-1].squeeze(dim=0)

        title = self.titles[video_id]
        # NOTE(review): the vector is rounded through float16 before being
        # widened to float32; kept as-is to preserve the existing feature values.
        title = torch.as_tensor(np.array(title, dtype=np.float16), dtype=torch.float32)

        # Read only the requested row instead of materialising the full
        # column arrays on every access.
        meta = torch.tensor(
            [
                self.data['period_day'].iloc[idx],
                self.data['subscriber_count'].iloc[idx],
            ],
            dtype=torch.float32,
        )

        y = torch.tensor([np.log10(self.data['views'].iloc[idx] + 1)], dtype=torch.float32)

        return video_id, feature_map, title, meta, y

In [20]:
# from google.colab import drive
# drive.mount('/content/drive')

In [21]:
# Normalise the metadata columns and build the train / validation / test splits.
data = pd.read_csv('./train.csv')
# NOTE: despite their names, max_period and max_sub hold the column MEAN, so the
# features below are scaled by the mean (not min-max scaled). The names are kept
# because other cells may refer to them.
max_period = data['period_day'].mean()
min_period = data['period_day'].min()
max_sub = data['subscriber_count'].mean()
min_sub = data['subscriber_count'].min()
print(max_period, min_period, max_sub, min_sub)

data['period_day'] = data['period_day']/max_period
data['subscriber_count'] = data['subscriber_count']/max_sub

train_data, valid_data = train_test_split(data, test_size = 0.1, random_state = 55)

test_data = pd.read_csv('./test.csv')
# Scale the test features with the statistics computed on train.csv; otherwise
# the model would see test inputs on a different scale than it was trained on.
test_data['period_day'] = test_data['period_day']/max_period
test_data['subscriber_count'] = test_data['subscriber_count']/max_sub

# Only the first rows of each split are kept (small subset for quick runs).
train_data = train_data[:1000]
valid_data = valid_data[:100]
test_data = test_data[:100]
print('Train Dataset Size : ',len(train_data))
print('Validation Dataset Size : ',len(valid_data))
print('Test Dataset Size : ',len(test_data))

335.8148713475796 0.0 1784323.5617822357 0
Train Dataset Size :  1000
Validation Dataset Size :  100
Test Dataset Size :  100


In [22]:
# Load the pickled doc2vec data and convert it to a dict keyed by video id.
# NOTE: pickle.load can execute arbitrary code -- only open files you trust.
with open('./title_doc2vec_10', 'rb') as f:
    doc2vec = pickle.load(f)

# For every row, the first element is used as the key and the remaining
# elements as the stored vector.
data_dict = {row[0]: row[1:] for row in doc2vec}

doc2vec = data_dict
print(len(doc2vec))

15287


In [23]:
# Training hyperparameters.
batch_size, epochs = 64, 100
lr = 1e-4
save_every = 5  # passed to Model.train_ as the periodic checkpoint interval (in epochs)

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cpu'

In [24]:
# One dataset per split, all sharing the same doc2vec lookup.
train_dataset, valid_dataset, test_dataset = (
    YoutubeDataset(split, doc2vec)
    for split in (train_data, valid_data, test_data)
)

# Training batches are shuffled; validation and test are read one sample at a time.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
valid_loader = DataLoader(valid_dataset, batch_size=1)
test_loader = DataLoader(test_dataset, batch_size=1)

In [25]:
class Model(nn.Module):
    """Regression network combining thumbnail features, title vector and metadata.

    ``forward`` takes a thumbnail feature map of shape (B, 320, 6, 10), a title
    vector of shape (B, 10) and a metadata tensor of shape (B, 2), and returns
    one value per sample, shape (B, 1). The class also bundles its own training
    loop (``train_``), checkpoint loading (``restore``), batch inference
    (``predict``) and loss plotting (``plot``).
    """

    def __init__(self):
        super(Model, self).__init__()
        self.device = "cuda" if torch.cuda.is_available() else "cpu"
        self.feature_map_channel = 320
        self.feature_map_h = 6
        self.feature_map_w = 10
        self.bottle_nect_out_channel = 10

        self.title_channel = 10

        self.img_title_concat_hidden = 50
        self.img_title_concat_out = 10

        self.final_concat_hidden = 20
        self.final_concat_out = 1

        # Image branch: 3x3 conv reduces the channels, then a max-pool whose
        # kernel spans the whole (h, w) map collapses it to one value per channel.
        self.squeeze_conv = nn.Conv2d(in_channels=self.feature_map_channel, out_channels=self.bottle_nect_out_channel, kernel_size=3, padding=1)
        self.max_pool = nn.MaxPool2d(kernel_size=(self.feature_map_h, self.feature_map_w), stride=1)
        self.img_title_concat_fc1 = nn.Linear(self.bottle_nect_out_channel+self.title_channel, self.img_title_concat_hidden)
        self.img_title_concat_fc2 = nn.Linear(self.img_title_concat_hidden, self.img_title_concat_out)

        # "+2" accounts for the two metadata features concatenated in forward().
        self.final_concat_fc1 = nn.Linear(self.img_title_concat_out+2, self.final_concat_hidden)
        self.final_concat_fc2 = nn.Linear(self.final_concat_hidden, self.final_concat_out)
        self.dropout = nn.Dropout(0.5)

        # NOTE: fc1, fc2 and sigmoid are not used by forward(). They are kept so
        # that the state_dict keys stay compatible with existing checkpoints.
        self.fc1 = nn.Linear(19212, 500)
        self.fc2 = nn.Linear(500, 1)
        self.sigmoid = nn.Sigmoid()
        self.relu = nn.ReLU()

    def forward(self, feature_map, title, meta):
        """Compute the prediction for one batch.

        Args:
            feature_map: tensor of shape (B, 320, 6, 10).
            title: tensor of shape (B, 10).
            meta: tensor of shape (B, 2).

        Returns:
            Tensor of shape (B, 1) on ``self.device``.
        """
        feature_map = feature_map.to(self.device)
        title = title.to(self.device)
        meta = meta.to(self.device)

        squeezed_feature_map = self.relu(self.squeeze_conv(feature_map))
        # (B, C, 1, 1) -> (B, C). flatten is used instead of squeeze with a
        # tuple of dims, which is only accepted by PyTorch >= 2.0.
        dense_feature_map = torch.flatten(self.max_pool(squeezed_feature_map), start_dim=1)

        img_title_feature = torch.cat([dense_feature_map, title], dim=1)
        img_title_feature = self.relu(self.img_title_concat_fc1(img_title_feature))
        img_title_feature = self.relu(self.img_title_concat_fc2(img_title_feature))
        img_title_feature = self.dropout(img_title_feature)

        whole_feature = torch.cat([img_title_feature, meta], dim=1)
        whole_feature = self.relu(self.final_concat_fc1(whole_feature))
        whole_feature = self.dropout(whole_feature)
        x = self.final_concat_fc2(whole_feature)

        return x

    def train_(self, epochs, lr, train_loader, valid_loader, save_every, max_batches_per_epoch=None):
        """Train with Adam + MSE, validating and checkpointing after every epoch.

        Args:
            epochs: number of epochs to run.
            lr: learning rate for Adam.
            train_loader: iterable of ``(video_id, feature_map, title, meta, target)`` batches.
            valid_loader: iterable of the same kind, used for validation.
            save_every: write a periodic checkpoint every ``save_every`` epochs
                (0 / None disables the periodic checkpoints).
            max_batches_per_epoch: optional cap on the number of mini-batches
                processed per epoch, for quick debugging runs. ``None`` (the
                default) processes the whole loader.

        Side effects:
            Sets ``self.criterion``, ``self.optimizer``, ``self.train_loss`` and
            ``self.valid_loss``; writes checkpoints under ``./best_model`` and
            ``./model``.
        """
        self.criterion = nn.MSELoss()
        self.optimizer = optim.Adam(self.parameters(), lr=lr)

        self.train_loss = []
        self.valid_loss = []

        # torch.save does not create missing parent directories.
        os.makedirs("./best_model", exist_ok=True)
        os.makedirs("./model", exist_ok=True)

        best_mse = float("inf")
        best_epoch = 1

        train_start = time.time()

        print("Model will be trained on {}\n".format(self.device))

        for epoch in range(1, epochs + 1):
            self.train()
            print("[Epoch {:3d} / {}]".format(epoch, epochs))

            epoch_start = time.time()
            running_loss = 0.0
            batches_seen = 0

            #training
            progress = tqdm.tqdm(train_loader, desc="Training")
            for batch_idx, batch_data in enumerate(progress):
                if max_batches_per_epoch is not None and batch_idx >= max_batches_per_epoch:
                    break
                batch_video_id, batch_image, batch_title, batch_meta, batch_target = batch_data
                batch_target = batch_target.to(self.device)

                self.optimizer.zero_grad()
                output = self(batch_image, batch_title, batch_meta)
                loss = self.criterion(output, batch_target)
                loss.backward()
                self.optimizer.step()

                running_loss += loss.item()
                batches_seen += 1
                # Show the running mini-batch loss on the progress bar instead
                # of printing one line per batch.
                progress.set_postfix(loss=loss.item())
            progress.close()

            epoch_end = time.time()
            m, s = divmod(epoch_end - epoch_start, 60)

            # Average over the batches actually processed, so the value stays
            # correct when max_batches_per_epoch cuts the epoch short.
            epoch_loss = running_loss / max(batches_seen, 1)
            self.train_loss.append(epoch_loss)

            #validation (predict() switches to eval mode and disables gradients)
            true_y, pred_y = self.predict(valid_loader)
            valid_loss = self.criterion(
                torch.as_tensor(pred_y, dtype=torch.float32).reshape(-1, 1),
                torch.as_tensor(true_y, dtype=torch.float32).reshape(-1, 1),
            )
            valid_mse = valid_loss.item()
            self.valid_loss.append(valid_mse)

            print("Train MSE = {:.4f} | Valid MSE = {:.4f}".format(epoch_loss, valid_mse))
            print(f"Train Time: {m:.0f}m {s:.0f}s\n")

            if best_mse > valid_mse:
                print("=> Best Model Updated : Epoch = {}, Valid MSE = {:.4f}\n".format(epoch, valid_mse))
                best_mse = valid_mse
                best_epoch = epoch
                torch.save(self.state_dict(), "./best_model/best_model.pt")
            else:
                print()

            # save model for every ? epoch
            if save_every and (epoch % save_every) == 0:
                torch.save(self.state_dict(),"./model/epoch{}_train{:.4f}_valid{:.4f}.pt".format(epoch, epoch_loss, valid_mse))

        m, s = divmod(time.time() - train_start, 60)
        print("\nTraining Finished...!!")
        print("\nBest Valid MSE : %.2f at epoch %d" % (best_mse, best_epoch))
        print(f"Total Time: {m:.0f}m {s:.0f}s\nModel was trained on {self.device}!")

        # Final checkpoint; skipped when no epoch ran (the loop variables used
        # in the file name would not exist).
        if self.train_loss:
            torch.save(self.state_dict(),"./model/epoch{}_train{:.4f}_valid{:.4f}.pt".format(epoch, epoch_loss, valid_mse))

    def restore(self):
        """Load the weights saved at ``./best_model/best_model.pt`` into this model."""
        with open("./best_model/best_model.pt", "rb") as f:
            # map_location lets a checkpoint written on a GPU machine be
            # loaded on a CPU-only one (and vice versa).
            state_dict = torch.load(f, map_location=self.device)
        self.load_state_dict(state_dict)

    def predict(self, dataloader):
        """Run inference over ``dataloader``.

        The model is left in eval mode afterwards.

        Returns:
            ``(true_y, pred_y)`` as numpy arrays: ``true_y`` is flattened to
            shape (N,), ``pred_y`` keeps the model output shape (N, 1).
        """
        self.eval()
        true_y = []
        pred_y = []
        with torch.no_grad():
            for batch_video_id, batch_image, batch_title, batch_meta, batch_target in dataloader:
                pred = self(batch_image, batch_title, batch_meta)
                true_y.append(batch_target.cpu().numpy())
                pred_y.append(pred.cpu().numpy())
        # reshape(-1) instead of squeeze(): a single-sample loader must still
        # yield a 1-D array rather than a 0-d scalar.
        true_y = np.concatenate(true_y, axis=0).reshape(-1)
        pred_y = np.concatenate(pred_y, axis=0)
        return true_y, pred_y #numpy array

    def plot(self):
        """Plot per-epoch train (blue) and validation (green) MSE and save it to ./graph.png."""
        plt.plot(np.array(self.train_loss), "b", label="train")
        plt.plot(np.array(self.valid_loss), "g", label="valid")
        plt.xlabel("epoch index")
        plt.ylabel("MSE")
        plt.legend()
        plt.savefig("./graph.png")
        plt.show()


In [26]:
# Build the network, move it to the device it selected for itself, and train.
model = Model()
model = model.to(model.device)
model.train_(
    epochs=epochs,
    lr=lr,
    train_loader=train_loader,
    valid_loader=valid_loader,
    save_every=save_every,
)

Model will be trained on cpu

[Epoch   1 / 100]


Training:   0%|          | 0/16 [00:03<?, ?it/s]


KeyboardInterrupt: 