# Machine Learning

In [21]:
import numpy as np
import pandas as pd
import pickle

from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.metrics import f1_score, recall_score

import random

import dgl.function as fn
from dgl import DGLGraph

import time

import torch
import torch.nn as nn
import torch.nn.functional as F

### Loading the similarity matrices and generate graph

In [2]:
Data_path = 'Data/'

sim_mat = {}
names = ['keywords', 'genre', 'crew', 'cast']
for name in names:
    with open(Data_path+'csim_'+name, 'rb') as src:
        sim_mat[name] = pickle.load(src)
        print(f'>>> Loading {name} similarity matrix with shape {sim_mat[name].shape}')

>>> Loading keywords similarity matrix with shape (4802, 4802)
>>> Loading genre similarity matrix with shape (4802, 4802)
>>> Loading crew similarity matrix with shape (4802, 4802)
>>> Loading cast similarity matrix with shape (4802, 4802)


**Alternative 1** Combine the 4 similarity matrices equitably and pruned those with a similarity below 0.25

In [3]:
# Simple way
adj_mat = sim_mat['keywords'].copy()
for name in names[1:]:
    adj_mat = adj_mat.add(sim_mat[name], fill_value=0)
adj_mat = adj_mat/4    

threshold = 0.25
adj_mat[adj_mat < threshold] = 0

# Generate graph
G = DGLGraph(graph_data=adj_mat.values)

### Loading features and labels

In [4]:
features = ['budget','popularity','revenue','runtime','vote_average','vote_count','Nominations_GoldenGlobes']

IMDB_path = Data_path + 'nodes_attributes.csv'
IMDB = pd.read_csv(IMDB_path)
IMDB.drop(columns = ['Unnamed: 0','id'], inplace=True)
IMDB.set_index('title',inplace=True)

# features
IMDB_feat = IMDB[features]
tensor_feat = torch.FloatTensor(IMDB_feat.values)

# labels
IMDB_nom = IMDB['Nominations_Oscars'].copy()
IMDB_nom.loc[IMDB_nom > 0] = 1
tensor_nom = torch.LongTensor(IMDB_nom.values)

### Generate masks

In [5]:
sss = StratifiedShuffleSplit(n_splits=1, train_size=0.8, random_state=0)

for prov_index, test_index in sss.split(tensor_feat, tensor_nom):
    prov_mask = prov_index
    test_mask = test_index

for train_index, val_index in sss.split(tensor_feat[prov_mask], tensor_nom[prov_mask]):
    train_mask = train_index
    val_mask = val_index



In [6]:
class LaplacianPolynomial(nn.Module):
    def __init__(self,
                 in_feats: int,
                 out_feats: int,
                 k: int,
                 dropout_prob: float,
                 norm=True):
        super().__init__()
        self._in_feats = in_feats
        self._out_feats = out_feats
        self._k = k
        self._norm = norm
        # Contains the weights learned by the Laplacian polynomial
        self.pol_weights = nn.Parameter(torch.Tensor(self._k + 1))
        # Contains the weights learned by the logistic regression (without bias)
        self.logr_weights = nn.Parameter(torch.Tensor(in_feats, out_feats))
        self.dropout = nn.Dropout(p=dropout_prob)
        self.reset_parameters()

    def reset_parameters(self):
        """Reinitialize learnable parameters."""
        torch.manual_seed(0)
        torch.nn.init.xavier_uniform_(self.logr_weights, gain=0.01)
        torch.nn.init.normal_(self.pol_weights, mean=0.0, std=1e-3)

    def forward(self, graph, feat):
        r"""Compute graph convolution.

        Notes
        -----
        * Input shape: :math:`(N, *, \text{in_feats})` where * means any number of additional
          dimensions, :math:`N` is the number of nodes.
        * Output shape: :math:`(N, *, \text{out_feats})` where all but the last dimension are
          the same shape as the input.

        Parameters
        ----------
        graph (DGLGraph) : The graph.
        feat (torch.Tensor): The input feature

        Returns
        -------
        (torch.Tensor) The output feature
        """
        feat = self.dropout(feat)
        graph = graph.local_var()
        
        # D^(-1/2)
        norm = torch.pow(graph.in_degrees().float().clamp(min=1), -0.5)
        shp = norm.shape + (1,) * (feat.dim() - 1)
        norm = torch.reshape(norm, shp)
        
        # mult W first to reduce the feature size for aggregation.
        feat = torch.matmul(feat, self.logr_weights) # X*Teta

        result = self.pol_weights[0] * feat.clone() # a0*L^0*X*Teta <-- fisrt polynomial weight a0 * L^0 * x

        for i in range(1, self._k + 1): # get the next polynomial coefficient (a1*L^1, a2*L^2, ..... ak*L^k) 
            old_feat = feat.clone()
            if self._norm:
                feat = feat * norm
            graph.ndata['h'] = feat
            # Feat is not modified in place
            graph.update_all(fn.copy_src(src='h', out='m'),
                             fn.sum(msg='m', out='h')) # update all nodes with msg function copy_src (get data from source node) and reduce function sum
            if self._norm:
                graph.ndata['h'] = graph.ndata['h'] * norm

            feat = old_feat - graph.ndata['h']
            result += self.pol_weights[i] * feat

        return result

    def extra_repr(self):
        """Set the extra representation of the module,
        which will come into effect when printing the model.
        """
        summary = 'in={_in_feats}, out={_out_feats}'
        summary += ', normalization={_norm}'
        return summary.format(**self.__dict__)

Once we have are model ready we just need to create a function that performs one step of our training loop, and another one that evaluates our model.

In [22]:
def train(model, g, features, labels, loss_fcn, train_mask, optimizer):
    model.train()  # Activate dropout
    
    logits = model(g, features) # prediction
    loss = loss_fcn(logits[train_mask], labels[train_mask])

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    return loss

def evaluate(model, g, features, labels, mask):
    model.eval()  # Deactivate dropout
    with torch.no_grad():
        logits = model(g, features)[mask]  # only compute the evaluation set
        labels = labels[mask]
        _, indices = torch.max(logits, dim=1)
        correct = torch.sum(indices == labels)
        acc = correct.item() * 1.0 / len(labels)
        f1 = f1_score(labels, indices)
        recall = recall_score(labels, indices)
        #acc = torch.sum((logits.round() == labels).diagonal()).item() * 1.0 / len(labels)
        return f1, recall

Choose the training parameters.

In [25]:
in_feats = len(features)
output = 2
pol_order = 3
lr = 0.005
weight_decay = 5e-6
n_epochs = 1000
p_dropout = 0.2

And train the classifier end to end.

In [26]:
model = LaplacianPolynomial(in_feats, output, pol_order, p_dropout)

loss_fcn = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),
                             lr=lr,
                             weight_decay=weight_decay)

dur = []
for epoch in range(n_epochs):
    if epoch >= 3:
        t0 = time.time()
    loss = train(model, G, tensor_feat, tensor_nom, loss_fcn, train_mask, optimizer)
    if epoch >= 3:
        dur.append(time.time() - t0)
        
    f1, acc = evaluate(model, G, tensor_feat, tensor_nom, val_mask)
    
    if epoch%1 == 0:
        print("Epoch {:05d} | Time(s) {:.4f} | Train Loss {:.4f} | Val f1 {:.4%} | Val Accuracy {:.4%}". format(
                epoch+1, np.mean(dur), loss.item(), f1, acc))

print()
f1, acc = evaluate(model, G, tensor_feat, tensor_nom, test_mask)
print("Test Accuracy {:.4%}".format(acc))

Epoch 00001 | Time(s) nan | Train Loss 373.6951 | Val f1 34.8101% | Val Accuracy 78.5714%
Epoch 00002 | Time(s) nan | Train Loss 5433.4604 | Val f1 15.4506% | Val Accuracy 25.7143%
Epoch 00003 | Time(s) nan | Train Loss 2281.4609 | Val f1 18.3486% | Val Accuracy 28.5714%
Epoch 00004 | Time(s) 0.0066 | Train Loss 1114.9116 | Val f1 28.9256% | Val Accuracy 50.0000%
Epoch 00005 | Time(s) 0.0064 | Train Loss 69.8169 | Val f1 18.5714% | Val Accuracy 37.1429%
Epoch 00006 | Time(s) 0.0065 | Train Loss 1726.6543 | Val f1 17.7122% | Val Accuracy 34.2857%
Epoch 00007 | Time(s) 0.0065 | Train Loss 2348.0095 | Val f1 16.8498% | Val Accuracy 32.8571%
Epoch 00008 | Time(s) 0.0067 | Train Loss 956.1884 | Val f1 32.2892% | Val Accuracy 95.7143%
Epoch 00009 | Time(s) 0.0067 | Train Loss 65.7970 | Val f1 22.7425% | Val Accuracy 48.5714%
Epoch 00010 | Time(s) 0.0066 | Train Loss 447.7961 | Val f1 24.4898% | Val Accuracy 51.4286%
Epoch 00011 | Time(s) 0.0066 | Train Loss 1019.2767 | Val f1 23.6934% | Val 

Epoch 00092 | Time(s) 0.0069 | Train Loss 39.4069 | Val f1 19.0476% | Val Accuracy 17.1429%
Epoch 00093 | Time(s) 0.0069 | Train Loss 13.5947 | Val f1 18.8119% | Val Accuracy 27.1429%
Epoch 00094 | Time(s) 0.0069 | Train Loss 40.4158 | Val f1 33.3333% | Val Accuracy 51.4286%
Epoch 00095 | Time(s) 0.0069 | Train Loss 75.5392 | Val f1 32.5792% | Val Accuracy 51.4286%
Epoch 00096 | Time(s) 0.0069 | Train Loss 75.2958 | Val f1 30.4833% | Val Accuracy 58.5714%
Epoch 00097 | Time(s) 0.0069 | Train Loss 11.4496 | Val f1 25.0765% | Val Accuracy 58.5714%
Epoch 00098 | Time(s) 0.0069 | Train Loss 78.7752 | Val f1 31.6716% | Val Accuracy 77.1429%
Epoch 00099 | Time(s) 0.0069 | Train Loss 52.5274 | Val f1 30.0469% | Val Accuracy 45.7143%
Epoch 00100 | Time(s) 0.0069 | Train Loss 31.2557 | Val f1 26.4151% | Val Accuracy 40.0000%
Epoch 00101 | Time(s) 0.0068 | Train Loss 63.5791 | Val f1 28.1938% | Val Accuracy 45.7143%
Epoch 00102 | Time(s) 0.0068 | Train Loss 41.5747 | Val f1 27.2446% | Val Accura

Epoch 00192 | Time(s) 0.0066 | Train Loss 1.5844 | Val f1 5.0633% | Val Accuracy 2.8571%
Epoch 00193 | Time(s) 0.0066 | Train Loss 1.8879 | Val f1 19.5122% | Val Accuracy 17.1429%
Epoch 00194 | Time(s) 0.0066 | Train Loss 1.0402 | Val f1 17.8218% | Val Accuracy 25.7143%
Epoch 00195 | Time(s) 0.0066 | Train Loss 0.8967 | Val f1 21.2766% | Val Accuracy 50.0000%
Epoch 00196 | Time(s) 0.0066 | Train Loss 0.9134 | Val f1 30.7087% | Val Accuracy 55.7143%
Epoch 00197 | Time(s) 0.0066 | Train Loss 1.0177 | Val f1 30.6383% | Val Accuracy 51.4286%
Epoch 00198 | Time(s) 0.0066 | Train Loss 1.1032 | Val f1 29.6296% | Val Accuracy 45.7143%
Epoch 00199 | Time(s) 0.0066 | Train Loss 0.7405 | Val f1 27.5862% | Val Accuracy 62.8571%
Epoch 00200 | Time(s) 0.0066 | Train Loss 1.2389 | Val f1 14.0127% | Val Accuracy 15.7143%
Epoch 00201 | Time(s) 0.0066 | Train Loss 0.8364 | Val f1 12.0301% | Val Accuracy 11.4286%
Epoch 00202 | Time(s) 0.0066 | Train Loss 1.1140 | Val f1 19.1781% | Val Accuracy 20.0000%
E

Epoch 00288 | Time(s) 0.0066 | Train Loss 0.6125 | Val f1 2.7397% | Val Accuracy 1.4286%
Epoch 00289 | Time(s) 0.0066 | Train Loss 0.6179 | Val f1 2.7397% | Val Accuracy 1.4286%
Epoch 00290 | Time(s) 0.0066 | Train Loss 0.6249 | Val f1 7.7922% | Val Accuracy 4.2857%
Epoch 00291 | Time(s) 0.0066 | Train Loss 0.6184 | Val f1 20.2247% | Val Accuracy 12.8571%
Epoch 00292 | Time(s) 0.0066 | Train Loss 0.6185 | Val f1 19.0476% | Val Accuracy 11.4286%
Epoch 00293 | Time(s) 0.0066 | Train Loss 0.5987 | Val f1 8.0000% | Val Accuracy 4.2857%
Epoch 00294 | Time(s) 0.0066 | Train Loss 0.6078 | Val f1 5.4054% | Val Accuracy 2.8571%
Epoch 00295 | Time(s) 0.0066 | Train Loss 0.6015 | Val f1 10.2564% | Val Accuracy 5.7143%
Epoch 00296 | Time(s) 0.0066 | Train Loss 0.6091 | Val f1 20.2247% | Val Accuracy 12.8571%
Epoch 00297 | Time(s) 0.0066 | Train Loss 0.6233 | Val f1 11.3636% | Val Accuracy 7.1429%
Epoch 00298 | Time(s) 0.0066 | Train Loss 0.6156 | Val f1 7.7922% | Val Accuracy 4.2857%
Epoch 00299 |

Epoch 00384 | Time(s) 0.0065 | Train Loss 0.6072 | Val f1 17.5824% | Val Accuracy 11.4286%
Epoch 00385 | Time(s) 0.0065 | Train Loss 0.6165 | Val f1 12.3457% | Val Accuracy 7.1429%
Epoch 00386 | Time(s) 0.0065 | Train Loss 0.6007 | Val f1 7.8947% | Val Accuracy 4.2857%
Epoch 00387 | Time(s) 0.0065 | Train Loss 0.6169 | Val f1 7.6923% | Val Accuracy 4.2857%
Epoch 00388 | Time(s) 0.0065 | Train Loss 0.6046 | Val f1 17.9775% | Val Accuracy 11.4286%
Epoch 00389 | Time(s) 0.0065 | Train Loss 0.5998 | Val f1 16.8675% | Val Accuracy 10.0000%
Epoch 00390 | Time(s) 0.0065 | Train Loss 0.6096 | Val f1 7.7922% | Val Accuracy 4.2857%
Epoch 00391 | Time(s) 0.0065 | Train Loss 0.6070 | Val f1 7.6923% | Val Accuracy 4.2857%
Epoch 00392 | Time(s) 0.0065 | Train Loss 0.6073 | Val f1 18.3908% | Val Accuracy 11.4286%
Epoch 00393 | Time(s) 0.0065 | Train Loss 0.6175 | Val f1 7.7922% | Val Accuracy 4.2857%
Epoch 00394 | Time(s) 0.0065 | Train Loss 0.6098 | Val f1 7.8947% | Val Accuracy 4.2857%
Epoch 00395 

Epoch 00483 | Time(s) 0.0065 | Train Loss 0.6117 | Val f1 7.6923% | Val Accuracy 4.2857%
Epoch 00484 | Time(s) 0.0065 | Train Loss 0.6063 | Val f1 20.4545% | Val Accuracy 12.8571%
Epoch 00485 | Time(s) 0.0065 | Train Loss 0.6159 | Val f1 7.8947% | Val Accuracy 4.2857%
Epoch 00486 | Time(s) 0.0065 | Train Loss 0.6186 | Val f1 12.5000% | Val Accuracy 7.1429%
Epoch 00487 | Time(s) 0.0065 | Train Loss 0.6103 | Val f1 32.0611% | Val Accuracy 30.0000%
Epoch 00488 | Time(s) 0.0065 | Train Loss 0.6503 | Val f1 0.0000% | Val Accuracy 0.0000%
Epoch 00489 | Time(s) 0.0065 | Train Loss 0.6721 | Val f1 8.1633% | Val Accuracy 5.7143%
Epoch 00490 | Time(s) 0.0065 | Train Loss 0.6785 | Val f1 11.3208% | Val Accuracy 8.5714%
Epoch 00491 | Time(s) 0.0065 | Train Loss 0.6272 | Val f1 26.2774% | Val Accuracy 25.7143%
Epoch 00492 | Time(s) 0.0065 | Train Loss 0.6478 | Val f1 13.1868% | Val Accuracy 8.5714%
Epoch 00493 | Time(s) 0.0065 | Train Loss 0.6434 | Val f1 2.6667% | Val Accuracy 1.4286%
Epoch 00494 

  'precision', 'predicted', average, warn_for)


Epoch 00504 | Time(s) 0.0065 | Train Loss 0.6296 | Val f1 0.0000% | Val Accuracy 0.0000%
Epoch 00505 | Time(s) 0.0065 | Train Loss 0.6258 | Val f1 5.2632% | Val Accuracy 2.8571%
Epoch 00506 | Time(s) 0.0065 | Train Loss 0.6052 | Val f1 28.2828% | Val Accuracy 20.0000%
Epoch 00507 | Time(s) 0.0065 | Train Loss 0.6305 | Val f1 5.4795% | Val Accuracy 2.8571%
Epoch 00508 | Time(s) 0.0065 | Train Loss 0.6494 | Val f1 16.6667% | Val Accuracy 10.0000%
Epoch 00509 | Time(s) 0.0065 | Train Loss 0.6090 | Val f1 27.2000% | Val Accuracy 24.2857%
Epoch 00510 | Time(s) 0.0065 | Train Loss 0.6549 | Val f1 0.0000% | Val Accuracy 0.0000%
Epoch 00511 | Time(s) 0.0065 | Train Loss 0.6757 | Val f1 0.0000% | Val Accuracy 0.0000%
Epoch 00512 | Time(s) 0.0065 | Train Loss 0.6637 | Val f1 9.8039% | Val Accuracy 7.1429%
Epoch 00513 | Time(s) 0.0065 | Train Loss 0.6315 | Val f1 18.6441% | Val Accuracy 15.7143%
Epoch 00514 | Time(s) 0.0065 | Train Loss 0.6492 | Val f1 24.0000% | Val Accuracy 21.4286%
Epoch 00515

Epoch 00606 | Time(s) 0.0064 | Train Loss 0.6188 | Val f1 29.6296% | Val Accuracy 28.5714%
Epoch 00607 | Time(s) 0.0064 | Train Loss 0.6194 | Val f1 29.6296% | Val Accuracy 28.5714%
Epoch 00608 | Time(s) 0.0064 | Train Loss 0.6236 | Val f1 19.4690% | Val Accuracy 15.7143%
Epoch 00609 | Time(s) 0.0064 | Train Loss 0.6225 | Val f1 19.1304% | Val Accuracy 15.7143%
Epoch 00610 | Time(s) 0.0064 | Train Loss 0.6208 | Val f1 20.0000% | Val Accuracy 17.1429%
Epoch 00611 | Time(s) 0.0064 | Train Loss 0.6175 | Val f1 28.9855% | Val Accuracy 28.5714%
Epoch 00612 | Time(s) 0.0064 | Train Loss 0.6227 | Val f1 24.8062% | Val Accuracy 22.8571%
Epoch 00613 | Time(s) 0.0064 | Train Loss 0.6195 | Val f1 16.3636% | Val Accuracy 12.8571%
Epoch 00614 | Time(s) 0.0064 | Train Loss 0.6216 | Val f1 19.4690% | Val Accuracy 15.7143%
Epoch 00615 | Time(s) 0.0064 | Train Loss 0.6198 | Val f1 29.1971% | Val Accuracy 28.5714%
Epoch 00616 | Time(s) 0.0064 | Train Loss 0.6232 | Val f1 27.9070% | Val Accuracy 25.7143%

Epoch 00703 | Time(s) 0.0064 | Train Loss 0.6218 | Val f1 20.1835% | Val Accuracy 15.7143%
Epoch 00704 | Time(s) 0.0064 | Train Loss 0.6248 | Val f1 21.4286% | Val Accuracy 17.1429%
Epoch 00705 | Time(s) 0.0064 | Train Loss 0.6169 | Val f1 33.8028% | Val Accuracy 34.2857%
Epoch 00706 | Time(s) 0.0064 | Train Loss 0.6269 | Val f1 33.0935% | Val Accuracy 32.8571%
Epoch 00707 | Time(s) 0.0064 | Train Loss 0.6281 | Val f1 18.3486% | Val Accuracy 14.2857%
Epoch 00708 | Time(s) 0.0064 | Train Loss 0.6225 | Val f1 11.2150% | Val Accuracy 8.5714%
Epoch 00709 | Time(s) 0.0064 | Train Loss 0.6241 | Val f1 18.8034% | Val Accuracy 15.7143%
Epoch 00710 | Time(s) 0.0064 | Train Loss 0.6216 | Val f1 29.4118% | Val Accuracy 28.5714%
Epoch 00711 | Time(s) 0.0064 | Train Loss 0.6221 | Val f1 29.6296% | Val Accuracy 28.5714%
Epoch 00712 | Time(s) 0.0064 | Train Loss 0.6230 | Val f1 18.8034% | Val Accuracy 15.7143%
Epoch 00713 | Time(s) 0.0064 | Train Loss 0.6202 | Val f1 9.7087% | Val Accuracy 7.1429%
Ep

Epoch 00808 | Time(s) 0.0065 | Train Loss 0.6217 | Val f1 29.6875% | Val Accuracy 27.1429%
Epoch 00809 | Time(s) 0.0065 | Train Loss 0.6147 | Val f1 30.6569% | Val Accuracy 30.0000%
Epoch 00810 | Time(s) 0.0065 | Train Loss 0.6164 | Val f1 20.3390% | Val Accuracy 17.1429%
Epoch 00811 | Time(s) 0.0065 | Train Loss 0.6234 | Val f1 20.1681% | Val Accuracy 17.1429%
Epoch 00812 | Time(s) 0.0065 | Train Loss 0.6200 | Val f1 30.6569% | Val Accuracy 30.0000%
Epoch 00813 | Time(s) 0.0065 | Train Loss 0.6239 | Val f1 22.7642% | Val Accuracy 20.0000%
Epoch 00814 | Time(s) 0.0065 | Train Loss 0.6239 | Val f1 21.0526% | Val Accuracy 17.1429%
Epoch 00815 | Time(s) 0.0065 | Train Loss 0.6191 | Val f1 19.1304% | Val Accuracy 15.7143%
Epoch 00816 | Time(s) 0.0065 | Train Loss 0.6133 | Val f1 27.2727% | Val Accuracy 25.7143%
Epoch 00817 | Time(s) 0.0065 | Train Loss 0.6218 | Val f1 27.2727% | Val Accuracy 25.7143%
Epoch 00818 | Time(s) 0.0065 | Train Loss 0.6206 | Val f1 19.6429% | Val Accuracy 15.7143%

Epoch 00903 | Time(s) 0.0065 | Train Loss 0.6230 | Val f1 21.4286% | Val Accuracy 17.1429%
Epoch 00904 | Time(s) 0.0065 | Train Loss 0.6199 | Val f1 20.5128% | Val Accuracy 17.1429%
Epoch 00905 | Time(s) 0.0065 | Train Loss 0.6188 | Val f1 29.1971% | Val Accuracy 28.5714%
Epoch 00906 | Time(s) 0.0065 | Train Loss 0.6152 | Val f1 24.1935% | Val Accuracy 21.4286%
Epoch 00907 | Time(s) 0.0065 | Train Loss 0.6120 | Val f1 20.1681% | Val Accuracy 17.1429%
Epoch 00908 | Time(s) 0.0065 | Train Loss 0.6124 | Val f1 18.9655% | Val Accuracy 15.7143%
Epoch 00909 | Time(s) 0.0065 | Train Loss 0.6209 | Val f1 19.1304% | Val Accuracy 15.7143%
Epoch 00910 | Time(s) 0.0065 | Train Loss 0.6105 | Val f1 29.1971% | Val Accuracy 28.5714%
Epoch 00911 | Time(s) 0.0065 | Train Loss 0.6148 | Val f1 30.2158% | Val Accuracy 30.0000%
Epoch 00912 | Time(s) 0.0065 | Train Loss 0.6197 | Val f1 21.6667% | Val Accuracy 18.5714%
Epoch 00913 | Time(s) 0.0065 | Train Loss 0.6159 | Val f1 21.0526% | Val Accuracy 17.1429%

In [19]:
logits = model(G, tensor_feat)[val_mask]

In [20]:
logits

tensor([[-1.6505e+01, -4.5442e+01],
        [ 3.0281e+01,  1.8311e+01],
        [ 4.0512e+01,  3.1850e+01],
        [ 2.3657e+02,  2.4161e+02],
        [ 2.6200e+01,  3.9035e+01],
        [ 4.4417e+01,  7.1107e+01],
        [-1.5320e+02, -1.7104e+02],
        [-1.2668e+02, -9.1811e+01],
        [-5.3562e+00,  2.7610e+00],
        [ 2.7353e+01,  2.7014e+01],
        [ 5.9254e+01,  4.7545e+01],
        [-4.0358e+01, -3.6156e+01],
        [-9.3304e+01, -8.5320e+01],
        [-2.6811e+02, -3.3064e+02],
        [ 2.6127e+03,  3.0329e+03],
        [ 7.1619e+01,  9.0630e+01],
        [-3.7797e+02, -3.8506e+02],
        [ 1.6219e+01,  2.7804e+01],
        [-2.3302e+02, -2.2752e+02],
        [-6.5329e-01, -2.3114e+00],
        [-1.6333e+01,  1.0750e+01],
        [ 8.0963e+01,  6.8337e+01],
        [-4.3668e+00, -3.7891e+00],
        [ 2.4763e+01,  2.8849e+01],
        [-1.8929e+01, -1.2390e+01],
        [-1.5161e+01, -1.3882e+01],
        [-1.3987e+02, -1.0258e+02],
        [-3.7963e+01, -3.900

* Rajouter des features --> e.g. summary (bag of words), production company
* Batch? 
* Comprendre et améilorer l'architecture
* fine tuning des params --> pas optimisé pour le moment
* comprendre si les labels imbalance est problematique
* use only cast crew for graph construction and use keywods(2000 best)/genre/production companies as features 
* 


Question : 
* loss jump ?!
* stability of scores ?