# Adjacency Construction

In [1]:
import os
import pickle

import networkx as nx
import numpy as np
import pandas as pd

In [2]:
# Similarity matrices to combine; one pickled matrix is expected per entry.
names = ["genre", "crew", "cast", "keywords"]

# Input and output locations.
# NOTE(review): the "G99CCK05" tag looks like it encodes the thresholds applied
# when the adjacency matrix is computed (genre 0.99, cast/crew/keywords 0.05) --
# confirm, and keep it in sync if those thresholds change.
Data_path = "Data/"
Net_path = "networks/"
file_name = "Adjacency_matrix_G99CCK05.pickle"
network_name = "network_G99CCK05.gexf"

Load the similarity matrices

In [3]:
# Load one pickled similarity matrix per feature listed in `names`,
# from the file `csim_<feature>` inside Data_path.
# NOTE: pickle.load can execute arbitrary code -- only load files produced by
# this project's own pipeline.
sim_mat = {}
for name in names:
    matrix_path = Data_path + 'csim_' + name
    with open(matrix_path, 'rb') as src:
        loaded = pickle.load(src)
    sim_mat[name] = loaded
    print(f'>>> Loading {name} similarity matrix with shape {loaded.shape}')

>>> Loading genre similarity matrix with shape (4802, 4802)
>>> Loading crew similarity matrix with shape (4802, 4802)
>>> Loading cast similarity matrix with shape (4802, 4802)
>>> Loading keywords similarity matrix with shape (4802, 4802)


Compute the Adjacency Matrix

In [4]:
# Genre layer: entries of the genre similarity matrix below 0.99 are set to 0.
genre_threshold = 0.99
A1 = sim_mat['genre'].values
A1 = np.where(A1 < genre_threshold, 0, A1)

# Cast / crew / keywords layer: equal-weight (1/3 each) sum of the three
# similarity matrices; summed entries below 0.05 are then set to 0.
blend_threshold = 0.05
blend_weights = {'cast': 1/3, 'crew': 1/3, 'keywords': 1/3}
A2 = np.zeros(sim_mat['cast'].shape)
for feature, weight in blend_weights.items():
    A2 += weight * sim_mat[feature].values
A2 = np.where(A2 < blend_threshold, 0, A2)

# Final adjacency matrix: 1/4 genre layer + 3/4 blended layer.
# NOTE(review): the diagonal is not cleared here, so if the similarity matrices
# carry self-similarity on the diagonal, A will too -- confirm whether
# self-loops are wanted in the resulting graph.
A = 0.25 * A1 + 0.75 * A2

Save the matrix

In [5]:
# Persist the combined adjacency matrix next to the input data.
adjacency_path = Data_path + file_name
with open(adjacency_path, 'wb') as handle:
    pickle.dump(A, handle, protocol=pickle.HIGHEST_PROTOCOL)
    print(f'Adjacency matrix saved at {adjacency_path}')

Adjacency matrix saved at Data/Adjacency_matrix_G99CCK05.pickle


Create the graph, attach the node attributes, and save it as a GEXF file

In [6]:
# Features table, indexed by the first CSV column, without the two columns
# that are not exported as node attributes.
dropped_feature_cols = ['production_companies', 'spoken_languages']
features_df = pd.read_csv(Data_path + 'features.csv', index_col=0)
features_df = features_df.drop(columns=dropped_feature_cols)

# Labels table, indexed the same way, without the title column.
labels_df = pd.read_csv(Data_path + 'labels.csv', index_col=0)
labels_df = labels_df.drop(columns=['title'])

# Column-wise concatenation: rows are aligned on the index of both tables.
nodes_attributes_df = pd.concat([features_df, labels_df], axis=1)

In [8]:
# Sanity check: the attribute table must have exactly one row per graph node.
# The counts are printed for the reader and then enforced, so a mismatch stops
# the notebook here instead of silently producing a graph with missing or
# misplaced node attributes.
n_nodes = A.shape[0]
n_attribute_rows = nodes_attributes_df.shape[0]
print(f'There are {n_nodes} nodes and {n_attribute_rows} attributes rows.')
assert n_nodes == n_attribute_rows, (
    f'Node/attribute mismatch: {n_nodes} nodes vs {n_attribute_rows} attribute rows'
)

There are 4802 nodes and 4802 attributes rows.


In [7]:
# Build the graph from the adjacency matrix.
G = nx.from_numpy_array(A)

# dict(df.T) maps each row label of nodes_attributes_df to that row's
# attributes, which are then attached to the node carrying the same label.
# NOTE(review): matching is by label, not by position -- this assumes the
# DataFrame index uses the same labels (and order) as the rows of A; labels
# that are not nodes of G are presumably skipped without error. TODO confirm.
nx.set_node_attributes(G, dict(nodes_attributes_df.transpose()))

# Create the output directory if it is missing, so the export does not fail
# with FileNotFoundError on a fresh checkout, after all the work above.
if Net_path:
    os.makedirs(Net_path, exist_ok=True)
nx.write_gexf(G, Net_path+network_name)