In [1]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import numpy as np
from scipy import sparse
from project_utils import *
from pygsp import graphs, filters

%load_ext autoreload
%autoreload 2

In [2]:
# Load the adjacency matrix and the actor dataframe, then build both graph objects
actors_agg_adj = np.load("sparse_agg_actor_adj.npy")
# NOTE: unpickling can execute arbitrary code; only load trusted files.
actors_agg_df = pd.read_pickle("actors_agg_df.pkl")
# nx.from_numpy_matrix was removed in NetworkX 3.0; from_numpy_array is the
# supported constructor for a plain ndarray adjacency matrix.
actors_graph = nx.from_numpy_array(actors_agg_adj)
G = graphs.Graph(actors_agg_adj)

In [3]:
# Laplacian and spectral decomposition
G.compute_laplacian('combinatorial')
laplacian = G.L.toarray()
# Use the symmetric eigensolver: eigh returns real eigenvalues in ascending
# order with orthonormal eigenvectors, whereas the general-purpose eig gives
# no ordering and may return complex output from round-off.
# NOTE(review): assumes the combinatorial Laplacian in G.L is symmetric
# (true for an undirected graph) — confirm.
lam, U = np.linalg.eigh(laplacian)

In [4]:
# Ideal spectral responses: one gain value per Laplacian eigenvalue in `lam`.

# Ideal high-pass filter: gain 1 for eigenvalues strictly above lambda=1, else 0.
# NOTE(review): the earlier comment claimed a cut-off of 0.1 while the code
# used 1; the code's value is kept here — confirm which cut-off was intended.
ideal_hp = np.where(lam <= 1, 0.0, 1.0)

# Ideal low-pass filter: gain 1 for eigenvalues strictly below lambda=0.1, else 0.
ideal_lp = np.where(lam >= 0.1, 0.0, 1.0)

# Ideal band-pass filter: gain 1 for eigenvalues within [0.1, 0.5], else 0.
ideal_bp = np.where((lam < 0.1) | (lam > 0.5), 0.0, 1.0)

# Tikhonov response 1 / (1 + alpha * lambda). alpha is scaled by the largest
# eigenvalue, so the gain is 1 at lambda=0 and 1 / 1.99 at max(lam).
alpha = 0.99 / np.max(lam)
ideal_tk = 1 / (1 + alpha * lam)

In [5]:
# Polynomial graph filter of a given order for a given ideal response
def graph_filter(order, ideal):
    """Build a polynomial graph filter from an ideal spectral response.

    Coefficients are obtained from ``fit_polynomial`` using the module-level
    eigenvalues ``lam``, the requested ``order`` and the ``ideal`` response;
    they are then handed, together with the module-level ``laplacian``, to
    ``polynomial_graph_filter``, whose result is returned unchanged.
    """
    fitted_coefficients = fit_polynomial(lam, order, ideal)
    filter_operator = polynomial_graph_filter(fitted_coefficients, laplacian)
    return filter_operator

In [6]:
# Build one polynomial graph filter per ideal response, all with the same order.
# Unpacking order: high-pass, low-pass, band-pass, Tikhonov.
order = 3
g_f_hp, g_f_lp, g_f_bp, g_f_tk = (
    graph_filter(order, ideal_response)
    for ideal_response in (ideal_hp, ideal_lp, ideal_bp, ideal_tk)
)

In [7]:
def apply_filter_and_add_communities(graph_filter):
    """Filter the continuous actor features and attach a community column.

    Reads two module-level objects, which must exist before the call:
    ``actors_agg_df`` (the source dataframe) and
    ``actors_with_comunities_dataset`` (an array whose last column supplies
    the community values).

    Parameters
    ----------
    graph_filter : array-like
        Matrix applied on the left (``graph_filter @ features``) to the
        "budget", "revenue" and "vote_average" columns.

    Returns
    -------
    pandas.DataFrame
        A copy of ``actors_agg_df`` whose three continuous columns hold the
        filtered values, merged on the index with an integer "community"
        column. ``actors_agg_df`` itself is not modified.
    """
    feature_columns = ["budget", "revenue", "vote_average"]
    # Get only continuous features
    continuous_features = actors_agg_df.filter(feature_columns).to_numpy()
    # Apply filter
    x_filtered = graph_filter @ continuous_features
    x_filtered_df = pd.DataFrame(data=x_filtered, columns=feature_columns)
    # Update a copy of the dataframe with the filtered columns.
    # NOTE(review): update() and merge() below align on index labels against
    # frames carrying a default RangeIndex, so this presumably relies on
    # actors_agg_df having a default 0..n-1 index — confirm.
    new_df = actors_agg_df.copy()
    new_df.update(x_filtered_df)
    # Add communities column (last column of the communities dataset)
    communities_df = pd.DataFrame(data=actors_with_comunities_dataset[:, -1], columns=["community"])
    # np.int was removed in NumPy 1.24; cast the whole column at once instead.
    communities_df["community"] = communities_df["community"].astype(np.int64)
    new_df = new_df.merge(communities_df, left_index=True, right_index=True)
    return new_df

In [8]:
# Dataset whose last column is read by apply_filter_and_add_communities
actors_with_comunities_dataset = np.load("actors_with_communities_dataset.npy")
# Run every filter through the same pipeline to get the new dataframes.
# Unpacking order: high-pass, low-pass, band-pass, Tikhonov.
hp_new_df, lp_new_df, bp_new_df, tk_new_df = map(
    apply_filter_and_add_communities,
    (g_f_hp, g_f_lp, g_f_bp, g_f_tk),
)