# U.S. Presidential Election
## Libraries

In [None]:
# IPython autoreload extension: with mode 2, imported modules are reloaded
# before each cell runs, so edits to local modules are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import warnings
warnings.simplefilter('ignore')

import sys

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors
from matplotlib.cm import get_cmap
from matplotlib.colors import rgb2hex

import seaborn as sns
sns.set()

import utils

import networkx as nx

from giotto.mapper import mapper as mp
from giotto.mapper import visualize

from giotto.mapper.cover import CubicalCover
from giotto.mapper.cover import OneDimensionalCover
from giotto.mapper.cluster import FirstSimpleGap
from giotto.mapper.cluster import FirstHistogramGap

from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer

In [None]:
# Location of the pickled election dataset, relative to the notebook.
dataset_path = os.path.join('data', 'usa_election_full_dataset.pickle')

# NOTE(review): unpickling runs arbitrary code; only load files from a
# trusted source.
df = pd.read_pickle(dataset_path)

# Preview the first rows as the cell output.
df.head()

In [None]:
# Build the analysis inputs with the project's `utils` helpers.
# NOTE(review): the exact return types are defined in utils (not shown here);
# the cells below index `data_per_year` with year strings such as '2016'.
data = utils.get_data(df)
data_per_year = utils.split_data_by_year(data, df)

## Utility Functions

In [None]:
def filter_2d_trafo(x):
    """Log-compress the first two columns of a 2D filter-value array.

    Column 0 is shifted so that its minimum becomes 1 and then
    log-transformed; column 1 is replaced by ``log(|value| + 1)``.
    Any further columns are passed through unchanged.

    Parameters
    ----------
    x : array-like of shape (n_samples, >= 2)
        Filter values, e.g. the output of a two-component PCA.

    Returns
    -------
    numpy.ndarray
        A new float array with the transformed columns. The input is left
        untouched.
    """
    # Work on a float copy: writing into ``x`` directly would modify the
    # caller's data and would truncate the logs if ``x`` had an integer dtype.
    out = np.array(x, dtype=float)

    first = out[:, 0]
    out[:, 0] = np.log(first - first.min() + 1)
    out[:, 1] = np.log(np.abs(out[:, 1]) + 1)
    return out

In [None]:
def get_n_electors(node_elements, n_electors, total_electors=535):
    """Return, per node, the percentage of electors held by its members.

    Parameters
    ----------
    node_elements : iterable of sequences of int
        For each node, the positional row indices of its members.
    n_electors : pandas.Series
        Elector count per row, addressed positionally via ``.iloc``.
    total_electors : int or float, default 535
        Denominator used for the percentage.
        NOTE(review): the Electoral College has 538 electors; 535 is kept as
        the default for backward compatibility — confirm which is intended.

    Returns
    -------
    list
        ``100 * (sum of members' electors) / total_electors`` for every node,
        in the order of ``node_elements``.
    """
    return [100 * n_electors.iloc[members].sum() / total_electors
            for members in node_elements]

In [None]:
def get_node_text(node_elements, n_electors=None):
    """Build the hover text shown for each node of the Mapper graph.

    Parameters
    ----------
    node_elements : dict
        Maps a node id to the collection of its members; only the length of
        each collection is used.
    n_electors : iterable, optional
        One value per node, in the iteration order of ``node_elements``.
        When omitted, the "Number of Electors" line is left out, so the
        function can also be called with the node mapping alone.

    Returns
    -------
    list of str
        One HTML-formatted label per node. If ``n_electors`` is shorter than
        ``node_elements``, the result is truncated to the shorter length.
    """
    if n_electors is None:
        return [f'Node Id: {node_id}<br>Node size: {len(members)}'
                for node_id, members in node_elements.items()]

    return [f'Node Id: {node_id}<br>Node size: {len(members)}'
            f'<br>Number of Electors: {electors}'
            for (node_id, members), electors in zip(node_elements.items(),
                                                    n_electors)]

#### 2D PCA

In [None]:
# Two-component PCA; reused further down both to compute the filter values
# plotted as histograms and as the first step of the custom filter pipeline.
pca = PCA(n_components=2)

In [None]:
# Mapper pipeline with the library's default settings, fitted on the 2016 data.
pipe = mp.make_mapper_pipeline()
graph = pipe.fit_transform(data_per_year['2016'])

In [None]:
# Members of every Mapper node and a 2D layout used to draw the graph.
node_elements = graph['node_metadata']['node_elements']
pos = graph.layout('kamada_kawai')

# 2016 rows, filtered once and reused for electors and winner colouring.
df_2016 = df[df['year'] == 2016]
electors_2016 = df_2016['n_electors'].reset_index(drop=True)

# Per-node elector share; drives the hover text, marker size and size scale.
node_electors = get_n_electors(node_elements, electors_2016)

node_ids = range(len(node_elements))
node_text = get_node_text(dict(zip(node_ids, node_elements)), node_electors)

# Colour every node by the mean of the 'winner' column over its members.
node_color = utils.get_node_summary(node_elements,
                                    df_2016['winner'].values,
                                    summary_stat=np.mean)

custom_plot_options = {
    'node_trace_marker_colorscale': 'RdBu',
    'node_trace_marker_reversescale': True,
    'node_trace_text': node_text,
    'node_trace_marker_size': node_electors,
    'node_trace_marker_sizeref': .5 / max(node_electors),
}

visualize.create_network_2d(graph, pos, node_color, node_scale=20,
                            custom_plot_options=custom_plot_options)

In [None]:
num_cols, _, _ = utils.get_cols_by_type()

# The 2016 subset does not change between iterations, so filter it once.
df_2016 = df[df['year'] == 2016]

# One figure per numeric column: nodes coloured by the column's mean over
# the node members, with the plot options of the previous cell.
for col in num_cols:
    print(f'-- {col}  --')
    node_color = utils.get_node_summary(node_elements,
                                        df_2016[col].values,
                                        summary_stat=np.mean)
    fig = visualize.create_network_2d(graph, pos, node_color,
                                      node_scale=20,
                                      custom_plot_options=custom_plot_options)
    fig.show()

#### Custom PCA Filter

In [None]:
filtr_vals = pca.fit_transform(data_per_year['2016'])

fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(15, 10))

pc1 = filtr_vals[:, 0]
pc2 = filtr_vals[:, 1]

# (values, axes, title) per panel: raw components on the top row, the
# log-transformed versions (same formulas as filter_2d_trafo) below them.
panels = [
    (pc1, ax[0, 0], 'Original PCA 1'),
    (np.log(pc1 - min(pc1) + 1), ax[1, 0], 'Transformed PCA 1'),
    (pc2, ax[0, 1], 'PCA 2'),
    (np.log(np.abs(pc2) + 1), ax[1, 1], 'Transformed PCA 2'),
]

for panel_values, panel_ax, panel_title in panels:
    sns.distplot(panel_values, ax=panel_ax).set_title(panel_title)

In [None]:
# Cover settings tried so far:
# n_intervals=10, overlap_frac=0.1
# n_intervals=10, overlap_frac=0.15
n_intervals = 10
overlap_frac = 0.1
# Filter function: 2-component PCA followed by the log transform defined in
# filter_2d_trafo.
filter_func = Pipeline([('pca', pca),
                        ('trafo', FunctionTransformer(filter_2d_trafo))])

# Cubical cover of the 2D filter values.
cover = CubicalCover(n_intervals=n_intervals,
                     overlap_frac=overlap_frac)

# Clustering settings.
# NOTE(review): in the cells visible here `clusterer` is built but never
# passed to make_mapper_pipeline — confirm whether that is intended.
relative_gap_size = 0.9
affinity = 'correlation'
linkage = 'complete'

clusterer = FirstSimpleGap(relative_gap_size=relative_gap_size,
                           affinity=affinity,
                           linkage=linkage)

In [None]:
# Mapper pipeline using the custom PCA + log filter and the cubical cover,
# fitted on the 2016 data.
# NOTE(review): the `clusterer` configured in the previous cell is not passed
# here, so the library's default clusterer is used — confirm this is intended.
pipe = mp.make_mapper_pipeline(filter_func=filter_func,
                               cover=cover)
graph = pipe.fit_transform(data_per_year['2016'])

### 2016 - Winner

In [None]:
# Members of every Mapper node and a 2D layout used to draw the graph.
node_elements = graph['node_metadata']['node_elements']
pos = graph.layout('kamada_kawai')

# 2016 rows, filtered once and reused for electors and winner colouring.
df_2016 = df[df['year'] == 2016]
electors_2016 = df_2016['n_electors'].reset_index(drop=True)

# Per-node elector share; drives the hover text, marker size and size scale.
node_electors = get_n_electors(node_elements, electors_2016)

node_ids = range(len(node_elements))
node_text = get_node_text(dict(zip(node_ids, node_elements)), node_electors)

# Colour every node by the mean of the 'winner' column over its members.
node_color = utils.get_node_summary(node_elements,
                                    df_2016['winner'].values,
                                    summary_stat=np.mean)

custom_plot_options = {
    'node_trace_marker_colorscale': 'RdBu',
    'node_trace_marker_reversescale': True,
    'node_trace_text': node_text,
    'node_trace_marker_size': node_electors,
    'node_trace_marker_sizeref': .5 / max(node_electors),
}

visualize.create_network_2d(graph, pos, node_color, node_scale=20,
                            custom_plot_options=custom_plot_options)

In [None]:
num_cols, _, _ = utils.get_cols_by_type()

# The 2016 subset does not change between iterations, so filter it once.
df_2016 = df[df['year'] == 2016]

# One figure per numeric column: nodes coloured by the column's mean over
# the node members, with the plot options of the previous cell.
for col in num_cols:
    print(f'-- {col}  --')
    node_color = utils.get_node_summary(node_elements,
                                        df_2016[col].values,
                                        summary_stat=np.mean)
    fig = visualize.create_network_2d(graph, pos, node_color,
                                      node_scale=20,
                                      custom_plot_options=custom_plot_options)
    fig.show()

### 2012 - Winner

In [None]:
# Re-colour the graph fitted on the 2016 data by the 2012 winner.
# NOTE(review): node members are row positions from the 2016 fit; this assumes
# the rows of every year are in the same order — confirm against utils.
df_2012 = df[df['year'] == 2012]

# Pass the per-node elector shares so the hover text has the same content as
# in the 2016 plot.
node_text = get_node_text(dict(zip(range(len(node_elements)),
                                   node_elements)),
                          get_n_electors(node_elements,
                                         df_2012['n_electors'].reset_index(drop=True)))

node_color = utils.get_node_summary(node_elements,
                                    df_2012['winner'].values,
                                    summary_stat=np.mean)

custom_plot_options = {'node_trace_marker_colorscale': 'RdBu',
                       'node_trace_marker_reversescale': True,
                       'node_trace_text': node_text}

visualize.create_network_2d(graph, pos, node_color, node_scale=20,
                            custom_plot_options=custom_plot_options)

### 2008 - Winner

In [None]:
# Re-colour the graph fitted on the 2016 data by the 2008 winner.
# NOTE(review): node members are row positions from the 2016 fit; this assumes
# the rows of every year are in the same order — confirm against utils.
df_2008 = df[df['year'] == 2008]

# Pass the per-node elector shares so the hover text has the same content as
# in the 2016 plot.
node_text = get_node_text(dict(zip(range(len(node_elements)),
                                   node_elements)),
                          get_n_electors(node_elements,
                                         df_2008['n_electors'].reset_index(drop=True)))

node_color = utils.get_node_summary(node_elements,
                                    df_2008['winner'].values,
                                    summary_stat=np.mean)

custom_plot_options = {'node_trace_marker_colorscale': 'RdBu',
                       'node_trace_marker_reversescale': True,
                       'node_trace_text': node_text}

visualize.create_network_2d(graph, pos, node_color, node_scale=20,
                            custom_plot_options=custom_plot_options)

### 2004 - Winner

In [None]:
# Re-colour the graph fitted on the 2016 data by the 2004 winner.
# NOTE(review): node members are row positions from the 2016 fit; this assumes
# the rows of every year are in the same order — confirm against utils.
df_2004 = df[df['year'] == 2004]

# Pass the per-node elector shares so the hover text has the same content as
# in the 2016 plot.
node_text = get_node_text(dict(zip(range(len(node_elements)),
                                   node_elements)),
                          get_n_electors(node_elements,
                                         df_2004['n_electors'].reset_index(drop=True)))

node_color = utils.get_node_summary(node_elements,
                                    df_2004['winner'].values,
                                    summary_stat=np.mean)

custom_plot_options = {'node_trace_marker_colorscale': 'RdBu',
                       'node_trace_marker_reversescale': True,
                       'node_trace_text': node_text}

visualize.create_network_2d(graph, pos, node_color, node_scale=20,
                            custom_plot_options=custom_plot_options)

### 2000 - Winner

In [None]:
# Re-colour the graph fitted on the 2016 data by the 2000 winner.
# NOTE(review): node members are row positions from the 2016 fit; this assumes
# the rows of every year are in the same order — confirm against utils.
df_2000 = df[df['year'] == 2000]

# Pass the per-node elector shares so the hover text has the same content as
# in the 2016 plot.
node_text = get_node_text(dict(zip(range(len(node_elements)),
                                   node_elements)),
                          get_n_electors(node_elements,
                                         df_2000['n_electors'].reset_index(drop=True)))

node_color = utils.get_node_summary(node_elements,
                                    df_2000['winner'].values,
                                    summary_stat=np.mean)

custom_plot_options = {'node_trace_marker_colorscale': 'RdBu',
                       'node_trace_marker_reversescale': True,
                       'node_trace_text': node_text}

visualize.create_network_2d(graph, pos, node_color, node_scale=20,
                            custom_plot_options=custom_plot_options)

### Example for 3D Plot

In [None]:
# Fixed seed so the 3D spring layout is reproducible.
# NOTE(review): `graph` is accessed igraph-style in earlier cells
# (graph.layout, graph['node_metadata']); confirm nx.spring_layout accepts it.
seed = 14
pos = nx.spring_layout(graph, seed=seed, dim=3)

node_color = utils.get_node_summary(node_elements, data_per_year['2016'])

# Hover-label background per node, taken from the reversed RdBu colormap at
# the node's colour value.
red_blue_reversed = get_cmap('RdBu_r')
hover_bgcolor = [rgb2hex(red_blue_reversed(value)) for value in node_color]

custom_plot_options = {
    'node_trace_marker_colorscale': 'RdBu',
    'node_trace_marker_reversescale': True,
    'node_trace_hoverlabel': dict(bgcolor=hover_bgcolor),
}

visualize.create_network_3d(graph, pos, node_color,
                            node_scale=100,
                            custom_plot_options=custom_plot_options)

### Interactive

In [None]:
# Same red/blue node styling as the static plots, forwarded through
# plotly_kwargs to the interactive widget.
node_style = {'node_trace_marker_reversescale': True,
              'node_trace_marker_colorscale': 'RdBu'}
plotly_kwargs = {'custom_plot_options': node_style}

visualize.create_interactive_network(pipe,
                                     data_per_year['2016'],
                                     plotly_kwargs=plotly_kwargs,
                                     summary_stat=np.mean)