# U.S. Presidential Election
## Libraries

In [None]:
import warnings
warnings.simplefilter('ignore')

import sys

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors
from matplotlib.cm import get_cmap
from matplotlib.colors import rgb2hex

import seaborn as sns
sns.set()

import utils

import networkx as nx

from giotto.mapper import mapper as mp
from giotto.mapper import visualize

from giotto.mapper.cover import CubicalCover

from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer

In [None]:
# Colour map for the notebook: an unnamed linear gradient running
# blue -> violet -> red.
cmap = matplotlib.colors.LinearSegmentedColormap.from_list(
    name="",
    colors=["blue", "violet", "red"],
)

In [None]:
# Load the pickled election dataset from the local ``data`` directory.
# NOTE(review): unpickling can execute arbitrary code, so this file must
# come from a trusted source.
dataset_path = os.path.join('data', 'usa_election_full_dataset.pickle')
df = pd.read_pickle(dataset_path)

# Last expression of the cell: display the first rows as a sanity check.
df.head()

In [None]:
# Build the working data from the raw frame with the project helpers, then
# split it by year; later cells index the result with the string key '2016'.
data = utils.get_data(df)
data_per_year = utils.split_data_by_year(data, df)

## Utility Functions

In [None]:
def filter_1d_trafo(x):
    """Return the element-wise natural log of the absolute value of ``x``.

    Applied after the PCA step of the 1-D Mapper filter. Entries equal to
    zero map to ``-inf``.
    """
    magnitude = np.abs(x)
    return np.log(magnitude)

In [None]:
def filter_2d_trafo(x):
    """Log-transform the first two columns of ``x`` and return a new array.

    Column 0 is shifted so that its minimum becomes 1 and then logged, i.e.
    ``log(x0 - min(x0) + 1)``; column 1 becomes ``log(|x1| + 1)``. Any
    further columns are passed through unchanged.

    Parameters
    ----------
    x : array-like with at least two columns
        Filter values, e.g. the output of a two-component PCA.

    Returns
    -------
    numpy.ndarray
        A floating-point array of the same shape as ``x``. The input is
        never modified.
    """
    # Work on a copy so the caller's array is left untouched.
    out = np.array(x, copy=True)
    # Writing log values into an integer array would truncate them.
    if not np.issubdtype(out.dtype, np.floating):
        out = out.astype(float)

    first = out[:, 0]
    out[:, 0] = np.log(first - first.min() + 1)
    out[:, 1] = np.log(np.abs(out[:, 1]) + 1)
    return out

In [None]:
def get_node_text(node_elements):
    """Build one hover label per node.

    ``node_elements`` maps a node id to the collection of elements in that
    node. Each label shows the node id and the number of elements, in the
    mapping's iteration order.
    """
    return [f'Node Id: {node_id}<br>Node size: {len(members)}'
            for node_id, members in node_elements.items()]

### Mapper
#### 1D PCA

In [None]:
# One-component PCA fitted on the 2016 data; the same estimator is reused as
# the first step of the 1-D Mapper filter below.
pca = PCA(n_components=1)
filtr_vals = pca.fit_transform(data_per_year['2016'])

fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15, 10))

# Top panel: distribution of the raw first principal component.
sns.distplot(filtr_vals, ax=ax[0]).set_title('Original PCA 1')

# Bottom panel: the same values after the transformation used by the filter.
# Calling filter_1d_trafo instead of repeating its formula keeps this preview
# in sync with what the pipeline actually applies.
(sns.distplot(filter_1d_trafo(filtr_vals), ax=ax[1])
 .set_title('Transformed PCA 1'))

In [None]:
# Cover settings for the 1-D filter; both values are passed to CubicalCover
# in the following cell.
n_intervals = 50
overlap_frac = 0.1

In [None]:
# Filter: project with the PCA defined above, then apply the 1-D log
# transformation to the projected values.
filter_func = Pipeline(steps=[
    ('pca', pca),
    ('trafo', FunctionTransformer(func=filter_1d_trafo)),
])

# Cover built from the interval count and overlap fraction chosen above.
cover = CubicalCover(n_intervals=n_intervals, overlap_frac=overlap_frac)

# Assemble the Mapper pipeline and compute the graph for the 2016 data.
pipe = mp.make_mapper_pipeline(filter_func=filter_func, cover=cover)
graph = pipe.fit_transform(data_per_year['2016'])

In [None]:
# Fixed seed so the spring layout is reproducible between runs.
seed = 1

# Per-node element collections stored on the graph, and a 2-D layout.
node_elements = nx.get_node_attributes(graph, 'elements')
pos = nx.spring_layout(graph, dim=2, seed=seed)

# Summarise the 2016 'winner' column per node with np.mean; the result is
# used to colour the nodes.
node_color = utils.get_node_summary(
    node_elements,
    df.loc[df['year'] == 2016, 'winner'].values,
    summary_stat=np.mean,
)

# Reversed 'RdBu' colour scale for the node markers.
custom_plot_options = dict(
    node_trace_marker_colorscale='RdBu',
    node_trace_marker_reversescale=True,
)

visualize.create_network_2d(
    graph, pos, node_color,
    node_scale=20,
    custom_plot_options=custom_plot_options,
)

#### 2D PCA

In [None]:
# No filter or cover is passed, so the Mapper pipeline is built entirely from
# the library defaults (per the section heading, presumably a 2-D PCA filter
# -- confirm against the giotto documentation).
pipe = mp.make_mapper_pipeline()
graph = pipe.fit_transform(data_per_year['2016'])

In [None]:
# Seed for the spring layout, fixed for reproducibility.
seed = 1

# Elements attached to each node of the current graph, plus a 2-D layout.
node_elements = nx.get_node_attributes(graph, 'elements')
pos = nx.spring_layout(graph, dim=2, seed=seed)

# Node colours: np.mean summary of the 2016 'winner' values per node.
node_color = utils.get_node_summary(
    node_elements,
    df.loc[df['year'] == 2016, 'winner'].values,
    summary_stat=np.mean,
)

# Marker colour scale: 'RdBu', reversed.
custom_plot_options = dict(
    node_trace_marker_colorscale='RdBu',
    node_trace_marker_reversescale=True,
)

visualize.create_network_2d(
    graph, pos, node_color,
    node_scale=20,
    custom_plot_options=custom_plot_options,
)

#### Custom PCA Filter

In [None]:
# Two-component PCA; fitted in the next cell and reused as the first step of
# the custom filter pipeline.
pca = PCA(n_components=2)

In [None]:
# Project the 2016 data onto the two principal components.
filtr_vals = pca.fit_transform(data_per_year['2016'])

# Apply the same transformation the custom filter uses, so the preview cannot
# drift from the pipeline. A copy is passed so the raw values needed for the
# "Original" panels stay intact even if the helper writes into its argument.
transformed_vals = filter_2d_trafo(filtr_vals.copy())

fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(15, 10))

# One column of panels per component: raw values on top, transformed below.
for col in range(2):
    component = col + 1
    (sns.distplot(filtr_vals[:, col], ax=ax[0, col])
     .set_title(f'Original PCA {component}'))
    (sns.distplot(transformed_vals[:, col], ax=ax[1, col])
     .set_title(f'Transformed PCA {component}'))

In [None]:
# Custom filter: the two-component PCA followed by the 2-D log transformation.
filter_func = Pipeline(steps=[
    ('pca', pca),
    ('trafo', FunctionTransformer(func=filter_2d_trafo)),
])

In [None]:
# Mapper pipeline using the custom PCA + log filter; no cover is passed here,
# so the library's default cover applies.
pipe = mp.make_mapper_pipeline(filter_func=filter_func)
graph = pipe.fit_transform(data_per_year['2016'])

In [None]:
# Seed for a reproducible spring layout.
seed = 0

# Node -> elements mapping from the graph, and a 2-D layout.
node_elements = nx.get_node_attributes(graph, 'elements')
pos = nx.spring_layout(graph, dim=2, seed=seed)

# Colour each node by the np.mean summary of the 2016 'winner' values.
node_color = utils.get_node_summary(
    node_elements,
    df.loc[df['year'] == 2016, 'winner'].values,
    summary_stat=np.mean,
)

# Hover labels showing node id and node size.
node_text = get_node_text(node_elements)

# Reversed 'RdBu' markers plus the per-node hover text.
custom_plot_options = dict(
    node_trace_marker_colorscale='RdBu',
    node_trace_marker_reversescale=True,
    node_trace_text=node_text,
)

visualize.create_network_2d(
    graph, pos, node_color,
    node_scale=20,
    custom_plot_options=custom_plot_options,
)

#### Change Cover Parameters

In [None]:
# Cover settings for this section; both values are passed to CubicalCover in
# the following cell.
n_intervals = 20
overlap_frac = 0.1

In [None]:
# Cover built from the interval count and overlap fraction set above.
cover = CubicalCover(overlap_frac=overlap_frac, n_intervals=n_intervals)

# Rebuild the Mapper pipeline with the custom filter and this cover, then
# compute the graph for the 2016 data.
pipe = mp.make_mapper_pipeline(cover=cover, filter_func=filter_func)
graph = pipe.fit_transform(data_per_year['2016'])

In [None]:
# Seed for a reproducible spring layout.
seed = 20

# Node -> elements mapping from the graph, and a 3-D layout for the 3-D plot.
node_elements = nx.get_node_attributes(graph, 'elements')
pos = nx.spring_layout(graph, seed=seed, dim=3)

# Colour each node by the np.mean summary of the 2016 'winner' values.
node_color = utils.get_node_summary(node_elements,
                                    df[df['year'] == 2016]['winner'].values,
                                    summary_stat=np.mean)

# Hover labels showing node id and node size.
node_text = get_node_text(node_elements)

# Hover-label background per node, taken from 'RdBu_r' (the reversed 'RdBu'
# scale used for the markers). The colormap is looked up once here rather than
# once per node.
hover_cmap = get_cmap('RdBu_r')
hover_bgcolor = [rgb2hex(hover_cmap(value)) for value in node_color]

custom_plot_options = {'node_trace_marker_colorscale': 'RdBu',
                       'node_trace_marker_reversescale': True,
                       'node_trace_text': node_text,
                       'node_trace_hoverlabel': dict(bgcolor=hover_bgcolor)}

visualize.create_network_3d(graph, pos, node_color, node_scale=100,
                            custom_plot_options=custom_plot_options)

### Example for 3D Plot

In [None]:
# Different seed, so the same graph gets a different 3-D layout.
seed = 14
pos = nx.spring_layout(graph, seed=seed, dim=3)

# Summarise the full 2016 data per node with the helper's default statistic.
# ``node_elements`` is reused from the previous cell.
node_color = utils.get_node_summary(node_elements, data_per_year['2016'])

# Hover-label background per node from 'RdBu_r'; the colormap is looked up
# once here rather than once per node.
hover_cmap = get_cmap('RdBu_r')
hover_bgcolor = [rgb2hex(hover_cmap(value)) for value in node_color]

custom_plot_options = {'node_trace_marker_colorscale': 'RdBu',
                       'node_trace_marker_reversescale': True,
                       'node_trace_hoverlabel': dict(bgcolor=hover_bgcolor)}

visualize.create_network_3d(graph, pos, node_color,
                            node_scale=100, custom_plot_options=custom_plot_options)

### Interactive

In [None]:
# Plot options forwarded to the interactive view: reversed 'RdBu' markers.
plotly_kwargs = dict(
    custom_plot_options=dict(
        node_trace_marker_reversescale=True,
        node_trace_marker_colorscale='RdBu',
    ),
)

# Interactive Mapper view over the 2016 data, driven by the current pipeline
# and using np.mean as the summary statistic.
visualize.create_interactive_network(
    pipe,
    data_per_year['2016'],
    plotly_kwargs=plotly_kwargs,
    summary_stat=np.mean,
)