# U.S. Presidential Election
## Libraries

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import warnings
warnings.simplefilter('ignore')

import sys

import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
#import matplotlib.colors
from matplotlib.cm import get_cmap
from matplotlib.colors import rgb2hex

import seaborn as sns
sns.set()

import utils
from plotting import get_graph_plot_colored_by_winner

from giotto.mapper import mapper as mp
from giotto.mapper import visualize

from giotto.mapper.cover import CubicalCover
from giotto.mapper.cover import OneDimensionalCover
from giotto.mapper.cluster import FirstSimpleGap
from giotto.mapper.cluster import FirstHistogramGap

from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import FunctionTransformer

In [None]:
# Load the full election dataset shipped alongside the notebook.
# NOTE(review): unpickling can execute arbitrary code -- only load this file
# from a trusted source.
dataset_path = os.path.join('data', 'usa_election_full_dataset.pickle')
df = pd.read_pickle(dataset_path)

# Quick visual sanity check of the first rows.
df.head()

In [None]:
# Derive the data used for the Mapper analysis from the full dataframe, then
# split it by election year; later cells select a single year with a string
# key such as data_per_year['2016'].
# NOTE(review): the exact shape/columns returned by these `utils` helpers are
# not visible from this notebook -- confirm against utils.py.
data = utils.get_data(df)
data_per_year = utils.split_data_by_year(data, df)

## Utility Functions

In [None]:
def filter_2d_trafo(x):
    """Log-compress the first two columns of a 2D array of filter values.

    Column 0 is shifted so that its minimum becomes 0 and then mapped through
    ``log(. + 1)``; column 1 is mapped through ``log(|.| + 1)``. Any further
    columns are passed through unchanged.

    Parameters
    ----------
    x : array-like of shape (n_samples, n_columns), n_columns >= 2
        Values to transform. The input is never modified.

    Returns
    -------
    numpy.ndarray
        A new floating-point array of the same shape as ``x``.
    """
    # Work on a copy: assigning into ``x`` directly would clobber the caller's
    # data, and for integer inputs it would silently truncate the log values.
    out = np.array(x, copy=True)
    if not np.issubdtype(out.dtype, np.floating):
        out = out.astype(float)

    first = out[:, 0]
    out[:, 0] = np.log(first - first.min() + 1)
    out[:, 1] = np.log(np.abs(out[:, 1]) + 1)
    return out

## Mapper

In [None]:
# Two-component PCA; it is fitted on the 2016 data further down and is also the
# first step of the filter function passed to the Mapper pipeline.
pca = PCA(n_components=2)

In [None]:
# Project the 2016 data onto its first two principal components.
filtr_vals = pca.fit_transform(data_per_year['2016'])

# Re-use the exact transform that the Mapper filter applies instead of
# re-typing its formulas here, so the plots cannot drift from the real filter.
# A copy is passed so the raw values stay available for the top row.
transformed_vals = filter_2d_trafo(filtr_vals.copy())

fig, ax = plt.subplots(nrows=2, ncols=2, figsize=(15, 10))

# One column of panels per principal component: raw distribution on the top
# row, log-transformed distribution on the bottom row.
for component in range(2):
    label = f'PCA {component + 1}'
    (sns.distplot(filtr_vals[:, component],
                  ax=ax[0, component])
     .set_title(f'Original {label}'))
    (sns.distplot(transformed_vals[:, component],
                  ax=ax[1, component])
     .set_title(f'Transformed {label}'))

In [None]:
# Filter function for Mapper: PCA projection followed by the log transform
# defined in `filter_2d_trafo`.
filter_steps = [
    ('pca', pca),
    ('trafo', FunctionTransformer(filter_2d_trafo)),
]
filter_func = Pipeline(filter_steps)

# Build the Mapper pipeline around that filter and compute the graph for the
# 2016 data.
pipe = mp.make_mapper_pipeline(filter_func=filter_func)
graph = pipe.fit_transform(data_per_year['2016'])

In [None]:
# Per-node membership lists and a 2D layout of the Mapper graph.
node_elements = graph['node_metadata']['node_elements']
pos = graph.layout('kamada_kawai')

# Colour each node by the mean of the filter values of its members.
filter_values = filter_func.fit_transform(data_per_year['2016'])
node_color = utils.get_node_summary(
    node_elements,
    filter_values,
    summary_stat=np.mean,
)

# Pin the colour scale to the observed range of node colours.
custom_plot_options = dict(
    node_trace_marker_cmin=min(node_color),
    node_trace_marker_cmax=max(node_color),
)

visualize.create_network_2d(
    graph,
    pos,
    node_color,
    custom_plot_options=custom_plot_options,
)

### Colored by Winner of Presidential Election

In [None]:
# Re-use the graph and layout computed above and recolour the nodes by the
# election winner, going backwards in four-year steps from 2016 to 2000.
election_years = (2016, 2012, 2008, 2004, 2000)
for year in election_years:
    header = f'-- {year} Election --'
    print(header)
    fig = get_graph_plot_colored_by_winner(graph, year, df, pos)
    fig.show(config=dict(scrollZoom=True))

### Colored by Economic Indicator

In [None]:
# Vertices dropped before plotting; presumably everything outside the largest
# connected component of the Mapper graph.
# NOTE(review): these indices are hard-coded for one particular Mapper run --
# confirm they still apply if the data or pipeline parameters change.
vertices_to_remove = [45, 18, 1, 7, 52, 55, 50, 49, 46, 51, 47, 30, 2, 44,
                      37, 54, 53, 9, 48, 13, 24]

giant_component = utils.get_subgraph(graph, vertices_to_remove)

# Materialise the node metadata once (instead of once per vertex) and use a
# set so that each membership test is O(1).
all_node_elements = list(giant_component['node_metadata']['node_elements'])
removed_vertices = set(vertices_to_remove)
node_elements = tuple(all_node_elements[v]
                      for v in range(graph.vcount())
                      if v not in removed_vertices)

pos = giant_component.layout('kamada_kawai')

# `num_cols` is not used in this cell; the assignment is kept because other
# cells may read it.
num_cols, _, _ = utils.get_cols_by_type()
relevant_cols = ['Personal income (thousands of dollars)',
                 'Net earnings by place of residence',
                 'Personal current transfer receipts',
                 'Dividends, interest, and rent 2/',
                 'Per capita personal income 4/',
                 'Per capita personal current transfer receipts 4/',
                 'Per capita retirement and other 4/',
                 'Earnings by place of work',
                 'Wages and salaries']

# The 2016 slice of the dataframe is the same for every indicator, so it is
# filtered once here rather than twice per loop iteration.
df_2016 = df[df['year'] == 2016]
n_electors_2016 = df_2016['n_electors'].reset_index(drop=True)

# One plot per economic indicator: nodes are coloured by the mean of the
# indicator over the node's members.
for col in relevant_cols:
    print(f'-- {col}  --')
    node_color = utils.get_node_summary(node_elements,
                                        df_2016[col].values,
                                        summary_stat=np.mean)

    node_text = utils.get_node_text(
        dict(enumerate(node_elements)),
        utils.get_n_electors(node_elements, n_electors_2016),
        node_color,
        col)

    # Uniform marker size; colour scale pinned to the observed value range.
    custom_plot_options = {
        'node_trace_text': node_text,
        'node_trace_marker_size': [1] * len(node_elements),
        'node_trace_marker_cmin': min(node_color),
        'node_trace_marker_cmax': max(node_color)}
    fig = visualize.create_network_2d(giant_component, pos, node_color,
                                      node_scale=20,
                                      custom_plot_options=custom_plot_options)
    fig.show()

### Example for 3D Plot

In [None]:
# Per-node membership lists and a 3D layout of the full Mapper graph.
node_elements = graph['node_metadata']['node_elements']
pos = graph.layout('kamada_kawai_3d')

# The 2016 rows and the per-node elector counts are needed several times
# below; compute each once instead of repeating the identical expressions.
df_2016 = df[df['year'] == 2016]
n_electors = utils.get_n_electors(
    node_elements,
    df_2016['n_electors'].reset_index(drop=True))

# Colour each node by the mean of the 'winner' column over its members.
node_color = utils.get_node_summary(node_elements,
                                    df_2016['winner'].values,
                                    summary_stat=np.mean)

# NOTE(review): the label says "Number of Counties" but node_color is a mean
# -- confirm the hover text wording against utils.get_node_text.
node_text = utils.get_node_text(
    dict(enumerate(node_elements)),
    n_electors,
    node_color,
    'Number of Counties Won by Republicans')

# Look the colormap up once; hover labels get the same red/blue colour as the
# node they belong to.
hover_cmap = get_cmap('RdBu_r')
custom_plot_options = {
    'node_trace_marker_colorscale': 'RdBu',
    'node_trace_marker_reversescale': True,
    'node_trace_hoverlabel': dict(
        bgcolor=[rgb2hex(hover_cmap(value)) for value in node_color]),
    'node_trace_text': node_text,
    # Marker size follows each node's elector count, scaled relative to the
    # largest one.
    'node_trace_marker_size': n_electors,
    'node_trace_marker_sizeref': .001 / max(n_electors)}

visualize.create_network_3d(graph, pos, node_color,
                            node_scale=100, custom_plot_options=custom_plot_options)

### Interactive

In [None]:
# Red/blue colour scale (reversed) for the interactive Mapper widget.
node_colour_options = {
    'node_trace_marker_reversescale': True,
    'node_trace_marker_colorscale': 'RdBu',
}
plotly_kwargs = {'custom_plot_options': node_colour_options}

# Interactive view of the Mapper pipeline on the 2016 data, with nodes
# summarised by their mean.
visualize.create_interactive_network(
    pipe,
    data_per_year['2016'],
    plotly_kwargs=plotly_kwargs,
    summary_stat=np.mean,
)