In [1]:
# Import packages
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import seaborn as sns
from sklearn.cluster import KMeans
from yellowbrick.cluster import KElbowVisualizer
from warnings import simplefilter
import plotly.express as px
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from IPython.display import display
from ipywidgets import AppLayout

In [2]:
# Polarization figure
# For every yearly CSV, fit a PCA and record the percentage of variance carried
# by the first principal component, then plot that percentage over time.
first_year, last_year = 1941, 2020
years = list(range(first_year, last_year + 1))

variance_explained_by_year = []
for Year in years:
    file = 'data_senate/data/' + str(Year) + '.csv'
    data = pd.read_csv(file)
    # Columns from position 2 onward are the PCA input. The frame is transposed
    # before fitting, so each original column is one sample and each original
    # row is one feature.
    pca = PCA(n_components = 0.95)
    pca.fit(data.iloc[:, 2:].T)
    # explained_variance_ratio_ is a fraction; scale it to a percentage.
    variance_explained_by_year.append(pca.explained_variance_ratio_[0] * 100)

# The column keeps its historical (misspelled) name so anything that reads this
# frame by column name keeps working; only the displayed label is corrected.
variance_explained_by_year_df = pd.DataFrame({
    'Varianced Explained by PC1': variance_explained_by_year,
    'Year': years,
})
polarization = px.line(
    variance_explained_by_year_df,
    x = "Year",
    y = "Varianced Explained by PC1",
    labels = {"Varianced Explained by PC1": "Variance Explained by PC1 (%)"},
    title = 'Political Polarization Between 1941 and 2020',
)

In [3]:
# PCA + clustering
def pca_clustering(Year, Clusters, Color):
    """Show one year's rows as a scatter on their first two principal components.

    Reads ``data_senate/data/<Year>.csv``, fits a PCA on the transpose of the
    columns from position 2 onward, and uses the first two component vectors
    as per-row coordinates (``PC1``, ``PC2``). K-means is run on those two
    coordinates and a Plotly scatter is displayed.

    Parameters
    ----------
    Year : int
        Year whose CSV file is loaded.
    Clusters : int
        Number of k-means clusters.
    Color : str
        ``'Party'`` colours points by the ``Party`` column (``D`` blue, ``R``
        red); ``'Cluster'`` colours them by k-means label. Any other value
        draws nothing.

    Returns
    -------
    None
        The figure is displayed as a side effect.
    """
    file = 'data_senate/data/' + str(Year) + '.csv'
    data = pd.read_csv(file)

    # Request exactly two components. A variance threshold (n_components=0.95)
    # keeps a single component whenever PC1 alone reaches the threshold, and
    # the PC2 lookup below would then fail. The full SVD solver is the one a
    # fractional n_components uses, so the leading components are computed the
    # same way as before.
    pca = PCA(n_components = 2, svd_solver = 'full')
    pca.fit(data.iloc[:, 2:].T)
    # components_ has one row per component and one entry per row of `data`
    # (the rows of `data` are the features of the transposed matrix).
    loadings = pca.components_
    data.insert(0, 'PC1', loadings[0])
    data.insert(0, 'PC2', loadings[1])
    data = data.rename(columns = {'Unnamed: 0': 'Senator'})

    k_means = KMeans(n_clusters = Clusters, random_state = 20210318)
    k_means.fit(data[['PC1', 'PC2']])
    cluster = k_means.predict(data[['PC1', 'PC2']])
    data.insert(0, 'Cluster', cluster)
    # String labels make Plotly treat clusters as discrete categories; sorting
    # fixes the order in which they appear in the legend.
    data['Cluster'] = data['Cluster'].astype(str)
    data = data.sort_values('Cluster')

    title = str(Year) + ': ' + str(len(data)) + ' Senators in the dataset'
    scatter_options = dict(title = title, x = 'PC1', y = 'PC2', color = Color, hover_name = 'Senator')
    if Color == 'Party':
        scatter_options['color_discrete_map'] = {
            "D": "blue",
            "R": "red"}
    elif Color != 'Cluster':
        # Unknown colouring mode: nothing is drawn.
        return
    fig = px.scatter(data, **scatter_options)
    fig.show()

In [4]:
def _print_issue_ranking(votes, group):
    """Print the 3 highest- and the 3 lowest-std column labels of ``votes``."""
    spread = votes.std()

    print('Most divisive issues (' + group + '):')
    for link in spread.sort_values(ascending = False)[:3].index:
        print(link)
    print('-' * 10)

    print('Least divisive issues (' + group + '):')
    for link in spread.sort_values(ascending = True)[:3].index:
        print(link)


def print_issues(Year):
    """Print the most and least divisive vote columns for one year.

    Reads ``data_senate/data/<Year>.csv`` and ranks the vote columns by their
    standard deviation across rows. The three largest and three smallest are
    printed for the whole file, for rows whose ``Party`` is ``D`` or ``I``
    (reported together as "Democratic Party"), and for rows whose ``Party`` is
    ``R``. Sections are separated by a line of ten dashes.

    Parameters
    ----------
    Year : int
        Year whose CSV file is loaded.

    Returns
    -------
    None
        Results are written to standard output.
    """
    file = 'data_senate/data/' + str(Year) + '.csv'
    data = pd.read_csv(file)

    # Vote columns start at position 2 of the freshly read file, the same slice
    # the PCA code in this notebook fits on. Starting at 4 silently left the
    # first two vote columns out of every ranking.
    # NOTE(review): assumes the CSV layout is <name>, Party, <votes...> - confirm.
    first_vote_column = 2

    groups = [
        ('Senate', data),
        ('Democratic Party', data[(data['Party'] == 'D') | (data['Party'] == 'I')]),
        ('Republican Party', data[data['Party'] == 'R']),
    ]
    for position, (group, rows) in enumerate(groups):
        if position:
            print('-' * 10)
        _print_issue_ranking(rows.iloc[:, first_vote_column:], group)

In [5]:
# Slider that selects which year's CSV the callbacks read.
yearslider = widgets.IntSlider(
    description='Year:',
    value=2020,
    min=1941,
    max=2020,
    step=1,
    disabled=False,
)

# Slider that selects the number of k-means clusters.
clusterslider = widgets.IntSlider(
    description='Clusters:',
    value=4,
    min=2,
    max=10,
    step=1,
    disabled=False,
)

# The same year slider object is handed to both interactive panels, so they
# share one year value. The list passed as Color is offered as a choice of
# 'Party' or 'Cluster'.
plot = interactive(
    pca_clustering,
    Year=yearslider,
    Clusters=clusterslider,
    Color=['Party', 'Cluster'],
)
issues = interactive(print_issues, Year=yearslider)

In [8]:
from ipywidgets import HBox, VBox

# display() returns None, so passing its result as `header` left the header
# slot empty and drew the figure outside the layout. Rendering the figure into
# an Output widget gives AppLayout a real widget to place in the header.
polarization_header = widgets.Output()
with polarization_header:
    display(polarization)

# Last expression of the cell, so the assembled layout is what gets rendered.
AppLayout(header = polarization_header, left_sidebar = plot, right_sidebar = issues)

AppLayout(children=(interactive(children=(IntSlider(value=2020, description='Year:', max=2020, min=1941), IntS…