In [1]:
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt

import sklearn.cluster
import sklearn.decomposition
import sklearn.metrics

In [2]:
from ipywidgets import interact, IntSlider
from bokeh.colors import RGB
from bokeh.io import show, output_file, output_notebook, push_notebook
from bokeh.plotting import figure
from bokeh.models import GraphRenderer, StaticLayoutProvider, Circle, MultiLine, HoverTool, TapTool, BoxSelectTool
output_notebook()

In [3]:
import warnings
warnings.filterwarnings('ignore')

In [4]:
# Member table: the preview below shows one row per member per congress
# (congress, chamber, icpsr id, state, party code, name, NOMINATE scores, ...).
mmbrs = pd.read_csv(filepath_or_buffer='Sall_members.csv')
mmbrs.head(n=5)

Unnamed: 0,congress,chamber,icpsr,state_icpsr,district_code,state_abbrev,party_code,occupancy,last_means,bioname,...,died,nominate_dim1,nominate_dim2,nominate_log_likelihood,nominate_geo_mean_probability,nominate_number_of_votes,nominate_number_of_errors,conditional,nokken_poole_dim1,nokken_poole_dim2
0,1,President,99869,99,0,USA,5000,,,"WASHINGTON, George",...,,,,,,,,,,
1,1,Senate,2936,1,0,CT,5000,0.0,3.0,"ELLSWORTH, Oliver",...,1807.0,0.53,0.809,-24.37915,0.778,97.0,8.0,,0.528,0.849
2,1,Senate,4998,1,0,CT,5000,0.0,3.0,"JOHNSON, William Samuel",...,1819.0,0.991,0.137,-30.41227,0.69,82.0,16.0,,0.997,0.075
3,1,Senate,507,11,0,DE,4000,0.0,3.0,"BASSETT, Richard",...,1815.0,0.087,0.007,-38.18355,0.654,90.0,23.0,,0.024,0.166
4,1,Senate,7762,11,0,DE,5000,0.0,3.0,"READ, George",...,1798.0,0.282,-0.239,-34.31907,0.699,96.0,15.0,,0.27,-0.206


In [5]:
# Vote table: the preview below shows one row per (roll call, member) with the
# recorded `cast_code` for that member on that roll call.
cast = pd.read_csv(filepath_or_buffer='Sall_votes.csv')
cast.head(n=5)

Unnamed: 0,congress,chamber,rollnumber,icpsr,cast_code,prob
0,1,Senate,1,507,1,90.4
1,1,Senate,1,1346,6,48.6
2,1,Senate,1,1536,1,99.8
3,1,Senate,1,2307,1,100.0
4,1,Senate,1,2936,1,99.7


In [11]:
# Hover tooltips read the per-node columns that builder() stores in the graph's
# node data source (names / state / party / x_value).
hover = HoverTool(
    tooltips=[
        ("Name", "@names"),
        ("State", "@state"),
        ("Party", "@party"),
        ("X-value", "@x_value"),
    ]
)

# Single shared figure; builder() attaches its graph renderer to this object.
plot = figure(
    title="Demonstration of Senate Division",
    x_range=(-11, 11),
    y_range=(-11, 11),
    tools=[hover],
    toolbar_location=None,
)

In [18]:
# Metadata columns kept for every senator; every column to the right of these
# in the merged table is a roll-call column.
_MEMBER_COLUMNS = ['icpsr', 'bioname', 'state_abbrev', 'party_code']


def _stacked_y_positions(x, n_slices=6, y_low=-10.0, y_high=10.0):
    """Spread points vertically so nodes with similar x do not overlap.

    The x range is cut into slices of width ``(max - min) / n_slices`` (the
    maximum itself lands in one extra slice). Within each slice the points
    are placed at evenly spaced heights strictly between ``y_low`` and
    ``y_high``, in their original order.

    Parameters
    ----------
    x : 1-D numpy array of x coordinates.
    n_slices : number of equal-width slices across the x range.
    y_low, y_high : vertical extent; the end points themselves are not used.

    Returns
    -------
    1-D numpy array of y coordinates, same length as ``x``.
    """
    y = np.zeros(x.shape[0])
    x_min = x.min()
    span = x.max() - x_min
    if span == 0:
        # All points share one x: a single column (a zero slice width would
        # otherwise never advance through the range).
        slice_of = np.zeros(x.shape[0], dtype=int)
    else:
        slice_of = np.floor((x - x_min) / (span / n_slices)).astype(int)
    for s in np.unique(slice_of):
        members = np.flatnonzero(slice_of == s)
        # num = count + 2 and dropping both ends keeps nodes off the border.
        y[members] = np.linspace(y_low, y_high, num=members.size + 2)[1:-1]
    return y


def _agreement_edges(vote_matrix, min_share=0.6):
    """Return (start, end) node-index lists for pairs that mostly vote alike.

    Two members are linked when the number of roll calls on which their coded
    votes are identical exceeds ``int(min_share * n_roll_calls)``. Each
    unordered pair is reported once (start < end); self-pairs are omitted.
    """
    n_rolls = vote_matrix.shape[1]
    # (N, N) count of identical entries for every pair of rows.
    matches = (vote_matrix[:, None, :] == vote_matrix[None, :, :]).sum(axis=2)
    linked = np.triu(matches > int(min_share * n_rolls), k=1)
    first, second = np.nonzero(linked)
    return first.tolist(), second.tolist()


def builder(congress=114):
    """Cluster one congress's senators by their votes and draw them on `plot`.

    Steps:
      1. Select the Senate members of `congress` from `mmbrs` and recode
         party_code to 'R' (200), 'D' (100) or 'I' (anything else).
      2. Recode each cast_code from `cast`: 1-3 -> 1, 4-6 -> 0, 7+ -> 0.5,
         0 -> missing; missing entries are then filled with 0.5.
      3. Run 2-cluster KMeans on the member x roll-call matrix, print the
         cluster/party crosstab and the share of members whose party matches
         their cluster's majority party.
      4. Project the matrix with a 2-component PCA; the (negated) first
         component is the x coordinate, y only spreads nodes apart.
      5. Link members whose votes coincide on more than 60% of roll calls and
         replace the graph shown in the module-level `plot`, then call
         push_notebook().

    Parameters
    ----------
    congress : int
        Congress number to display.

    Returns
    -------
    None. Output is printed and pushed to the notebook as a side effect.
    """
    # --- 1. member roster ---------------------------------------------------
    is_senator = (mmbrs['congress'] == congress) & (mmbrs['chamber'] == 'Senate')
    members = mmbrs.loc[is_senator, _MEMBER_COLUMNS].copy()
    members['party_code'] = members['party_code'].map({200: 'R', 100: 'D'}).fillna('I')

    # --- 2. roll-call matrix ------------------------------------------------
    # Same chamber filter as the roster, so roll numbers cannot mix chambers.
    in_congress = (cast['congress'] == congress) & (cast['chamber'] == 'Senate')
    casts = cast.loc[in_congress, ['icpsr', 'rollnumber', 'cast_code']]
    if members.empty or casts.empty:
        print("No Senate roll-call data for congress " + str(congress))
        return

    codes = casts['cast_code']
    # First matching condition wins, mirroring an if/elif chain.
    vote_value = np.select([codes == 0, codes <= 3, codes <= 6],
                           [np.nan, 1.0, 0.0], default=0.5)
    # One pivot instead of one merge per roll call.
    roll_table = (
        casts.assign(vote=vote_value)
             .drop_duplicates(['icpsr', 'rollnumber'])
             .set_index(['icpsr', 'rollnumber'])['vote']
             .unstack('rollnumber')
    )
    roll_table.columns = ['Roll ' + str(r) for r in roll_table.columns]
    votes = members.merge(roll_table.reset_index(), how='left', on='icpsr')

    # Only the vote columns are filled; member metadata is left untouched.
    vote_matrix = votes.iloc[:, len(_MEMBER_COLUMNS):].fillna(0.5).to_numpy(dtype=float)
    n_members, n_rolls = vote_matrix.shape
    if n_members < 2 or n_rolls < 2:
        print("Not enough members or roll calls to cluster congress " + str(congress))
        return

    # --- 3. clustering ------------------------------------------------------
    # Fixed seed: the same congress always yields the same clustering.
    kmeans_model = sklearn.cluster.KMeans(n_clusters=2, n_init=10,
                                          random_state=0).fit(vote_matrix)
    labels = kmeans_model.labels_
    party = votes['party_code']
    print(pd.crosstab(labels, party))

    def as_colour_key(p):
        # Democrats are relabelled 'B'; every other label is kept as is.
        return 'B' if p == 'D' else p

    cluster_party = []
    for k in range(2):
        majority = party[labels == k].mode()
        # An empty cluster has no members to label, so its fallback is unused.
        cluster_party.append(as_colour_key(majority.iloc[0]) if len(majority) else 'I')

    predicted = [cluster_party[k] for k in labels]
    actual = [as_colour_key(p) for p in party]
    print("Accuracy: " + str(sklearn.metrics.accuracy_score(actual, predicted)*100) + "%")

    # --- 4. layout ----------------------------------------------------------
    # The sign of a principal component is arbitrary; the negation is kept
    # from the original layout.
    coords = -sklearn.decomposition.PCA(2).fit_transform(vote_matrix)
    x = coords[:, 0]
    y = _stacked_y_positions(x)

    # Plain hex strings are valid per-node colour values in a data source.
    fill_colors = ['#FF0000' if p == 'R' else '#5B9BD5' for p in predicted]
    hover_colors = ['#DC0000' if p == 'R' else '#2F5597' for p in predicted]

    # --- 5. graph -----------------------------------------------------------
    node_indices = list(range(n_members))

    graph = GraphRenderer()
    graph.node_renderer.glyph = Circle(size=15, fill_color="fill_color")
    graph.node_renderer.hover_glyph = Circle(size=15, fill_color="hover_color")
    graph.node_renderer.data_source.data = dict(
        index=node_indices,
        fill_color=fill_colors,
        hover_color=hover_colors,
        names=votes['bioname'].tolist(),
        party=party.tolist(),
        state=votes['state_abbrev'].tolist(),
        x_value=x)

    edge_start, edge_end = _agreement_edges(vote_matrix, min_share=0.6)
    graph.edge_renderer.glyph = MultiLine(line_color="#CCCCCC", line_width=1)
    graph.edge_renderer.hover_glyph = MultiLine(line_color="#000000", line_width=1)
    graph.edge_renderer.data_source.data = dict(start=edge_start, end=edge_end)

    graph_layout = {i: (float(x[i]), float(y[i])) for i in node_indices}
    graph.layout_provider = StaticLayoutProvider(graph_layout=graph_layout)

    # Drop the graph from any previous call so renderers do not accumulate.
    plot.renderers = [r for r in plot.renderers if not isinstance(r, GraphRenderer)]
    plot.renderers.append(graph)

    push_notebook()

In [19]:
# Render the shared figure with a notebook handle. builder() calls
# push_notebook() with no explicit handle, so this cell has to run before the
# slider cell below for those updates to reach this output.
show(plot, notebook_handle=True)

E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: fill_color [renderer: GlyphRenderer(id='4291157f-89cc-4c5e-8baa-30509199c208', ...)]


In [20]:
# interact() displays the slider itself and returns `builder`; wrapping it in
# display() only printed that function's repr, so the call is made bare and
# the trailing semicolon suppresses the returned value.
# continuous_update=False re-runs the (expensive) builder once, when the
# slider is released, instead of on every intermediate value while dragging.
interact(builder, congress=IntSlider(value=114, min=57, max=115, step=1,
                                     continuous_update=False));

<function __main__.builder>

In [None]:
# NOTE(review): bare name in a never-executed cell; it only evaluates to the
# `display` function. Looks like leftover scratch and is probably safe to delete.
display