In [1]:
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt

import sklearn.cluster
import sklearn.decomposition
import sklearn.metrics

In [2]:
from bokeh.colors import RGB
from bokeh.io import show, output_file, output_notebook, push_notebook
from bokeh.plotting import figure
from bokeh.models import GraphRenderer, StaticLayoutProvider, Circle, MultiLine, HoverTool, TapTool, BoxSelectTool
output_notebook()

In [3]:
import warnings
warnings.filterwarnings('ignore')

# Loading the Data

In [4]:
mmbrs = pd.read_csv('Sall_members.csv')
mmbrs.head()

Unnamed: 0,congress,chamber,icpsr,state_icpsr,district_code,state_abbrev,party_code,occupancy,last_means,bioname,...,died,nominate_dim1,nominate_dim2,nominate_log_likelihood,nominate_geo_mean_probability,nominate_number_of_votes,nominate_number_of_errors,conditional,nokken_poole_dim1,nokken_poole_dim2
0,1,President,99869,99,0,USA,5000,,,"WASHINGTON, George",...,,,,,,,,,,
1,1,Senate,2936,1,0,CT,5000,0.0,3.0,"ELLSWORTH, Oliver",...,1807.0,0.53,0.809,-24.37915,0.778,97.0,8.0,,0.528,0.849
2,1,Senate,4998,1,0,CT,5000,0.0,3.0,"JOHNSON, William Samuel",...,1819.0,0.991,0.137,-30.41227,0.69,82.0,16.0,,0.997,0.075
3,1,Senate,507,11,0,DE,4000,0.0,3.0,"BASSETT, Richard",...,1815.0,0.087,0.007,-38.18355,0.654,90.0,23.0,,0.024,0.166
4,1,Senate,7762,11,0,DE,5000,0.0,3.0,"READ, George",...,1798.0,0.282,-0.239,-34.31907,0.699,96.0,15.0,,0.27,-0.206


In [5]:
cast = pd.read_csv('Sall_votes.csv')
cast.head()

Unnamed: 0,congress,chamber,rollnumber,icpsr,cast_code,prob
0,1,Senate,1,507,1,90.4
1,1,Senate,1,1346,6,48.6
2,1,Senate,1,1536,1,99.8
3,1,Senate,1,2307,1,100.0
4,1,Senate,1,2936,1,99.7


# Converting the Data into a Graphic

<p> The code takes the set of senators in a certain session of congress, clusters them by political party and measures how much they vote with each other. If two senators vote in the same manner at least 60% of the time, a line is drawn between them. The senators are displayed as nodes - the further left/right they are, the more ideologically extreme they are. The x-axis does not have any directional meaning; only the magnitude matters. The y-axis has no meaning at all. </p>

<p> Note that the clustering is not necessarily accurate. If a senator is depicted by a blue dot, but is shown to be in the 'R'epublican Party, that is an example of a mistaken clustering. If a senator is depicted by a red dot, but is shown to be in the 'D'emocratic Party, that is another example. Senators are only clustered into 'R' and 'D' - there are no 'I'ndependent classifications for senators.</p>

In [44]:
def builder(congress, arranger=-1):
    votes = mmbrs[mmbrs['congress'] == congress][mmbrs['chamber'] == 'Senate'][['icpsr','bioname','state_abbrev',
                                                                                'party_code']]

    def simplify1(x):
        if(x == 200):
            return 'R'
        elif(x == 100):
            return 'D'
        else:
            return 'I'

    votes['party_code'] = votes['party_code'].apply(simplify1)
    
    casts = cast[cast['congress'] == congress][['icpsr','rollnumber','cast_code']]

    def simplify2(x):
        if(x == 0):
            return None
        elif(x <= 3):
            return 1
        elif(x <= 6):
            return 0
        else:
            return 0.5

    casts['cast_code'] = casts['cast_code'].apply(simplify2)

    for i in casts['rollnumber'].drop_duplicates():
        votes = votes.merge(casts[casts['rollnumber'] == i][['icpsr','cast_code']],how='left',on='icpsr')
        votes = votes.rename(columns={'cast_code':'Roll '+str(i)})
    
    votes = votes.fillna(0.5)
    kmeans_model = sklearn.cluster.KMeans(n_clusters=2).fit(votes.iloc[:,4:])
    labels = kmeans_model.labels_
    print(pd.crosstab(labels,votes['party_code']))
    
    temp = votes[['party_code']]
    temp['labels'] = labels
    labels2 = []
    labels2.append(temp[temp['labels'] == 0]['party_code'].mode()[0])
    labels2.append(temp[temp['labels'] == 1]['party_code'].mode()[0])
    
    labels4 = temp['party_code'].tolist()
    for i in range(len(labels2)): 
        if(labels2[i] == 'D'): labels2[i] = 'B'
    for i in range(len(labels4)): 
        if(labels4[i] == 'D'): labels4[i] = 'B'
    labels3 = pd.Series(labels).apply(lambda x: labels2[x])
    print("Accuracy: " + str(sklearn.metrics.accuracy_score(labels4, labels3)*100) + "%")

    pca_2=sklearn.decomposition.PCA(2)
    plot_columns = pca_2.fit_transform(votes.iloc[:,4:])*arranger
    
    start = np.min(plot_columns[:,0]); yall = np.zeros(plot_columns.shape[0]); #gap = 2.5
    gap = (np.max(plot_columns[:,0]) - np.min(plot_columns[:,0]))/6
    while(start <= np.max(plot_columns[:,0])):
        sample = np.where(np.logical_and(start <= plot_columns[:,0], plot_columns[:,0] < start + gap))
        yspace = np.linspace(-10.0, 10.0, num=sample[0].shape[0]+2)
        for i in range(sample[0].shape[0]):
            yall[sample[0][i]] = yspace[i+1] 
        start += gap

    def simplify3(x):
        if(x == 'R'):
            return RGB(255,0,0)
        else:
            return RGB(91,155,213)

    labels5 = labels3.apply(simplify3)

    def simplify4(x):
        if(x == 'R'):
            return RGB(220,0,0)
        else:
            return RGB(47,85,151)

    labels6 = labels3.apply(simplify4)
    
    N = np.shape(plot_columns)[0]
    issues = votes.shape[1] - 3
    node_indices = list(range(N))

    x=np.power(plot_columns[:,0],1); y=yall
    
    hover = HoverTool(tooltips=[("Name", "@names"),("State", "@state"),("Party", "@party"),("X-value", "@x_value"),])
    plot = figure(title="Demonstration of Senate Division", x_range=(np.min(x)-1,np.max(x)+1), y_range=(-11,11), 
                  tools=[hover], toolbar_location=None)
    
    graph = GraphRenderer()

    graph.node_renderer.glyph = Circle(size=15, fill_color="fill_color")
    graph.node_renderer.hover_glyph = Circle(size=15, fill_color="hover_color")
    graph.node_renderer.data_source.data = dict(
        index=node_indices,
        fill_color=labels5,
        hover_color=labels6,
        names=votes['bioname'],
        party=votes['party_code'],
        state=votes['state_abbrev'],
        x_value=x)

    start1 = []; end1 = []
    for i in range(N):
        for j in range(N):
            if(int(np.sum(np.equal(votes.iloc[i:i+1,4:], votes.iloc[j:j+1,4:]),axis=1)) > int(0.6 * issues)):
                start1.append(i)
                end1.append(j)

    graph.edge_renderer.glyph = MultiLine(line_color="#CCCCCC", line_width=1)
    graph.edge_renderer.hover_glyph = MultiLine(line_color="#000000", line_width=1)
    graph.edge_renderer.data_source.data = dict(start=start1,end=end1)

    graph_layout = dict(zip(node_indices, zip(x, y)))
    graph.layout_provider = StaticLayoutProvider(graph_layout=graph_layout)

    plot.renderers.append(graph)

    output_notebook()
    show(plot)

# Results

<p> These are the results of the 57th session of Congress (1901-1903). We look at the results every 10 years. The Senate seems fairly divided between the far-left, far-right and the centrists. </p>

In [45]:
builder(57)

party_code   D  I   R
row_0                
0           31  2   4
1            1  0  56
Accuracy: 92.5531914893617%


E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: fill_color [renderer: GlyphRenderer(id='5f7991b5-a05e-4b80-99e8-51a0c0fcd7e6', ...)]


<p> These are the results of the 62nd session of Congress (1911-1913). Everyone on both parties seems fairly divided. </p>

In [46]:
builder(62)

party_code   D   R
row_0             
0           53   5
1            0  52
Accuracy: 95.45454545454545%


E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: fill_color [renderer: GlyphRenderer(id='1aa49fd1-5923-4edd-9c6b-ac8a37df5355', ...)]


<p> These are the results of the 67th session of Congress (1921-1923). Everyone on both parties seems fairly divided. </p>

In [47]:
builder(67)

party_code   D   R
row_0             
0           39   9
1            1  58
Accuracy: 90.65420560747664%


E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: fill_color [renderer: GlyphRenderer(id='8c281c2d-aa68-43b8-843e-fa603126cee5', ...)]


<p> These are the results of the 72nd session of Congress (1931-1933). Everyone on both parties seems fairly divided.</p>

In [48]:
builder(72)

party_code   D  I   R
row_0                
0           53  1  21
1            0  0  31
Accuracy: 79.24528301886792%


E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: fill_color [renderer: GlyphRenderer(id='c36aa053-ccfb-493c-944e-2331be5ba078', ...)]


<p> These are the results of the 77th session of Congress (1941-1943). The Republicans seem fairly divided, while the Democrats are fairly united.</p>

In [49]:
builder(77, arranger=1)

party_code   D  I   R
row_0                
0           22  1  30
1           54  1   1
Accuracy: 77.06422018348624%


E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: fill_color [renderer: GlyphRenderer(id='c2007dc2-eacc-4a0c-b9a3-02aafa16610f', ...)]


<p> These are the results of the 82nd session of Congress (1951-1953). Everyone on both parties seems fairly divided.</p>

In [50]:
builder(82)

party_code   D   R
row_0             
0           42   2
1            9  46
Accuracy: 88.88888888888889%


E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: fill_color [renderer: GlyphRenderer(id='626e0ef1-9dad-4792-95ec-0688abfcd4ca', ...)]


<p> These are the results of the 87th session of Congress (1961-1963). Everyone on both parties seems united. High bipartisanship.</p>

In [51]:
builder(87, arranger=1)

party_code   D   R
row_0             
0           15  38
1           50   2
Accuracy: 83.80952380952381%


E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: fill_color [renderer: GlyphRenderer(id='a9cfcdd0-818c-4e19-a095-4eaae3595e38', ...)]


<p> These are the results of the 92nd session of Congress (1971-1973). Everyone on both parties seems united. High bipartisanship.</p>

In [52]:
builder(92, arranger=1)

party_code   D  I   R
row_0                
0           36  0   8
1           20  2  37
Accuracy: 70.87378640776699%


E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: fill_color [renderer: GlyphRenderer(id='c90b6831-7cd4-443f-8d43-6777fad22f6d', ...)]


<p> These are the results of the 97th session of Congress (1981-1983). Everyone on both parties seems united. High bipartisanship.</p>

In [53]:
builder(97)

party_code   D  I   R
row_0                
0           44  0   2
1            2  1  52
Accuracy: 95.04950495049505%


E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: fill_color [renderer: GlyphRenderer(id='98876142-2356-43a2-a8c9-dffad42ad64c', ...)]


<p> These are the results of the 102nd session of Congress (1991-1993). Everyone on both parties seems united. High bipartisanship.</p>

In [54]:
builder(102, arranger=1)

party_code   D   R
row_0             
0           58   2
1            0  42
Accuracy: 98.0392156862745%


E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: fill_color [renderer: GlyphRenderer(id='123fd70f-9843-4166-a7b9-44b150f26f5c', ...)]


<p> These are the results of the 107th session of Congress (2001-2003). Everyone on both parties seems united. High bipartisanship.</p>

In [55]:
builder(107)

party_code   D  I   R
row_0                
0           49  1   0
1            1  1  50
Accuracy: 97.05882352941177%


E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: fill_color [renderer: GlyphRenderer(id='85ee4f73-9ae3-4c24-b431-41a10f11f754', ...)]


<p> These are the results of the 112nd session of Congress (2011-2013). There is clear division between the left and the right.</p>

In [56]:
builder(112, arranger=1)

party_code   D  I   R
row_0                
0            1  0  46
1           52  1   2
Accuracy: 96.07843137254902%


E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: fill_color [renderer: GlyphRenderer(id='72484302-eeac-40f6-94f2-60d7ad73d201', ...)]


<p> These are the results of the 114th session of Congress (2015-2017), the last full session. There is clear division between the left and the right.</p>

In [57]:
builder(114)

party_code   D  I   R
row_0                
0           44  2   0
1            0  0  54
Accuracy: 98.0%


E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: fill_color [renderer: GlyphRenderer(id='3d6460ac-60b5-4a10-9ff6-266c0565a438', ...)]


<p> These are the results of the 115th session of Congress (2017-2019), the current session. There is clear division between the left and the right. Note that the 3 central senators either left early (Sessions - to join the Trump administration) or joined late (Jones, Smith - to replace outgoing senators), and are therefore points which should not be relied on.</p>

In [58]:
builder(115)

party_code   D  I   R
row_0                
0           48  2   1
1            0  0  52
Accuracy: 97.0873786407767%


E-1001 (BAD_COLUMN_NAME): Glyph refers to nonexistent column name: fill_color [renderer: GlyphRenderer(id='b35d2978-1c5a-4aab-835a-e9eac80137a9', ...)]


# Conclusion

<p> Clearly, the Senate has grown more divided over the past few decades. The recent few sessions are clearly divided, with few outliers at the center. In fact, in the current session, there are no centrist senators who can claim bipartisanship, unlike the previous session. The only senators in the current session who can come close to being bipartisan are Manchin (D-NV), Heitkamp (D-ND) and Collins (R-ME). While the Senate has in the past gone through times where senators did not cooperate much with each other, those times reflected more individualistic as opposed to partisan behavior.