### Import of data & packages

In [91]:
import pandas as pd
import plotly.graph_objects as go

def bubble(x, y, color, size, labels, max_marker_px=45.):
    """Show a bubble chart whose marker areas are proportional to ``size``.

    Parameters
    ----------
    x, y : sequence
        Marker coordinates.
    color : sequence or str
        Marker colour(s), forwarded to ``marker.color``.
    size : sequence of numbers
        Value represented by the area of each marker.
    labels : sequence of str
        Text attached to each marker (``text``).
    max_marker_px : float, optional
        Desired diameter, in pixels, of the largest marker. Defaults to 45,
        the value that used to be hard-coded.

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``; nothing is returned, so
        calling this as the last line of a cell does not render it twice.
    """
    # max() raises on an empty sequence; treat "no data" as a largest size of 0.
    largest = max(size, default=0)

    # Plotly's recommended scaling for sizemode='area':
    #     sizeref = 2 * max(size) / desired_max_diameter ** 2
    # A sizeref of 0 would make every marker size a division by zero, so fall
    # back to the neutral value 1 when there is nothing positive to scale by.
    if largest > 0:
        sizeref = 2. * largest / (max_marker_px ** 2)
    else:
        sizeref = 1

    data = [go.Scatter(
        x=x,
        y=y,
        text=labels,
        mode='markers',
        marker=dict(
            color=color,
            size=size,
            sizemode='area',
            sizeref=sizeref,
            sizemin=4))]
    fig = go.Figure(data)
    fig.show()

def bar(x, y, colors):
    """Display a single-series bar chart.

    Parameters
    ----------
    x : sequence
        Category (or position) of each bar.
    y : sequence of numbers
        Height of each bar.
    colors : sequence or str
        Bar colour(s), forwarded to ``marker_color``.

    The figure is shown with ``fig.show()``; nothing is returned.
    """
    bars = go.Bar(x=x, y=y, marker_color=colors)
    figure = go.Figure(data=[bars])
    figure.show()
    
def barStack(labels, y1, y2):
    """Display two series as one stacked bar chart.

    Parameters
    ----------
    labels : sequence
        x-axis category shared by both series.
    y1, y2 : sequence of numbers
        Heights of the lower (``y1``) and upper (``y2``) bar segments.

    The figure is shown with ``fig.show()``; nothing is returned.
    """
    layers = [go.Bar(x=labels, y=heights) for heights in (y1, y2)]
    figure = go.Figure(data=layers)
    figure.update_layout(barmode='stack')
    figure.show()

    
# Load the census estimates table from the working directory. The file is
# decoded as ISO-8859-1 (Latin-1) rather than pandas' default UTF-8.
init = pd.read_csv('CensusData2019.csv', encoding = "ISO-8859-1")

In [65]:
# Preview the first rows of the census table to check that the load worked.
init.head()

Unnamed: 0,SUMLEV,REGION,DIVISION,STATE,COUNTY,STNAME,CTYNAME,CENSUS2010POP,ESTIMATESBASE2010,POPESTIMATE2010,...,RDOMESTICMIG2019,RNETMIG2011,RNETMIG2012,RNETMIG2013,RNETMIG2014,RNETMIG2015,RNETMIG2016,RNETMIG2017,RNETMIG2018,RNETMIG2019
0,40,3,6,1,0,Alabama,Alabama,4779736,4780125,4785437,...,1.917501,0.578434,1.186314,1.522549,0.563489,0.626357,0.745172,1.090366,1.773786,2.483744
1,50,3,6,1,1,Alabama,Autauga County,54571,54597,54773,...,4.84731,6.018182,-6.226119,-3.902226,1.970443,-1.712875,4.777171,0.849656,0.540916,4.560062
2,50,3,6,1,3,Alabama,Baldwin County,182265,182265,183112,...,24.017829,16.64187,17.488579,22.751474,20.184334,17.725964,21.279291,22.398256,24.727215,24.380567
3,50,3,6,1,5,Alabama,Barbour County,27457,27455,27327,...,-5.690302,0.292676,-6.897817,-8.132185,-5.140431,-15.724575,-18.238016,-24.998528,-8.754922,-5.165664
4,50,3,6,1,7,Alabama,Bibb County,22915,22915,22870,...,1.385134,-4.998356,-3.787545,-5.797999,1.331144,1.329817,-0.708717,-3.234669,-6.857092,1.831952


In [66]:
# List every column name; the frame is too wide for head() to show them all.
list(init.columns)

['SUMLEV',
 'REGION',
 'DIVISION',
 'STATE',
 'COUNTY',
 'STNAME',
 'CTYNAME',
 'CENSUS2010POP',
 'ESTIMATESBASE2010',
 'POPESTIMATE2010',
 'POPESTIMATE2011',
 'POPESTIMATE2012',
 'POPESTIMATE2013',
 'POPESTIMATE2014',
 'POPESTIMATE2015',
 'POPESTIMATE2016',
 'POPESTIMATE2017',
 'POPESTIMATE2018',
 'POPESTIMATE2019',
 'NPOPCHG_2010',
 'NPOPCHG_2011',
 'NPOPCHG_2012',
 'NPOPCHG_2013',
 'NPOPCHG_2014',
 'NPOPCHG_2015',
 'NPOPCHG_2016',
 'NPOPCHG_2017',
 'NPOPCHG_2018',
 'NPOPCHG_2019',
 'BIRTHS2010',
 'BIRTHS2011',
 'BIRTHS2012',
 'BIRTHS2013',
 'BIRTHS2014',
 'BIRTHS2015',
 'BIRTHS2016',
 'BIRTHS2017',
 'BIRTHS2018',
 'BIRTHS2019',
 'DEATHS2010',
 'DEATHS2011',
 'DEATHS2012',
 'DEATHS2013',
 'DEATHS2014',
 'DEATHS2015',
 'DEATHS2016',
 'DEATHS2017',
 'DEATHS2018',
 'DEATHS2019',
 'NATURALINC2010',
 'NATURALINC2011',
 'NATURALINC2012',
 'NATURALINC2013',
 'NATURALINC2014',
 'NATURALINC2015',
 'NATURALINC2016',
 'NATURALINC2017',
 'NATURALINC2018',
 'NATURALINC2019',
 'INTERNATIONALMIG201

In [67]:
# In the preview of `init`, SUMLEV 40 marks the state-total row and SUMLEV 50
# the county rows, so keep only the state-level rows before grouping.
is_state_row = init['SUMLEV'] == 40
by_state = init.loc[is_state_row].groupby('STNAME')

# 2019 population estimate per state, indexed by state name.
state_pops = by_state['POPESTIMATE2019'].sum()

In [70]:
# Scrape every HTML table on the Wikipedia roster page (needs network access).
sen_scrape = pd.read_html('https://en.wikipedia.org/wiki/List_of_current_United_States_senators')

# Columns used by the rest of the notebook. 'Party.1' is presumably the second
# of two 'Party' headers, i.e. the one holding the party name -- TODO confirm.
sen_columns = ['State', 'Senator', 'Party.1', 'Born', 'Assumed office', 'Term up']

# Pick the roster by its columns instead of a fixed position in the page: a
# positional index silently selects the wrong table as soon as Wikipedia adds
# or removes a table above the roster.
sen_init = next(
    (table for table in sen_scrape if set(sen_columns).issubset(table.columns)),
    None)
if sen_init is None:
    raise ValueError(
        'No scraped table has all of the expected columns: %s' % sen_columns)
sen_init = sen_init[sen_columns]

# Attach each senator's 2019 state population; how='left' keeps every senator
# row even if the state name finds no match in state_pops.
sen_with_pop = sen_init.merge(state_pops, how='left', left_on='State', right_index=True)
sen_with_pop.head()

#NOTE: Occupation / previous office / residence possibly useful later?

Unnamed: 0,State,Senator,Party.1,Born,Assumed office,Term up,POPESTIMATE2019
0,Alabama,Richard Shelby,Republican,(age 86),"January 3, 1987",2022,4903185
1,Alabama,Tommy Tuberville,Republican,(age 66),"January 3, 2021",2026,4903185
2,Alaska,Lisa Murkowski,Republican,(age 63),"December 20, 2002[d]",2022,731545
3,Alaska,Dan Sullivan,Republican,(age 56),"January 3, 2015",2026,731545
4,Arizona,Kyrsten Sinema,Democratic,(age 44),"January 3, 2019",2024,7278717


In [110]:
# Split the roster into a Republican group and "everyone else".
party = sen_with_pop['Party.1']
is_republican = party == 'Republican'

R_data = sen_with_pop[is_republican]
R_pops = list(R_data['POPESTIMATE2019'])
R_sen_list = list(R_data['Senator'])
R_states = list(R_data['State'])

# Everything that is not Republican. Besides Democrats this also contains the
# independents and any vacant seat, so the D_* lists are "non-Republican".
D_data = sen_with_pop[~is_republican]
D_pops = list(D_data['POPESTIMATE2019'])
D_sen_list = list(D_data['Senator'])
D_states = list(D_data['State'])

# Independents on their own. Match on the prefix: the scraped label carries a
# Wikipedia footnote marker ('Independent[a]'), and an exact match would
# silently select nothing if that marker ever changed.
I_data = sen_with_pop[party.str.startswith('Independent', na=False)]
I_pops = list(I_data['POPESTIMATE2019'])

In [111]:
GRID_ROWS = 5  # markers stacked in each grid column


def _grid_coords(count, first_col=0, rows=GRID_ROWS):
    """Return ``count`` [column, row] cells, filling one column at a time."""
    return [[first_col + k // rows, k % rows] for k in range(count)]


# One grid cell per senator. Deriving the counts from the data keeps the
# coordinates in step with R_pops / D_pops when the split is not exactly
# 50/50 (for 50/50 this yields the same 10x5 + 10x5 layout as before).
R_coords = _grid_coords(len(R_pops))

# The second block starts in the first column after the Republican block
# (ceiling division, so a partly filled last column is not shared).
D_first_col = -(-len(R_pops) // GRID_ROWS)
D_coords = _grid_coords(len(D_pops), first_col=D_first_col)
        

In [112]:
# One colour per plotted senator, in the same order as the concatenated lists
# below. The counts come from the data: a hard-coded 50/50 split would paint a
# senator the wrong colour whenever the chamber is not evenly divided.
colors = ['red'] * len(R_sen_list) + ['blue'] * len(D_sen_list)
full_sen_list = R_sen_list + D_sen_list
state_list = R_states + D_states

In [115]:
# Flatten the grid cells (Republican block first, then the rest) into the
# parallel x / y / size lists that bubble() expects.
all_coords = R_coords + D_coords
x_vals = [cell[0] for cell in all_coords]
y_vals = [cell[1] for cell in all_coords]
pops = R_pops + D_pops

bubble(x_vals, y_vals, colors, pops, full_sen_list)

In [None]:
def bubble(x, y, color, size, labels, max_marker_px=45.):
    """Show a bubble chart whose marker areas are proportional to ``size``.

    This redefinition replaces the ``bubble`` defined in the first cell. The
    draft of this cell passed an unfinished ``facet_col= ,`` argument, which
    is a syntax error. ``facet_col`` is also a ``plotly.express`` option, not
    a ``go.Scatter`` property, so it has been dropped.
    TODO: if one panel per group is wanted, build it with subplots or with
    ``plotly.express.scatter(..., facet_col=...)``.

    Parameters
    ----------
    x, y : sequence
        Marker coordinates.
    color : sequence or str
        Marker colour(s), forwarded to ``marker.color``.
    size : sequence of numbers
        Value represented by the area of each marker.
    labels : sequence of str
        Text attached to each marker (``text``).
    max_marker_px : float, optional
        Desired diameter, in pixels, of the largest marker (default 45).

    Returns
    -------
    None
        The figure is displayed with ``fig.show()``; nothing is returned.
    """
    # max() raises on an empty sequence; treat "no data" as a largest size of 0.
    largest = max(size, default=0)

    # Plotly's recommended scaling for sizemode='area':
    #     sizeref = 2 * max(size) / desired_max_diameter ** 2
    # Fall back to 1 when there is nothing positive to scale by, because a
    # sizeref of 0 would make every marker size a division by zero.
    if largest > 0:
        sizeref = 2. * largest / (max_marker_px ** 2)
    else:
        sizeref = 1

    data = [go.Scatter(
        x=x,
        y=y,
        text=labels,
        mode='markers',
        marker=dict(
            color=color,
            size=size,
            sizemode='area',
            sizeref=sizeref,
            sizemin=4))]
    fig = go.Figure(data)
    fig.show()

In [90]:
# Total state population behind each group of senators. Every senator row
# carries the full state population, so a state with two senators in the same
# group is counted twice here.
all_Rs, all_Ds, just_Is = (sum(group) for group in (R_pops, D_pops, I_pops))

In [87]:
# Party breakdown of the non-Republican rows, i.e. of the group summed in D_pops.
D_data['Party.1'].value_counts()

Democratic        46
Independent[a]     2
Vacant             1
Name: Party.1, dtype: int64

In [119]:
# Roster ordered by party label, re-indexed from 0.
party_sorted_data = (
    sen_with_pop
    .sort_values(by='Party.1')
    .reset_index(drop=True)
)

Unnamed: 0,State,Senator,Party.1,Born,Assumed office,Term up,POPESTIMATE2019
0,Illinois,Tammy Duckworth,Democratic,(age 52),"January 3, 2017",2022,12671821
1,New Mexico,Martin Heinrich,Democratic,(age 49),"January 3, 2013",2024,2096829
2,New Jersey,Cory Booker,Democratic,(age 51),"October 31, 2013[q]",2026,8882190
3,New Jersey,Bob Menendez,Democratic,(age 67),"January 17, 2006[p]",2024,8882190
4,New Hampshire,Maggie Hassan,Democratic,(age 62),"January 3, 2017",2022,1359711
...,...,...,...,...,...,...,...
95,Kentucky,Rand Paul,Republican,(age 58),"January 3, 2011",2022,4467673
96,Kentucky,Mitch McConnell,Republican,(age 78),"January 3, 1985",2026,4467673
97,Mississippi,Roger Wicker,Republican,(age 69),"December 31, 2007[n]",2024,2976149
98,Wyoming,Cynthia Lummis,Republican,(age 66),"January 3, 2021",2026,578759


In [120]:
# Roster ordered from the least to the most populous state, re-indexed from 0.
pop_sorted_data = (
    sen_with_pop
    .sort_values(by='POPESTIMATE2019')
    .reset_index(drop=True)
)
pop_sorted_data.head()

Unnamed: 0,State,Senator,Party.1,Born,Assumed office,Term up,POPESTIMATE2019
0,Wyoming,Cynthia Lummis,Republican,(age 66),"January 3, 2021",2026,578759
1,Wyoming,John Barrasso,Republican,(age 68),"June 25, 2007[x]",2024,578759
2,Vermont,Bernie Sanders,Independent[a],(age 79),"January 3, 2007",2024,623989
3,Vermont,Patrick Leahy,Democratic,(age 80),"January 3, 1975",2022,623989
4,Alaska,Lisa Murkowski,Republican,(age 63),"December 20, 2002[d]",2022,731545


In [137]:
# Bar colour for each value found in 'Party.1'. Vacant seats are drawn in the
# same blue as Democratic ones.
colordict = {
    'Republican': 'red',
    'Democratic': 'blue',
    'Independent[a]': 'gray',
    'Vacant': 'blue',
}

# One colour per row of the population-sorted roster; an unknown party label
# raises KeyError rather than being drawn in a default colour.
colors = [colordict[party_label] for party_label in pop_sorted_data['Party.1']]

In [156]:
# STACK IT
# Split the roster into each state's first- and second-listed senator.
# Numbering the rows within each state does not depend on the frame having a
# clean 0..n-1 index with strictly alternating rows, which the old
# index-parity split assumed.
seat_in_state = sen_with_pop.groupby('State').cumcount()
stack_1 = sen_with_pop[seat_in_state == 0]
stack_2 = sen_with_pop[seat_in_state == 1]

colors1 = [colordict[k] for k in stack_1['Party.1']]
colors2 = [colordict[k] for k in stack_2['Party.1']]

stack_labels = list(range(50))  # not used by this cell

# Each senator is credited with half of the state's population, so the two
# stacked segments of a state add up to its full population.
trace1 = go.Bar(
    x=list(stack_1['State']),
    y=list(stack_1['POPESTIMATE2019']/2),
    marker_color=colors1,
    text=list(stack_1['State']))

# The x categories come from stack_2 itself (they used to be copied from
# stack_1), so every segment sits on the bar of the state it belongs to.
trace2 = go.Bar(
    x=list(stack_2['State']),
    y=list(stack_2['POPESTIMATE2019']/2),
    marker_color=colors2,
    hovertext=list(stack_2['State']))

fig = go.Figure([trace1, trace2])
# On a category axis x=9.5 falls between the 10th and 11th bar.
fig.add_vline(x=9.5, line_dash='dot')
fig.update_layout(barmode='stack', xaxis={'categoryorder':'total descending'})
fig.show()
# figure out how to just draw line between each half

In [151]:
mylabels = ['Republican', 'Democrat']

# The totals count a state's population once per senator, so halve them to
# credit each senator with half of the state.
half_totals = [all_Rs/2, all_Ds/2]
bar(mylabels, half_totals, ['red', 'blue'])

In [145]:

# Save the merged senator/population table, without the index column, to the
# working directory.
sen_with_pop.to_csv('math.csv', index=False)

In [150]:
# Reload the roster saved in math.csv and recompute the party totals, this
# time crediting each senator with half of the state's population up front.
new_sen = pd.read_csv('math.csv')

partyx = new_sen['Party.1']
is_republicanx = partyx == 'Republican'

R_datax = new_sen[is_republicanx]
R_popsx = list(R_datax['POPESTIMATE2019']/2)

# Everything that is not Republican: Democrats, independents and vacant seats.
D_datax = new_sen[~is_republicanx]
D_popsx = list(D_datax['POPESTIMATE2019']/2)

# Match on the prefix: the label carries a Wikipedia footnote marker
# ('Independent[a]'), and an exact match would silently select nothing if
# that marker ever changed.
I_datax = new_sen[partyx.str.startswith('Independent', na=False)]
I_popsx = list(I_datax['POPESTIMATE2019']/2)

all_Rsx = sum(R_popsx)
all_Dsx = sum(D_popsx)
just_Isx = sum(I_popsx)
mylabelsx = ['Republican', 'Democrat']

bar(mylabelsx, [all_Rsx, all_Dsx], ['red', 'blue'])

# OVER TIME