## Revising the FEC dataset for a nodes layout

In [1]:
import pandas as pd

In [2]:
df = pd.read_csv("fec_nodes.csv")

In [3]:
df.head()

Unnamed: 0,cand_id,name,transaction_amt
0,H4PA13124,"STOREY, BAYARD THAYER PH.D ...",1000
1,H4PA13124,"STOREY, BAYARD THAYER PH.D ...",1000
2,H4PA13124,"TORRES, GREGORY T. ...",2500
3,H4PA13124,"TORRES, GREGORY T. ...",2500
4,H4PA13124,"SILVERMAN, ALVIN ...",2500


In [4]:
df['transaction_amt'] = df['transaction_amt'].astype('int')
df['cand_id'] = df['cand_id'].astype('str')
df['name'] = df['name'].astype('str')

In [5]:
df.to_csv("fec_nodes.csv", sep=',', index=False)

In [6]:
df = pd.read_csv("fec_nodes.csv")

In [7]:
df.head()

Unnamed: 0,cand_id,name,transaction_amt
0,H4PA13124,"STOREY, BAYARD THAYER PH.D ...",1000
1,H4PA13124,"STOREY, BAYARD THAYER PH.D ...",1000
2,H4PA13124,"TORRES, GREGORY T. ...",2500
3,H4PA13124,"TORRES, GREGORY T. ...",2500
4,H4PA13124,"SILVERMAN, ALVIN ...",2500


In [8]:
import networkx as nx


In [9]:
nodes = (df.cand_id, df.name)

In [10]:
print(nodes[0][6], nodes[1][6])

H4PA13124 SCHIFTER, RICHARD SR.                                                                                                                                                                                   


## Initial ingest of data to create node network (unweighted)

In [11]:
g = nx.Graph()
i = 0
j = 0
k = 1

for j in range(len(nodes[0])):
    g.add_edge(nodes[i][j], nodes[k][j])
    j = j+1

In [12]:
print(nx.info(g))

Name: 
Type: Graph
Number of nodes: 278635
Number of edges: 339021
Average degree:   2.4334


In [14]:
dftypes = df.groupby('cand_id')['name'].nunique()

print(dftypes)

cand_id
H0AL02087     267
H0AL05163     272
H0AL07086     186
H0AL07177       6
H0AR01083     426
H0AR02107     325
H0AR03055     461
H0AZ01259     125
H0AZ01325     293
H0AZ03248       1
H0AZ03321     421
H0AZ08056     291
H0CA02138      45
H0CA03078    2155
H0CA06121      16
H0CA08069     132
H0CA10073      87
H0CA10149     742
H0CA11337      38
H0CA15148     791
H0CA19082      15
H0CA19173     714
H0CA26087      17
H0CA27085     365
H0CA32101     658
H0CA33117     300
H0CA48024     616
H0CA49055     160
H0CA51036      73
H0CO04122    4781
             ... 
H8NY06048     113
H8NY07046     294
H8NY19058     292
H8NY20056     802
H8NY21203     181
H8NY24066     224
H8NY29032     360
H8OH01043     334
H8OH05036     421
H8OH07099     462
H8OH11141     171
H8OH15076     786
H8OR05107     410
H8PA01153     189
H8PA01187       4
H8PA05071     545
H8SC03062       1
H8TN02069      97
H8TN05252       1
H8TX20127      14
H8TX22107     477
H8UT03089     205
H8VA01147     339
H8VA11062     627
H8

## Reading in dataset to create an attributes list for candidate nodes

In [20]:
df = pd.read_csv("fec.csv")

In [21]:
df.head()

Unnamed: 0.1,Unnamed: 0,cand_id,result,cand_pty_affliation,cmte_id,amndt_ind,rpt_tp,transaction_pgi,entity_tp,name,state,transaction_dt,transaction_amt,sub_id
0,0,H4PA13124,1,DEM,C00389197,A,Q2,P,IND,"WOLF, WENDY C. ...",PA,4132011.0,250.0,4120120141231841188
1,1,H4PA13124,1,DEM,C00389197,A,Q2,P,IND,"WORTH, ROBERT R ...",NY,6302011.0,250.0,4120120141231841189
2,2,H4PA13124,1,DEM,C00389197,A,Q2,P,IND,"WORTH, ROBERT R ...",NY,6302011.0,250.0,4120120141231841189
3,3,H4PA13124,1,DEM,C00389197,A,Q2,P,IND,"WUKICH, ALEXIS ...",PA,6242011.0,250.0,4120120141231841190
4,4,H4PA13124,1,DEM,C00389197,A,Q2,P,IND,"WUKICH, ALEXIS ...",PA,6242011.0,250.0,4120120141231841190


In [22]:
df.transaction_pgi.unique()

array(['P    ', 'G    ', 'O    ', 'R    ', nan, 'C    ', 'S    ', 'E    ',
       '0    ', '2    '], dtype=object)

In [23]:
df.loc[df['transaction_pgi'] == 'G    ']

Unnamed: 0.1,Unnamed: 0,cand_id,result,cand_pty_affliation,cmte_id,amndt_ind,rpt_tp,transaction_pgi,entity_tp,name,state,transaction_dt,transaction_amt,sub_id
169,169,H4PA13124,1,DEM,C00389197,A,Q2,G,IND,"STOREY, BAYARD THAYER PH.D ...",PA,6092011.0,1000.0,4120120141231841159
170,170,H4PA13124,1,DEM,C00389197,A,Q2,G,IND,"STOREY, BAYARD THAYER PH.D ...",PA,6092011.0,1000.0,4120120141231841159
179,179,H4PA13124,1,DEM,C00389197,A,Q2,G,IND,"TORRES, GREGORY T. ...",MA,6102011.0,2500.0,4120120141231841165
180,180,H4PA13124,1,DEM,C00389197,A,Q2,G,IND,"TORRES, GREGORY T. ...",MA,6102011.0,2500.0,4120120141231841165
265,265,H4PA13124,1,DEM,C00389197,A,Q2,G,IND,"SILVERMAN, ALVIN ...",NY,6272011.0,2500.0,4120120141231841146
266,266,H4PA13124,1,DEM,C00389197,A,Q2,G,IND,"SILVERMAN, ALVIN ...",NY,6272011.0,2500.0,4120120141231841146
401,401,H4PA13124,1,DEM,C00389197,A,Q2,G,IND,"SCHIFTER, RICHARD SR. ...",MD,6292011.0,400.0,4120120141231841126
402,402,H4PA13124,1,DEM,C00389197,A,Q2,G,IND,"SCHIFTER, RICHARD SR. ...",MD,6292011.0,400.0,4120120141231841126
503,503,H4PA13124,1,DEM,C00389197,A,Q2,G,IND,"HANKIN, MARK ...",PA,6302011.0,2500.0,4120120141231840996
504,504,H4PA13124,1,DEM,C00389197,A,Q2,G,IND,"HANKIN, MARK ...",PA,6302011.0,2500.0,4120120141231840996


In [24]:
df = df.drop("Unnamed: 0", axis = 1)
df = df.drop("cmte_id", axis = 1)
df = df.drop("amndt_ind", axis = 1)
df = df.drop("rpt_tp", axis = 1)
df = df.drop("transaction_pgi", axis = 1)
df = df.drop("entity_tp", axis = 1)

In [25]:
df = df.drop("name", axis = 1)
df = df.drop("state", axis = 1)
df = df.drop("transaction_dt", axis = 1)
df = df.drop("transaction_amt", axis = 1)
df = df.drop("sub_id", axis = 1)

In [26]:
df.head()

Unnamed: 0,cand_id,result,cand_pty_affliation
0,H4PA13124,1,DEM
1,H4PA13124,1,DEM
2,H4PA13124,1,DEM
3,H4PA13124,1,DEM
4,H4PA13124,1,DEM


In [27]:
df.to_csv("fec_node_attributes.csv", sep=',', index=False)

## Trying to create a weighted graph using the above loop method

In [28]:
df = pd.read_csv("fec_nodes.csv")

In [29]:
df.head()

Unnamed: 0,cand_id,name,transaction_amt
0,H4PA13124,"STOREY, BAYARD THAYER PH.D ...",1000
1,H4PA13124,"STOREY, BAYARD THAYER PH.D ...",1000
2,H4PA13124,"TORRES, GREGORY T. ...",2500
3,H4PA13124,"TORRES, GREGORY T. ...",2500
4,H4PA13124,"SILVERMAN, ALVIN ...",2500


In [32]:
df.dtypes()

TypeError: 'Series' object is not callable

In [31]:
df['transaction_amt'] = df['transaction_amt'].astype('int')
df['cand_id'] = df['cand_id'].astype('str')
df['name'] = df['name'].astype('str')

In [33]:
weighted_nodes = (df.cand_id, df.name, df.transaction_amt)

In [34]:
g = nx.Graph()
i = 0
j = 0
k = 1
l = 2

for j in range(len(weighted_nodes[0])):
    g.add_edge(weighted_nodes[i][j], weighted_nodes[k][j], weight= weighted_nodes[l][j])
    j = j+1

In [35]:
print(nx.info(g))

Name: 
Type: Graph
Number of nodes: 278635
Number of edges: 339021
Average degree:   2.4334


In [37]:
nx.degree_centrality(g)

{'FRUM, SANDRA                                                                                                                                                                                            ': 3.5889374591758365e-06,
 'SCHOLL, CATHERINE L. DR.                                                                                                                                                                                ': 3.5889374591758365e-06,
 'DIX, PATRICK                                                                                                                                                                                            ': 3.5889374591758365e-06,
 'SKRLA, JOHN P                                                                                                                                                                                           ': 3.5889374591758365e-06,
 'MALONE, W.D.                                                                      