In [12]:
import networkx as nx
import pandas as pd
import numpy as np

In [13]:
# Read the training table from disk and print its column / dtype summary.
training_data_path = 'data/training_data.csv'
training_df = pd.read_csv(training_data_path)
training_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 172848 entries, 0 to 172847
Data columns (total 41 columns):
 #   Column                                         Non-Null Count   Dtype  
---  ------                                         --------------   -----  
 0   general_sector                                 172848 non-null  object 
 1   city                                           171380 non-null  object 
 2   zip_code                                       168919 non-null  float64
 3   specific_sector                                172848 non-null  object 
 4   state                                          172848 non-null  object 
 5   contributor_type                               172848 non-null  object 
 6   winner_ratio                                   172848 non-null  float64
 7   candidacy_count                                172848 non-null  int64  
 8   candidacy_democratic_count                     172848 non-null  int64  
 9   candidacy_republican_count           

  training_df = pd.read_csv('data/training_data.csv')


In [14]:
# Candidate bipartite weighted-network tables (all candidates / winners only),
# read with the default integer index.
all_bipartite_df = pd.read_csv(
    'data/all_candidates_state_bipartite_weighted_network.csv'
)
winning_bipartite_df = pd.read_csv(
    'data/winning_candidates_state_bipartite_weighted_network.csv'
)

# Top-100 contributor network tables (state / federal); the CSV column that
# pandas names 'Unnamed: 0' is used as the row index.
state_df = pd.read_csv(
    'data/state_contributor_top100_contributors_network.csv',
    index_col='Unnamed: 0',
)
fed_df = pd.read_csv(
    'data/federal_contributor_top100_contributors_network.csv',
    index_col='Unnamed: 0',
)

In [15]:
# Preview a single record to see which columns are available.
training_df.iloc[:1]

Unnamed: 0,general_sector,city,zip_code,specific_sector,state,contributor_type,winner_ratio,candidacy_count,candidacy_democratic_count,candidacy_republican_count,...,governor_contribution_ratio,house_and_assembly_contribution_ratio,politician_challenger_ratio,politician_democratic_ratio,politician_incumbency_ratio,politician_open_pos_ratio,politician_republican_ratio,senate_contribution_ratio,us_house_contribution_ratio,us_senate_contribution_ratio
0,Retired,MISSOULA,59802.0,Retired,MT,Individual,1.0,4,4,0,...,0.5,0.0,0.0,1.0,0.5,0.5,0.0,0.0,0.0,0.0


In [16]:
def _index_bipartite_frame_by_name(df):
    """Return `df` indexed by its 'Name' column, without mutating the input.

    Drops the 'Unnamed: 1' column, renames 'Unnamed: 0' to 'Name' and makes
    'Name' the index. Every step is skipped when it has already been applied,
    so calling this on an already-prepared frame returns an equivalent frame
    instead of raising KeyError.

    Args:
        df: DataFrame as loaded from one of the bipartite network CSVs, or a
            frame that has already been through this function.

    Returns:
        A new DataFrame indexed by 'Name'.
    """
    prepared = (
        df
        # errors='ignore' keeps a re-run from failing once the column is gone.
        .drop(columns='Unnamed: 1', errors='ignore')
        # rename silently ignores labels that are absent.
        .rename(columns={'Unnamed: 0': 'Name'})
    )
    # After the first run 'Name' is the index, not a column, so skip set_index.
    if 'Name' in prepared.columns:
        prepared = prepared.set_index('Name')
    return prepared


# Safe to re-run: the helper is a no-op on frames that are already prepared.
all_bipartite_df = _index_bipartite_frame_by_name(all_bipartite_df)
winning_bipartite_df = _index_bipartite_frame_by_name(winning_bipartite_df)

# Extract Features

In [18]:
def create_bipartite_graph(df):
    """Build an undirected weighted graph from a row-by-column weight table.

    Only the numeric columns of `df` are considered. For every cell whose
    value is strictly greater than zero, an edge is added between the row's
    index label and the column's label, with the cell value stored in the
    edge's 'weight' attribute. Zero, negative and NaN cells produce no edge,
    so rows or columns without any positive cell do not appear as nodes.

    Args:
        df: DataFrame whose index labels and numeric column labels become the
            graph's nodes.

    Returns:
        networkx.Graph with one weighted edge per positive cell.

    Note:
        Index labels and column labels share one node namespace; a label that
        occurs in both would map to a single node. NOTE(review): confirm the
        two label sets are disjoint for the input tables.
    """
    B = nx.Graph()
    numeric_df = df.select_dtypes(include='number')

    # Locate positive cells in one vectorised pass instead of a Python-level
    # loop over every (row, column) pair. np.nonzero returns positions in
    # row-major order, so edges are inserted in the same order as a
    # row-by-row, column-by-column scan.
    values = numeric_df.to_numpy()
    row_pos, col_pos = np.nonzero(values > 0)

    B.add_weighted_edges_from(
        zip(
            numeric_df.index[row_pos],
            numeric_df.columns[col_pos],
            values[row_pos, col_pos],
        )
    )
    return B

# Weighted bipartite graphs built from the candidate tables
# (all candidates / winners only).
all_candidates_graph = create_bipartite_graph(all_bipartite_df)
winning_graph = create_bipartite_graph(winning_bipartite_df)

# The state and federal contributor tables are treated as adjacency matrices
# and converted directly into graphs.
federal_contributor_graph = nx.from_pandas_adjacency(fed_df)
state_contributor_graph = nx.from_pandas_adjacency(state_df)

In [19]:
def _weighted_degree(graph):
    """Map each node to the sum of the 'weight' values on its incident edges."""
    totals = {}
    for node in graph.nodes():
        totals[node] = sum(w for _, _, w in graph.edges(node, data='weight'))
    return totals


# Plain degree per node for each graph.
state_contributor_degree = dict(state_contributor_graph.degree())
federal_contributor_degree = dict(federal_contributor_graph.degree())
bipartite_degree = dict(winning_graph.degree())

# Weighted degree per node for the two contributor networks.
state_contributor_weighted_degree = _weighted_degree(state_contributor_graph)
federal_contributor_weighted_degree = _weighted_degree(federal_contributor_graph)

for label, degrees in (
    ("State Contributor Degree:", state_contributor_degree),
    ("Federal Contributor Degree:", federal_contributor_degree),
    ("Bipartite Degree:", bipartite_degree),
):
    print(label, degrees)

State Contributor Degree: {'MCGUIRE, PERRY J DOUGLASVILLE 30135 GA': 3, 'HANCE, KENT R AUSTIN 78701 TX': 25, 'PHILADELPHIA TRIAL LAWYERS ASSOCIATION PHILADELPHIA 19107 PA': 6, 'STRICKLAND, DEANNA L BROOKLET 30415 GA': 0, 'LAW OFFICES OF PETER G ANGELOS BALTIMORE 21201 VA': 0, 'ELGIN, ROBERT L UNIVERSITY CITY 63130 MO': 7, 'BRISBANE, CHARLES LOCUST VALLEY 11560 NY': 0, 'KRAMER, ORIN S NEW YORK 10022 NY': 51, 'FERNANDEZ, RAUL J RESTON 20191 VA': 12, 'STEFANI, JOHN J CLARKSBURG 8510 NJ': 20, 'LOUISIANA MANUFACTURERS ASSOCIATION BATON ROUGE 70825 LA': 0, 'MURPHY, MARK B ROSWELL 88202 NM': 10, 'ANDREWS & KURTH HOUSTON 77002 TX': 31, 'MEARA, BRIAN R BAYSIDE 11361 NY': 30, 'SANDEL, JERRY W FARMINGTON 87401 NM': 12, 'BALBONI FOR SENATE DIX HILLS 11746 NY': 17, 'BLITZ, ROBERT D FRONTENAC 63131 MO': 5, 'OPPENHEIMER, SUZI MAMARONECK 10543 NY': 8, 'REPUBLICAN ASSEMBLY CAMPAIGN CMTE OF NEW YORK ALBANY 12210 NY': 12, 'CAMDEN COUNTY DEMOCRATIC CENTRAL CMTE OF NEW JERSEY CHERRY HILL 8003 NJ': 12, 'REP