In [1]:
import pandas as pd 
import networkx as nx
import pickle
from os import listdir
from os.path import isfile, join
import sys
import math
import ast

In [2]:
# Read the tab-separated politician table; the "occupation" column is run
# through ast.literal_eval while the file is being parsed.
politician_data = pd.read_csv(
    "data/politician-data.csv",
    sep="\t",
    quotechar='"',
    converters={"occupation": ast.literal_eval},
)

In [3]:
def clean(x):
    """ Converts a cell value to a list.

        Input parameter:
        x - one of
            * a float (how a missing cell, NaN, shows up) or None -> []
            * a list/tuple that was already converted          -> copied to a list
            * a string holding a Python literal, e.g. "['a']"  -> parsed value

        Returns: a list for the first two cases; for strings, whatever
        ast.literal_eval produces (expected to be a list).

        Raises: ValueError / SyntaxError from ast.literal_eval when a string
        is not a valid Python literal.
    """
    # isinstance (rather than type(x) == float) also catches float subclasses
    # such as numpy.float64.
    if x is None or isinstance(x, float):
        return []
    # Already-parsed values pass through, so applying clean() to a column a
    # second time (e.g. re-running the cell) no longer raises.
    if isinstance(x, (list, tuple)):
        return list(x)
    return ast.literal_eval(x)

In [4]:
# Turn the stringified list columns into real lists with clean(); one loop
# replaces three otherwise identical cells.
for list_column in ("party", "nationality", "name"):
    politician_data[list_column] = politician_data[list_column].apply(clean)

In [7]:
# politician_data

In [8]:
# Export the cleaned table as JSON into the same data/ directory as the CSV.
politician_data.to_json(path_or_buf="data/politician-data.json")

In [9]:
def get_files(path):
    """ Returns a sorted list of files in a directory

        Only regular files directly inside the directory are listed;
        sub-directories are skipped and nothing is searched recursively.
        The names are sorted because os.listdir gives no ordering guarantee,
        and positional access such as files[7] or files[:5] would otherwise
        depend on the filesystem.

        Input parameter: path to directory
        Returns: list of file names (names only, not full paths)
    """
    return sorted(name for name in listdir(path) if isfile(join(path, name)))

In [10]:
# Names of the edge-list files found in data/filtered_edge_list.
files = get_files(path="data/filtered_edge_list")

In [11]:
def load_df(path, file):
    """ Reads one csv file from a directory into a dataframe
        Input parameters:
        1. path - directory that contains the file
        2. file - name of the file inside that directory
        Returns: the dataframe produced by pd.read_csv with default options
    """
    csv_location = "/".join([path, file])
    return pd.read_csv(csv_location)

In [12]:
# net = load_df("data/filtered_edge_list/",files[7])

In [13]:
# net

In [14]:
# G = nx.from_pandas_dataframe(net,'from','to')

In [15]:
# len(G.nodes())
# node_list = G.nodes()
# data = politician_data[politician_data["ID"].isin(node_list)]
# name_data = data[["ID","name"]].set_index('ID')['name'].to_dict()
# gender_data = data[["ID","gender"]].set_index('ID')['gender'].to_dict()
# occupation_data = data[["ID","occupation"]].set_index('ID')['occupation'].to_dict()
# nationality_data = data[["ID","nationality"]].set_index('ID')['nationality'].to_dict()
# party_data = data[["ID","party"]].set_index('ID')['party'].to_dict()

In [16]:
def set_attributes(dataframe, attribute_dataframe, graph_type = 'dir'):
    """ Returns a network with attributes assigned to each node
        Input parameters:
        1. dataframe - edge list with 'from' and 'to' columns
        2. attribute_dataframe - contains node id ('ID') and the attributes
           (name, party, nationality, occupation, gender)
        3. graph_type - 'dir' builds a nx.DiGraph (default),
           'undir' builds a nx.Graph
        Returns: tuple (G, number of nodes, number of edges)
        Raises: ValueError if graph_type is neither 'dir' nor 'undir'
        Side effect: prints the node and edge counts
    """
    # Validate up front: an unknown graph_type used to fall through both
    # branches and crash later with an UnboundLocalError on G.
    if graph_type not in ('dir', 'undir'):
        raise ValueError("graph_type must be 'dir' or 'undir', got %r" % (graph_type,))
    empty_graph = nx.DiGraph() if graph_type == 'dir' else nx.Graph()

    # load dataframe as graph
    # from_pandas_dataframe is the NetworkX 1.x name; later releases call it
    # from_pandas_edgelist. Use whichever the installed version provides.
    build_graph = getattr(nx, 'from_pandas_edgelist', None)
    if build_graph is None:
        build_graph = nx.from_pandas_dataframe
    # edge_attr=None: copy no edge attributes (same effect as the previous
    # edge_attr=False on 1.x, and the value the newer function expects).
    G = build_graph(dataframe, 'from', 'to', edge_attr=None, create_using=empty_graph)

    # get list of nodes
    node_list = list(G.nodes())
    # keep only the attribute rows that belong to nodes of this graph
    data = attribute_dataframe[attribute_dataframe["ID"].isin(node_list)]
    indexed = data.set_index('ID')
    # set attributes: one {node id: value} dictionary per attribute.
    # Keyword arguments are used because the positional order of
    # (name, values) differs between NetworkX 1.x and 2.x.
    for attribute in ('gender', 'name', 'occupation', 'nationality', 'party'):
        nx.set_node_attributes(G, name=attribute, values=indexed[attribute].to_dict())

    #print stuff
    num_n = G.number_of_nodes()
    num_e = G.number_of_edges()
    print("Number of nodes: ", num_n)
    print("Number of edges: ", num_e)

    return G, num_n, num_e

In [17]:
# output: 
# G.node[606]
# {'gender': 'male',
#  'name': [' brice lalonde '],
#  'nationality': ['french'],
#  'occupation': ['politician'],
#  'party': []}

In [18]:
# nx.write_gpickle(G,"data/graphs/x")

In [19]:
# k = nx.read_gpickle("data/graphs/x")

In [20]:
def save_network(G, path_save,file):
    """ Saves network on specified path as PICKLE
        Input parameters:
        1. G - graph to save
        2. path_save - path to directory
        3. file - file name
        Returns: None
        Side effect: prints the destination once the file has been written
    """
    destination = path_save+"/"+file
    # NOTE(review): write_gpickle appears to be unavailable in NetworkX 3.x -
    # confirm against the pinned version before upgrading.
    nx.write_gpickle(G, destination)
    # Report only after the write returned, so a failed write is never
    # announced as "saved".
    print("Network saved as pickle on PATH: ", destination)

In [21]:
# lst = []
# for file in files[:5]:
#     print(file)
#     net_df = load_df("data/filtered_edge_list",file)
#     G, num_n, num_e = set_attributes(net_df, politician_data)
#     sub_lst = [file,num_n, num_e]
#     lst.append(sub_lst)
#     save_network(G,"data/graphs",file.replace(".csv",""))
# df = pd.DataFrame(lst)
# df.columns = ["file","nodes","edges"]
# df.to_csv("data/graphs/stats.csv")

In [22]:
# Build the attributed graph for the 2003_06 edge list, using the default
# graph_type of set_attributes.
net_df = load_df(path="data/filtered_edge_list", file="2003_06.csv")
G, num_n, num_e = set_attributes(dataframe=net_df, attribute_dataframe=politician_data)


Number of nodes:  954
Number of edges:  1123


In [23]:
# Display the class of the graph object returned by set_attributes.
type(G)

networkx.classes.graph.Graph

In [24]:
# G = nx.DiGraph()

In [25]:
# G.is_directed()