## Community detection for all metropolitan areas

In [31]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:90% !important; }</style>"))

In [32]:
import pandas as pd
import numpy as np
import geopandas as gpd
import seaborn as sns
import networkx as nx

import scipy
import csv

import matplotlib.cm as cm
import matplotlib.pyplot as plt
import json
import community as community_louvain
from copy import deepcopy
# from modularity_maximization.utils import get_modularity

from itertools import product
import networkx.algorithms.community as nx_comm
from scipy.spatial.distance import pdist, squareform

import math
from time import time

%matplotlib inline

In [33]:
from oct2py import octave
#octave.addpath('/home/ubuntu/GenLouvain/')
#octave.addpath('/home/ubuntu/GenLouvain/private/')
_ = octave.addpath('/home/barcsab/projects/urban_communities/scripts')
_ = octave.addpath('/home/ubuntu/GenLouvain/')
_ = octave.addpath('/home/ubuntu/GenLouvain/private/')



### data

In [34]:
# three networks - data IN
mobility = pd.read_csv("../data/usageousers_city_mobility_CT_networks.rpt.gz") ## basis of position and node importance calculations
follow_hh = pd.read_csv("../data/usageousers_city_follower_CT_HH_networks.rpt.gz")
follow_hh = follow_hh.rename(columns={"tract_home.1": "tract_home_1"})

# census tract name -> cbsacode
cbsacode = pd.read_csv("../data/cbsacode_shortname_tracts.csv",sep=";", index_col=0)
cbsacode['clean_name'] = cbsacode["short_name"].map(lambda s: s.split("/")[0].replace(' ','_').replace('.','').lower())

# census data
census = pd.read_csv("../data/censusdata_top50_2012.csv")
census_2 = pd.read_csv("../data/censusdata_top50_2017.csv")

# reading geojson data, converting it to geopandas dataframe
tract_geoms = gpd.GeoDataFrame.from_features(
    [json.loads(e.strip('\n')) for e in open('../data/censustract_geoms_top50.geojson').readlines()]
)

# Cartesian coordinate projection of tract centroids
tract_geoms['centroid'] = tract_geoms['geometry'].centroid
tract_center_dict = tract_geoms\
    .set_geometry('centroid',crs={'init':'epsg:4326'})\
    .to_crs({'init':'epsg:3785'})\
    .set_index('full_geoid')['centroid'].map(lambda p: p.coords[0]).to_dict()

  return _prepare_from_string(" ".join(pjargs))


In [35]:
counties = gpd.read_file('../data/tl_2012_us_county.shp')

DriverError: Unable to open ../data/tl_2012_us_county.shx or ../data/tl_2012_us_county.SHX. Set SHAPE_RESTORE_SHX config option to YES to restore or create it.

In [41]:
from dbfread import DBF
for record in DBF('../data/tl_2012_us_county.dbf'):
    print(record)

ModuleNotFoundError: No module named 'dbfread'

In [5]:
def create_graphs(city, g_type):
    """
    For a given city name, it generates a mobility and follower (home-home) graph.
    
    e.g. g_mob, g_fol_hh = create_graphs("Boston")
    
    It uses the previously loaded `mobility` and `follow_hh` pandas.DataFrames, in which
    the edges are listed for every city.
    
    Parameters:
    -----------
    city : str
        name of the city, see cbsacode dataframe -> clean_name
        
    g_type : str
        either "mob" as mobility or "fol_hh" as follow_hh 
        selects the type of graph to return
        
    Returns:
    --------
    
    g : networkx.Graph
        weighted undirected graph based on city name and g_type (e.g. follow_hh graph of Boston)
        
    """
    # city cbsacode based on name
    city_code = cbsacode[cbsacode.clean_name == city].iloc[0].cbsacode
    
    # select graph type
    if g_type == "mob":
        # filtering large dataframes for the given city code
        mob_df = mobility[(mobility["cbsacode"] == city_code)&(mobility["tract_home"]!=mobility["tract_work"])]

        # create graphs
        # create empty graphs
        g_mob = nx.DiGraph() # mobility graph - weights are counts

        # fill in the networks with data
        mob_df['w_edges'] = list(zip(mob_df.tract_home,mob_df.tract_work,mob_df.cnt))
        g_mob.add_weighted_edges_from(mob_df["w_edges"], weight='cnt')

        # ineffective and slow!
        for e in g_mob.edges():
            r = (e[1],e[0])

            if r in g_mob.edges():
                c1 = g_mob.edges[e]['cnt']
                c2 = g_mob.edges[r]['cnt']

                g_mob.edges[e]['cnt'] = c1 + c2
                g_mob.edges[r]['cnt'] = c1 + c2

        # then let's convert the mobility graph to udirected
        g_mob = g_mob.to_undirected()

        g = g_mob
        
    elif g_type == "fol_hh":            
        # filtering large dataframes for the given city code
        fol_hh_df = follow_hh[(follow_hh["cbsacode"] == city_code)&(follow_hh["tract_home"]!=follow_hh["tract_home_1"])]

        # create graphs
        # create empty graphs
        g_fol_hh = nx.Graph() # follow home-home graph - weights are counts

        # this is an undirected graph already in the dataframe
        fol_hh_df['w_edges'] = list(zip(fol_hh_df.tract_home,fol_hh_df.tract_home_1,fol_hh_df.cnt))
        g_fol_hh.add_weighted_edges_from(fol_hh_df["w_edges"], weight='cnt')
        
        g = g_fol_hh
        
    # TODO --> DONE
    # check data - if all nodes of the graph are in the tract_geom dataframe
    # e.g. in create_graphs()
    # if someone's not there, that is data error, print the tract_id, and leave the node out of the graph G
    # only after this should we calculate the Expert input data    
    while not set(g.nodes).issubset(set(tract_geoms.full_geoid)): # KERDES: ezt hogyan ellenőrizzem le?
        print('DATA ERROR. Node do(es) not have corresponding geodata, so dropped.')
        print('Dropped node(s):')
        nodes_to_drop = set(g.nodes).difference(set(tract_geoms.full_geoid))
        g.remove_nodes_from(nodes_to_drop)
    return g

In [6]:
def SpaMod(A,D,N,binnumber): # binnumber instead of b = binsize
    """
    Function that calculates the matrix for the clustering 
    based on spatial null model a la Expert.
    
    Parameters:
    -----------
    
    A : scipy.sparse.csr.csr_matrix
        adjacency matrix
    D : numpy.ndarray
        Distance matrix between the nodes
    N : numpy.matrix
        a measure of the importance of a node
        the number of users living(home-location) in the given tract
    binnumber : int
        number of distance bins (used in the estimation of the deterrence function)
    Returns:
    --------
    
    KERDES - ellenorizni
    ModularitySpa : 
    ModularityGN :
    """
    
    tic = time()
    
    print("Beginning of modularity function...");   
    # felesleges?? KERDES -- symmetrised matrix (doesn't change the outcome of community detection (arXiv:0812.1770))
    A = A + A.T ### KERDES KELL-e?? TODO ATGONDOLNI? ILLETVE LE KELL-e osztani 2-vel   / 2     
    b = D.max()/(binnumber-1) # MODIFIED
    
    # deterrence function
    det, detbins = np.histogram(
        D.flatten(),
        range = (0, np.ceil(D.max()/b)*b), # JAVITAS
        weights = np.array(A.todense()).flatten(), 
        bins=int(np.ceil(D.max()/b))
    )
    normadet, _ = np.histogram(
        D.flatten(), 
        range = (0, np.ceil(D.max()/b)*b),
        weights = np.array(N*N.T).flatten(), 
        bins=int(np.ceil(D.max()/b))
    )
    det = det / normadet
    det[np.isnan(det)] = 0
    
    toc = time()
    print("Done.","%.2f" % (toc-tic))
    
    #tic = toc
    
    print("Null modell...")
    
    # copmutation of the randomised correlations (preserving space), spatial
    # null-model
    nullmodelSpa = det[np.digitize(D,detbins,right=True)-1]
    
    toc = time()
    print("Done.","%.2f" % (toc-tic))
    
    #tic = toc
    
    print("Modularity calc...")
    
    # the modularity matrix for the spatial null-model
    ModularitySpa=A-np.multiply(N*N.T, nullmodelSpa*A.sum())/(np.multiply(N*N.T,nullmodelSpa).sum())
    szamlalo = np.multiply(N*N.T, nullmodelSpa*A.sum())
    nevezo = np.multiply(N*N.T,nullmodelSpa).sum()
    
    # the modularity matrix for the GN null-model
    degree = degree = A.sum(axis=0) # JAVITVA np.squeeze(np.asarray(A.sum(axis=0))) # degree or strength of nodes || asarry for further usage
    nullmodelGN = degree.T*degree/degree.sum() # Newman-Girvan null-model
    ModularityGN = A - nullmodelGN
    
    toc = time()
    print("Done.","%.2f" % (toc-tic))
    
    return ModularitySpa, ModularityGN

In [22]:
# CONSENSUS CLUSTERING
def consen(city, algorithm_type, g_type):
    """
    Function that does the consensus clustering based on the results
    of multiple runs of previous algorithms.
    
    Parameters:
    -----------
    
    city : str
        cityname to runt he consensus clustering for (see cbsacode.clean_name)
    algorithm_type: str
        either "ms" or "mgn" 
        selects the clustering algoritm type: spatail (a la Expert) or ordinary Louvain clustering with Girvan-Newman
    g_type : str
        either "mobility" or "follow_hh"
        selects the type of graph
        
    Returns:
    --------
    
    s_louv : dict
        tract_geoid -> partition label (int)
    """
    
    tic = time()

    print("Reading in necessary data...")
    csv = '../data/consensus_' + city + '_' + algorithm_type + '_' + g_type + '.csv'

    # results of multiple iterations from previous runs
    iters = pd.read_csv(csv)
    iters = iters.set_index('geoid')
    iters['clusts'] = [np.array(l) for l in iters.values.tolist()]

    toc = time()
    print("Done.","%.2f" % (toc-tic))
    tic = toc

    print("Creating all possible node pairs...")
    # create all possible node pairs
    geoid_pairs = list(product(list(iters.index), list(iters.index)))
    consen_df = pd.DataFrame(geoid_pairs, columns=['geoid_1','geoid_2'])

    # remove selfloops
    consen_df = consen_df[consen_df.geoid_1!=consen_df.geoid_2]
    toc = time()
    print("Done.","%.2f" % (toc-tic))
    tic = toc

    print("Joining interation results to node pairs...")
    # joining iteration results as lists to both elements of the tract pair
    consen_df = pd.merge(consen_df, iters['clusts'], left_on = 'geoid_1', right_on = 'geoid')
    consen_df = pd.merge(consen_df, iters['clusts'], left_on = 'geoid_2', right_on = 'geoid')
    consen_df = consen_df.rename(columns = {'clusts_x': 'clusts_1', 'clusts_y': 'clusts_2'})
    toc = time()
    print("Done.","%.2f" % (toc-tic))
    tic = toc

    print("Counting same partitioning for node pairs...")
    # how many times are the two tracts (geoid_1 and geoid_2) clustered to the same community?
    # --> weights of a graph on which clustering gives the consensus clustering
    print("Calculating difference...")
    diff = np.array(consen_df['clusts_2'].tolist()) == np.array(consen_df['clusts_1'].tolist())
    del consen_df['clusts_1'], consen_df['clusts_2']
    toc = time()
    print("Done.","%.2f" % (toc-tic))
    tic = toc

    print("Getting number of zero differences...") # HIBA: KIDOB NODE-OT!!!
    consen_df['w'] = diff.sum(axis=1)
    del diff
    # consen_df = deepcopy(consen_df[consen_df['w']!=0]) ## KERDES: Miért dob ki nodeot? 0326 JAVITAS
    toc = time()
    print("Done.","%.2f" % (toc-tic))
    tic = toc

    print("Last Louvain...")
    # graph for consensus clustering
    print("Creating graph...")
    g_cons = nx.Graph() 
    g_cons.add_weighted_edges_from(consen_df[['geoid_1','geoid_2','w']].values, weight='w')
    toc = time()
    print("Done.","%.2f" % (toc-tic))
    tic = toc

    del consen_df, iters

    print("Running Louvain...")
    # Louvain community detection 
    s_louv = community_louvain.best_partition(g_cons, weight='w')
    toc = time()
    print("Done.","%.2f" % (toc-tic))
    tic = toc

    return s_louv

In [None]:
# unique lists for city names
city_l = cbsacode.clean_name.unique()

tract_outdeg_mob = mobility.groupby('tract_home')[['cnt']].sum()

for city in city_l:
    for g_type in ['mob','fol_hh']:
        G = create_graphs(city, g_type) # corresponding weighted undirected graph
        
        
        # TODO --> DONE
        # index conversion dicts
        # for i,node_id in enumerate(G.nodes()):
        # for elem in enumerate(["alma","korte"]):
        #     print(elem)
        # geoid -> integer 0-... N-1
        # az elozo dict megforditottja
        # int -> geoid
        index_geoid_dict = dict(list(enumerate(G.nodes)))
        geoid_index_dict = dict(zip(list(index_geoid_dict.values()), list(index_geoid_dict.keys())))

        
        # Dataprep for Expert algorithm
        A = nx.adjacency_matrix(G)
        coords = np.array([tract_center_dict[n] for n in G.nodes()])
        d = pdist(coords)
        D = squareform(pdist(coords))
        
        # importance - number of user home in each tract
        # TODO we should check if all nodes in the follow_hh graph have an importance!
        # otherwise, the N... line is going to throw an error
        if not set(G.nodes).issubset(set(tract_outdeg_mob.reset_index().tract_home)): # test if the node is in any city KERDES : adott városra teszteljem?
            print('Error. Node(s) without importance value(s) They are dropped.') ## --> DONE
            missing_nodes = list(set(G.nodes)-set(tract_outdeg_mob.reset_index().tract_home))
            for node in missing_nodes:
                ## KERDES - ezt ki is dobjam??
                G.remove_node(node)    
        N = np.matrix([tract_outdeg_mob.loc[k].iloc[0] for k in G.nodes()]).T
        
        
        # Calculate clusterings for the given graph and write the outcome of runs to csvs
        S_ms_df = pd.DataFrame()
        S_mgn_df = pd.DataFrame()
        for _ in range(10):
            # TODO Eszter!!!! sometimes it gives an error in the first line
            # new thing: nout = 3
            Ms,Mgn = SpaMod(A,D,N,200) ### KERDES what should be the number of bins? 100?
            S_ms,Q_ms,n_it_ms = octave.iterated_genlouvain(Ms, nout=3)
            S_ms_df[len(S_ms_df.columns)] = S_ms.T[0]
            S_mgn,Q_mgn,n_it_mgn = octave.iterated_genlouvain(Mgn, nout=3)
            S_mgn_df[len(S_mgn_df.columns)] = S_mgn.T[0]

            # TODO itt egy lepesben meg lehet cisnalni mindket algorithm_type-ot!!! --> DONE
            for (algorithm_type, df) in [('ms',S_ms_df),('mgn',S_mgn_df)]:
                df['geoid'] = df.index.map(index_geoid_dict)
                # TODO --> DONE  S_df["geoid"] = S_df.index.map(a_masodik_dicted) -- kérdés: ez mit tud, amit az alatta levő sor nem?
                #S_df['geoid'] = list(G.nodes()) ## KERDES JO??? - szerintem igen (Eszter)
                df = df.set_index('geoid')
                csv_name = 'consensus_' + city + '_' + algorithm_type + '_' + g_type + '.csv'
                df.to_csv('../data/'+ csv_name)
                # TODO --> DONE mindket algorithm type-ra kimenteni a csv-t

In [9]:
from itertools import product

In [11]:
all_combs = product(city_l, ['mob','fol_hh'], ['ms','mgn'])
city, g_type, algorithm_type = list(all_combs)[0]
city, g_type, algorithm_type

('new_york', 'mob', 'ms')

In [25]:
# calculating CONSENSUS and putting together the overall dataset and its MODULARITY
all_consensus_df = pd.DataFrame()
for city in city_l:
    
    for g_type in ['mob','fol_hh']:
        
        for algorithm_type in ['ms','mgn']:
            
            print (city)
            print (g_type)
            print (algorithm_type)
            
            # storing iteration results, empty dataframe for nodes
            consensus_df = pd.DataFrame()
            # TODO atirni az uj fuggveny szerint
            # de az uj fuggveny meg nem eleg gyors
            ## eddig ment HIBA, KÉRDÉS
            S_cons = consen(city, algorithm_type, g_type)
            consensus_df['S_cons'] = S_cons.values()
            consensus_df['city'] = city
            consensus_df['algorithm_type'] = algorithm_type
            consensus_df['g_type'] = g_type
            consensus_df['geoid'] = S_cons.keys()
            consensus_df = consensus_df.set_index('geoid')
            
            # calculating modularity
            G = create_graphs(city, g_type)
            mod = community_louvain.modularity(S_cons,G)
            consensus_df['modularity'] = [mod] * len(S_cons.values()) # sorrend jó, ugye? KÉRDÉS
            
            
            #print('city')

            #csv_name = 'consensus_2_' + city + '_' + algorithm_type + '_' + g_type + '.csv'
            #consensus_df.to_csv('../data/'+ csv_name)
            all_consensus_df = pd.concat([all_consensus_df, consensus_df])

all_consensus_df.to_csv('../data/all_cons.csv')

new_york
mob
ms
Reading in necessary data...
Done. 0.04
Creating all possible node pairs...
Done. 9.35
Joining interation results to node pairs...
Done. 18.03
Counting same partitioning for node pairs...
Calculating difference...
Done. 18.72
Getting number of zero differences...
Done. 0.85
Last Louvain...
Creating graph...
Done. 66.36
Running Louvain...
Done. 250.59


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


new_york
mob
mgn
Reading in necessary data...
Done. 0.02
Creating all possible node pairs...
Done. 8.87
Joining interation results to node pairs...
Done. 18.78
Counting same partitioning for node pairs...
Calculating difference...
Done. 19.79
Getting number of zero differences...
Done. 0.88
Last Louvain...
Creating graph...
Done. 66.50
Running Louvain...
Done. 218.02
new_york
fol_hh
ms
Reading in necessary data...
Done. 0.02
Creating all possible node pairs...
Done. 7.79
Joining interation results to node pairs...
Done. 16.25
Counting same partitioning for node pairs...
Calculating difference...
Done. 17.51
Getting number of zero differences...
Done. 0.78
Last Louvain...
Creating graph...
Done. 59.98
Running Louvain...
Done. 424.90


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


new_york
fol_hh
mgn
Reading in necessary data...
Done. 0.03
Creating all possible node pairs...
Done. 7.87
Joining interation results to node pairs...
Done. 16.43
Counting same partitioning for node pairs...
Calculating difference...
Done. 17.67
Getting number of zero differences...
Done. 0.79
Last Louvain...
Creating graph...
Done. 59.19
Running Louvain...
Done. 199.45
los_angeles
mob
ms
Reading in necessary data...
Done. 0.06
Creating all possible node pairs...
Done. 3.22
Joining interation results to node pairs...
Done. 6.29
Counting same partitioning for node pairs...
Calculating difference...
Done. 7.32
Getting number of zero differences...
Done. 0.31
Last Louvain...
Creating graph...
Done. 25.80
Running Louvain...
Done. 97.24
los_angeles
mob
mgn
Reading in necessary data...
Done. 0.06
Creating all possible node pairs...
Done. 3.31
Joining interation results to node pairs...
Done. 5.88
Counting same partitioning for node pairs...
Calculating difference...
Done. 7.31
Getting number

Done. 0.98
Counting same partitioning for node pairs...
Calculating difference...
Done. 1.69
Getting number of zero differences...
Done. 0.07
Last Louvain...
Creating graph...
Done. 6.26
Running Louvain...
Done. 21.73
philadelphia
fol_hh
ms
Reading in necessary data...
Done. 0.01
Creating all possible node pairs...
Done. 0.66
Joining interation results to node pairs...
Done. 1.02
Counting same partitioning for node pairs...
Calculating difference...
Done. 1.67
Getting number of zero differences...
Done. 0.07
Last Louvain...
Creating graph...
Done. 6.23
Running Louvain...
Done. 26.49
philadelphia
fol_hh
mgn
Reading in necessary data...
Done. 0.03
Creating all possible node pairs...
Done. 0.68
Joining interation results to node pairs...
Done. 1.02
Counting same partitioning for node pairs...
Calculating difference...
Done. 1.66
Getting number of zero differences...
Done. 0.07
Last Louvain...
Creating graph...
Done. 6.27
Running Louvain...
Done. 18.99
houston
mob
ms
Reading in necessary d

Done. 2.84
Running Louvain...
Done. 10.15
providence
mob
ms
Reading in necessary data...
Done. 0.03
Creating all possible node pairs...
Done. 0.06
Joining interation results to node pairs...
Done. 0.06
Counting same partitioning for node pairs...
Calculating difference...
Done. 0.11
Getting number of zero differences...
Done. 0.01
Last Louvain...
Creating graph...
Done. 0.39
Running Louvain...
Done. 1.68
providence
mob
mgn
Reading in necessary data...
Done. 0.01
Creating all possible node pairs...
Done. 0.07
Joining interation results to node pairs...
Done. 0.08
Counting same partitioning for node pairs...
Calculating difference...
Done. 0.14
Getting number of zero differences...
Done. 0.01
Last Louvain...
Creating graph...
Done. 0.48
Running Louvain...
Done. 1.54
providence
fol_hh
ms
Reading in necessary data...
Done. 0.13
Creating all possible node pairs...
Done. 0.06
Joining interation results to node pairs...
Done. 0.06
Counting same partitioning for node pairs...
Calculating diffe

Done. 0.20
Joining interation results to node pairs...
Done. 0.26
Counting same partitioning for node pairs...
Calculating difference...
Done. 0.49
Getting number of zero differences...
Done. 0.02
Last Louvain...
Creating graph...
Done. 1.64
Running Louvain...
Done. 8.42
seattle
fol_hh
mgn
Reading in necessary data...
Done. 0.01
Creating all possible node pairs...
Done. 0.20
Joining interation results to node pairs...
Done. 0.23
Counting same partitioning for node pairs...
Calculating difference...
Done. 0.47
Getting number of zero differences...
Done. 0.02
Last Louvain...
Creating graph...
Done. 1.63
Running Louvain...
Done. 5.26
minneapolis
mob
ms
Reading in necessary data...
Done. 4.59
Creating all possible node pairs...
Done. 0.23
Joining interation results to node pairs...
Done. 0.31
Counting same partitioning for node pairs...
Calculating difference...
Done. 0.56
Getting number of zero differences...
Done. 0.02
Last Louvain...
Creating graph...
Done. 1.80
Running Louvain...
Done.

Done. 1.51
Running Louvain...
Done. 7.64
pittsburgh
mob
mgn
Reading in necessary data...
Done. 0.01
Creating all possible node pairs...
Done. 0.16
Joining interation results to node pairs...
Done. 0.21
Counting same partitioning for node pairs...
Calculating difference...
Done. 0.39
Getting number of zero differences...
Done. 0.02
Last Louvain...
Creating graph...
Done. 1.44
Running Louvain...
Done. 5.79
pittsburgh
fol_hh
ms
Reading in necessary data...
Done. 0.04
Creating all possible node pairs...
Done. 0.19
Joining interation results to node pairs...
Done. 0.19
Counting same partitioning for node pairs...
Calculating difference...
Done. 0.37
Getting number of zero differences...
Done. 0.02
Last Louvain...
Creating graph...
Done. 1.40
Running Louvain...
Done. 9.07
pittsburgh
fol_hh
mgn
Reading in necessary data...
Done. 0.03
Creating all possible node pairs...
Done. 0.20
Joining interation results to node pairs...
Done. 0.23
Counting same partitioning for node pairs...
Calculating di

Done. 0.11
Getting number of zero differences...
Done. 0.01
Last Louvain...
Creating graph...
Done. 0.39
Running Louvain...
Done. 1.51
cincinnati
mob
ms
Reading in necessary data...
Done. 0.01
Creating all possible node pairs...
Done. 0.07
Joining interation results to node pairs...
Done. 0.09
Counting same partitioning for node pairs...
Calculating difference...
Done. 0.19
Getting number of zero differences...
Done. 0.01
Last Louvain...
Creating graph...
Done. 0.81
Running Louvain...
Done. 3.56
cincinnati
mob
mgn
Reading in necessary data...
Done. 2.99
Creating all possible node pairs...
Done. 0.11
Joining interation results to node pairs...
Done. 0.11
Counting same partitioning for node pairs...
Calculating difference...
Done. 0.22
Getting number of zero differences...
Done. 0.01
Last Louvain...
Creating graph...
Done. 0.75
Running Louvain...
Done. 2.51
cincinnati
fol_hh
ms
Reading in necessary data...
Done. 0.04
Creating all possible node pairs...
Done. 0.07
Joining interation resul

Done. 1.73
indianapolis
fol_hh
ms
Reading in necessary data...
Done. 0.01
Creating all possible node pairs...
Done. 0.07
Joining interation results to node pairs...
Done. 0.06
Counting same partitioning for node pairs...
Calculating difference...
Done. 0.12
Getting number of zero differences...
Done. 0.01
Last Louvain...
Creating graph...
Done. 0.51
Running Louvain...
Done. 3.15
indianapolis
fol_hh
mgn
Reading in necessary data...
Done. 0.01
Creating all possible node pairs...
Done. 0.08
Joining interation results to node pairs...
Done. 0.07
Counting same partitioning for node pairs...
Calculating difference...
Done. 0.14
Getting number of zero differences...
Done. 0.01
Last Louvain...
Creating graph...
Done. 0.51
Running Louvain...
Done. 1.60
austin
mob
ms
Reading in necessary data...
Done. 0.01
Creating all possible node pairs...
Done. 0.07
Joining interation results to node pairs...
Done. 0.06
Counting same partitioning for node pairs...
Calculating difference...
Done. 0.11
Getting 

Done. 0.26
Running Louvain...
Done. 1.42
memphis
mob
mgn
Reading in necessary data...
Done. 0.01
Creating all possible node pairs...
Done. 0.06
Joining interation results to node pairs...
Done. 0.05
Counting same partitioning for node pairs...
Calculating difference...
Done. 0.08
Getting number of zero differences...
Done. 0.00
Last Louvain...
Creating graph...
Done. 0.27
Running Louvain...
Done. 1.02
memphis
fol_hh
ms
Reading in necessary data...
Done. 0.01
Creating all possible node pairs...
Done. 0.03
Joining interation results to node pairs...
Done. 0.03
Counting same partitioning for node pairs...
Calculating difference...
Done. 0.07
Getting number of zero differences...
Done. 0.00
Last Louvain...
Creating graph...
Done. 0.25
Running Louvain...
Done. 1.29
memphis
fol_hh
mgn
Reading in necessary data...
Done. 0.02
Creating all possible node pairs...
Done. 0.03
Joining interation results to node pairs...
Done. 0.03
Counting same partitioning for node pairs...
Calculating difference.

Done. 0.11
Getting number of zero differences...
Done. 0.01
Last Louvain...
Creating graph...
Done. 0.41
Running Louvain...
Done. 1.55
buffalo
mob
ms
Reading in necessary data...
Done. 0.02
Creating all possible node pairs...
Done. 0.03
Joining interation results to node pairs...
Done. 0.04
Counting same partitioning for node pairs...
Calculating difference...
Done. 0.08
Getting number of zero differences...
Done. 0.00
Last Louvain...
Creating graph...
Done. 0.28
Running Louvain...
Done. 1.19
buffalo
mob
mgn
Reading in necessary data...
Done. 0.01
Creating all possible node pairs...
Done. 0.03
Joining interation results to node pairs...
Done. 0.03
Counting same partitioning for node pairs...
Calculating difference...
Done. 0.07
Getting number of zero differences...
Done. 0.00
Last Louvain...
Creating graph...
Done. 0.24
Running Louvain...
Done. 0.92
buffalo
fol_hh
ms
Reading in necessary data...
Done. 0.01
Creating all possible node pairs...
Done. 0.05
Joining interation results to nod

# Adding data to overall table

In [None]:
# ITT TARTOK, ezek még nincsenek belerakva a tablazatba

In [37]:
tract_sum = cbsacode.groupby('clean_name')[['clean_name']].count() # number of tracts per city
tract_sum = tract_sum.rename(columns={'clean_name': 'sum_tracts'})

In [68]:
all_graphs_df = pd.DataFrame(columns=['clean_name','g_type','tracts_in_network'])
all_graphs_df.head()

Unnamed: 0,clean_name,g_type,tracts_in_network


In [69]:
graph_combs = product(city_l, ['mob','fol_hh'])

for city, g_type in graph_combs:
    G = create_graphs(city, g_type)
    all_graphs_df = all_graphs_df.append({'clean_name': city, 'g_type' : g_type, 'tracts_in_network' : len(G.nodes)}, ignore_index=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


In [71]:
tract_no_df = pd.merge(all_graphs_df, tract_sum, how = 'left', left_on = 'clean_name', right_index = True)

In [94]:
tract_no_df.head()

Unnamed: 0,clean_name,g_type,tracts_in_network,sum_tracts
0,new_york,mob,4603,4609
1,new_york,fol_hh,4389,4609
2,los_angeles,mob,2895,2890
3,los_angeles,fol_hh,2842,2890
4,riverside,mob,817,819


In [112]:
type(all_combs)

itertools.product

In [111]:
pd.DataFrame(all_combs)

In [120]:
all_combs = product(city_l, ['mob','fol_hh'], ['ms','mgn'])
city, g_type, algorithm_type = list(all_combs)[0]
city, g_type, algorithm_type

('new_york', 'mob', 'ms')

In [234]:
con_df = pd.read_csv('../data/all_cons.csv')

## To be deleted

In [102]:
# unique values in the network counted by side of edge

mob_u_h = mobility.tract_home.tolist()
mob_u_w = mobility.tract_work.tolist()
u_geoid_mob = set([*mob_u_h,*mob_u_w])
cbsacode['in_mob'] = cbsacode.geoid.isin(u_geoid_mob).astype(int)

fol_u_h = follow_hh.tract_home.tolist()
fol_u_h1 = follow_hh.tract_home_1.tolist()
u_geoid_fol = set([*fol_u_h,*fol_u_h1])
cbsacode['in_fol'] = cbsacode.geoid.isin(u_geoid_fol).astype(int)

In [113]:
netw_count = cbsacode.groupby('cbsacode')[['in_mob','in_fol']].sum()

In [114]:
city_df = pd.merge(tract_sum, netw_count, left_index=True, right_index=True)

In [12]:
tract = mobility.groupby('tract_home')[['cnt']].sum()

In [13]:
tract.head()

Unnamed: 0_level_0,cnt
tract_home,Unnamed: 1_level_1
14000US01007010001,9
14000US01007010002,14
14000US01007010003,10
14000US01007010004,22
14000US01009050101,21
14000US01009050102,29
14000US01009050200,20
14000US01009050300,12
14000US01009050400,1
14000US01009050500,17


In [None]:
%%writefile toto.m

function [a, b] = toto(c);
    a = c;
    b = c + 1;
end

In [None]:
a,b = octave.toto(3, nout=2)