In [2]:
import pandas as pd
import networkx as nx

In [3]:
# Build the directed graph used for the plain (unweighted-by-income) PageRank.
# Every kept spreadsheet row becomes one edge: previous-residence column ->
# current-residence column, with the count column stored as the edge weight.

# Origin/destination labels that are not metro areas; rows touching them are dropped.
non_cities = ['Outside Metro Area within U.S. or Puerto Rico', 'Africa', 'Asia', 'Central America', 'Caribbean', 'Europe', 'U.S. Island Areas', 'Northern America', 'Oceania and At Sea', 'South America']
G = nx.DiGraph()

# Column labels auto-generated by pandas for the sheet's unnamed columns.
previous = 'Unnamed: 15'  # edge source (previous residence, per the variable name)
current = 'Unnamed: 2'    # edge target (current residence, per the variable name)
count = 'Unnamed: 26'     # value stored as the edge weight

# Rows whose count is below this value are skipped.
thresh = 0

# Sheet layout: the first three rows are header rows, and everything from row
# 53724 onwards is footer material.
# NOTE(review): the footer position is specific to this workbook -- re-check it
# if the input file is ever replaced.
first_data_row = 3
first_footer_row = 53724

# Set membership instead of scanning the list for every row.
excluded_places = frozenset(non_cities)

# Open the workbook in a context manager so its file handle is released once
# all sheets are parsed (previously it was left open for the kernel's lifetime).
with pd.ExcelFile('../data/metro-to-metro-2011-2015.xlsx') as x:
    print(x.sheet_names)

    for state in x.sheet_names:  # one iteration per worksheet
        df = x.parse(state)

        # parse() yields a default 0..n-1 index, so this positional slice keeps
        # exactly the rows the old `index < 3` / `index >= 53724` checks kept.
        data_rows = df.iloc[first_data_row:first_footer_row]

        for origin, destination, movers in zip(data_rows[previous], data_rows[current], data_rows[count]):
            if int(movers) < thresh:
                continue

            # exclude non-cities
            if origin in excluded_places or destination in excluded_places:
                continue

            G.add_edge(origin, destination, weight=movers)

print(len(G.nodes()))
print(len(G.edges()))

['Metro-to-Metro 2011-2015']
388
50171


In [4]:
# Run PageRank on the graph (edge weights are picked up from the 'weight'
# attribute by default) and tabulate the scores as one row per node.
pr = nx.pagerank(G, max_iter=1000)
pr_df = pd.DataFrame(list(pr.items()), columns=['metro', 'pr'])
print(pr_df)

                                                metro        pr
0                          Albuquerque, NM Metro Area  0.003073
1                              Abilene, TX Metro Area  0.001535
2        Allentown-Bethlehem-Easton, PA-NJ Metro Area  0.002509
3                             Amarillo, TX Metro Area  0.001160
4                            Anchorage, AK Metro Area  0.001928
5        Atlanta-Sandy Springs-Roswell, GA Metro Area  0.017466
6                    Austin-Round Rock, TX Metro Area  0.010771
7                          Bakersfield, CA Metro Area  0.003282
8            Baltimore-Columbia-Towson, MD Metro Area  0.007384
9                          Baton Rouge, LA Metro Area  0.003427
10                Beaumont-Port Arthur, TX Metro Area  0.002018
11                   Birmingham-Hoover, AL Metro Area  0.004277
12                          Boise City, ID Metro Area  0.002743
13          Boston-Cambridge-Newton, MA-NH Metro Area  0.010782
14               Brownsville-Harlingen, 

In [5]:
# Load the previously saved weighted PageRank results from disk.
# NOTE(review): expected to carry 'metro' and 'pr' columns, as the merge
# further down relies on both -- confirm against the producing notebook.
weighted_pr = pd.read_csv(filepath_or_buffer='../results/weighted_income_pagerank.csv')
print(weighted_pr)

                                                 metro        pr
0        Los Angeles-Long Beach-Anaheim, CA Metro Area  0.021176
1     New York-Newark-Jersey City, NY-NJ-PA Metro Area  0.019864
2           Dallas-Fort Worth-Arlington, TX Metro Area  0.019688
3    Washington-Arlington-Alexandria, DC-VA-MD-WV M...  0.018384
4      Houston-The Woodlands-Sugar Land, TX Metro Area  0.017473
5         Atlanta-Sandy Springs-Roswell, GA Metro Area  0.017056
6        Chicago-Naperville-Elgin, IL-IN-WI Metro Area  0.015999
7               Phoenix-Mesa-Scottsdale, AZ Metro Area  0.015701
8               Seattle-Tacoma-Bellevue, WA Metro Area  0.013660
9      Riverside-San Bernardino-Ontario, CA Metro Area  0.013246
10        San Francisco-Oakland-Hayward, CA Metro Area  0.013170
11   Miami-Fort Lauderdale-West Palm Beach, FL Metr...  0.012076
12               Denver-Aurora-Lakewood, CO Metro Area  0.011797
13   Philadelphia-Camden-Wilmington, PA-NJ-DE-MD Me...  0.011485
14      Tampa-St. Petersb

In [6]:
# Read the tab-separated coordinates table and trim stray whitespace from the
# 'Location' labels so they can be matched exactly against metro names.
coords = (
    pd.read_csv('../coordinates.txt', sep='\t')
    .assign(Location=lambda frame: frame['Location'].str.strip())
)
print(coords)

                                             Location  \
0                          Albuquerque, NM Metro Area   
1        Allentown-Bethlehem-Easton, PA-NJ Metro Area   
2                             Amarillo, TX Metro Area   
3                            Anchorage, AK Metro Area   
4        Atlanta-Sandy Springs-Roswell, GA Metro Area   
5                    Austin-Round Rock, TX Metro Area   
6                          Bakersfield, CA Metro Area   
7            Baltimore-Columbia-Towson, MD Metro Area   
8                          Baton Rouge, LA Metro Area   
9                 Beaumont-Port Arthur, TX Metro Area   
10                   Birmingham-Hoover, AL Metro Area   
11                          Boise City, ID Metro Area   
12          Boston-Cambridge-Newton, MA-NH Metro Area   
13               Brownsville-Harlingen, TX Metro Area   
14   Buffalo-Cheektowaga-Niagara Falls, NY Metro Area   
15         Charleston-North Charleston, SC Metro Area   
16       Charlotte-Concord-Gast

In [7]:
# Pair each metro's two scores (pr_x = score from pr_df, pr_y = score from
# weighted_pr) and keep only the scaled difference between them.
# The inner join drops any metro that is missing from either table.
delta_pr = (
    pr_df
    .merge(weighted_pr, how='inner', on='metro')
    .assign(**{'Delta PR': lambda frame: (frame['pr_y'] - frame['pr_x']) * 100000})
    .loc[:, ['metro', 'Delta PR']]
)
print(delta_pr)

                                                metro   Delta PR
0                          Albuquerque, NM Metro Area   3.093486
1                              Abilene, TX Metro Area   1.652197
2        Allentown-Bethlehem-Easton, PA-NJ Metro Area  -7.559256
3                             Amarillo, TX Metro Area   1.664570
4                            Anchorage, AK Metro Area   1.743735
5        Atlanta-Sandy Springs-Roswell, GA Metro Area -40.986271
6                    Austin-Round Rock, TX Metro Area  -0.020466
7                          Bakersfield, CA Metro Area   2.857240
8            Baltimore-Columbia-Towson, MD Metro Area  -7.486664
9                          Baton Rouge, LA Metro Area   2.926351
10                Beaumont-Port Arthur, TX Metro Area   1.291637
11                   Birmingham-Hoover, AL Metro Area   1.415922
12                          Boise City, ID Metro Area   3.247536
13          Boston-Cambridge-Newton, MA-NH Metro Area -12.161240
14               Brownsvi

In [8]:
# Attach coordinates to every metro that has a matching 'Location' entry;
# metros without a match are dropped by the inner join.
delta_pr_with_coords = (
    delta_pr
    .merge(coords, how='inner', left_on='metro', right_on='Location')
    .loc[:, ['metro', 'Delta PR', 'Coordinates']]
)
print(delta_pr_with_coords)

                                                metro   Delta PR  \
0                          Albuquerque, NM Metro Area   3.093486   
1                              Abilene, TX Metro Area   1.652197   
2        Allentown-Bethlehem-Easton, PA-NJ Metro Area  -7.559256   
3                             Amarillo, TX Metro Area   1.664570   
4                            Anchorage, AK Metro Area   1.743735   
5        Atlanta-Sandy Springs-Roswell, GA Metro Area -40.986271   
6                    Austin-Round Rock, TX Metro Area  -0.020466   
7                          Bakersfield, CA Metro Area   2.857240   
8            Baltimore-Columbia-Towson, MD Metro Area  -7.486664   
9                          Baton Rouge, LA Metro Area   2.926351   
10                Beaumont-Port Arthur, TX Metro Area   1.291637   
11                   Birmingham-Hoover, AL Metro Area   1.415922   
12                          Boise City, ID Metro Area   3.247536   
13          Boston-Cambridge-Newton, MA-NH Metro

In [9]:
# Metros with a strictly positive Delta PR, ordered from largest to smallest,
# printed and then written out as a tab-separated file.
pr_increased = (
    delta_pr_with_coords
    .loc[lambda frame: frame['Delta PR'] > 0]
    .sort_values(by='Delta PR', ascending=False)
    .reset_index(drop=True)
)
print(pr_increased)
pr_increased.to_csv('../results/pr_increased_vals_coords.tsv', sep='\t', index=False)

                                                 metro  Delta PR  \
0       Portland-Vancouver-Hillsboro, OR-WA Metro Area  8.681262   
1               Phoenix-Mesa-Scottsdale, AZ Metro Area  7.877945   
2               Seattle-Tacoma-Bellevue, WA Metro Area  5.751308   
3          Indianapolis-Carmel-Anderson, IN Metro Area  5.536568   
4    Minneapolis-St. Paul-Bloomington, MN-WI Metro ...  5.467740   
5                Denver-Aurora-Lakewood, CO Metro Area  5.109336   
6    Sacramento--Roseville--Arden-Arcade, CA Metro ...  5.094183   
7      Riverside-San Bernardino-Ontario, CA Metro Area  5.038305   
8                        Kansas City, MO-KS Metro Area  5.005904   
9        Los Angeles-Long Beach-Anaheim, CA Metro Area  3.926141   
10               Spokane-Spokane Valley, WA Metro Area  3.765543   
11       San Jose-Sunnyvale-Santa Clara, CA Metro Area  3.756221   
12                                Salem, OR Metro Area  3.709127   
13                               Fresno, CA Metr

In [10]:
# Metros with a strictly negative Delta PR, ordered by the size of the drop.
# Take an explicit copy of the filtered rows: the column overwrite below then
# acts on an independent frame instead of a slice of delta_pr_with_coords,
# which is what raised pandas' SettingWithCopyWarning.
pr_decreased = delta_pr_with_coords[delta_pr_with_coords['Delta PR'] < 0].copy()
# Store the magnitude, so a larger value means a bigger decrease.
pr_decreased['Delta PR'] = pr_decreased['Delta PR'].abs()
pr_decreased = pr_decreased.sort_values(by='Delta PR', ascending=False).reset_index(drop=True)
print(pr_decreased)
pr_decreased.to_csv('../results/pr_decreased_vals_coords.tsv', sep='\t', index=False)

                                                metro   Delta PR  \
0             San Juan-Carolina-Caguas, PR Metro Area  64.338113   
1            Orlando-Kissimmee-Sanford, FL Metro Area  57.686299   
2        Atlanta-Sandy Springs-Roswell, GA Metro Area  40.986271   
3    New York-Newark-Jersey City, NY-NJ-PA Metro Area  39.622640   
4   Miami-Fort Lauderdale-West Palm Beach, FL Metr...  27.752894   
5                             Mayagüez, PR Metro Area  27.130691   
6                        Warner Robins, GA Metro Area  22.750661   
7   Philadelphia-Camden-Wilmington, PA-NJ-DE-MD Me...  22.339719   
8                                Ponce, PR Metro Area  20.879147   
9      Tampa-St. Petersburg-Clearwater, FL Metro Area  20.517524   
10                   Aguadilla-Isabela, PR Metro Area  16.582345   
11          Boston-Cambridge-Newton, MA-NH Metro Area  12.161240   
12                             Arecibo, PR Metro Area  11.078112   
13                        Jacksonville, FL Metro

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  
