In [1]:
import numpy as np
import pandas as pd

### DataFrame Setup

In [2]:
# Load the migration table from a CSV expected in the working directory.
# The ranking functions below read its 'State' and 'Population' columns and
# treat the remaining trailing columns as a square state-by-state matrix
# (assumed layout -- confirm against the CSV).
data = pd.read_csv("migration_2021.csv")

### getPF(A) Function

In [3]:
def getPF(A):
    """
    Returns the Perron-Frobenius ranking vector of a matrix
    Input: A square matrix A, assumed to satisfy the Perron-Frobenius hypotheses
    Output: Entrywise absolute value of the eigenvector of A whose eigenvalue
    has the largest modulus
    """
    eigenvalues, eigenvectors = np.linalg.eig(A)

    # Eigenvectors are stored column-wise, so the position of the dominant
    # eigenvalue is also the column of the vector we want.
    dominant = np.abs(eigenvalues).argmax()
    perron_vector = eigenvectors[:, dominant]

    # Taking moduli removes the arbitrary sign/phase chosen by the solver.
    return np.abs(perron_vector)

### Part 1, Naive Method

In [8]:
def naive_state_rank(df):
    """
    Ranks states by applying Perron-Frobenius directly to the raw (unnormalized) data
    Input: DataFrame with a 'State' column whose trailing n columns (n = number of rows)
    form the square matrix to rank. This layout is assumed, not checked.
    Output: DataFrame with columns 'States' and 'Quality' (float), sorted by 'Quality'
    in descending order. The index holds each state's row position in df.
    """
    n, m = df.shape
    states = df['State'].to_numpy()

    # W is the trailing n x n block of df, i.e. df without the leading
    # non-matrix columns (state name and total population).
    W = df.iloc[:, m - n:].to_numpy(dtype=float)

    ranking = getPF(W) # Applies Perron-Frobenius to W

    # Build the frame column by column. Stacking the names and the scores into
    # a single array first would coerce the scores to object dtype.
    naive_df = pd.DataFrame({'States': states, 'Quality': ranking})
    return naive_df.sort_values(by='Quality', ascending=False) # Created rankings

In [9]:
# Display the naive ranking computed from the migration table loaded above.
naive_state_rank(data)

Unnamed: 0,States,Quality
9,Florida,0.421375
43,Texas,0.387413
4,California,0.286957
33,North Carolina,0.255187
10,Georgia,0.247327
32,New York,0.20681
46,Virginia,0.194192
5,Colorado,0.191077
2,Arizona,0.181999
38,Pennsylvania,0.17787


### Part 2, Perron-Frobenius

In [10]:
def state_rank(df,low_pop=1.5e+06):
    """
    Uses Perron-Frobenius to rank states in terms of quality related to their ability to attract people
    Inputs: DataFrame with 'State' and 'Population' columns followed by one column per state
    (assumed layout: the trailing columns form a square matrix in the same order as the rows),
    and a lower threshold population low_pop
    Output: Returns a DataFrame with columns 'States' and 'Quality' (float) that lists only the
    states with population of at least low_pop, ordered by descending 'Quality'.
    The input df is left unmodified. If no state passes the threshold, an empty
    DataFrame with the same two columns is returned.
    """
    populations = df['Population']

    # States below the threshold are removed. Zero-population rows are removed
    # as well, since they cannot be normalized by population below.
    too_small = (populations < low_pop) | (populations == 0)
    dropped_states = df.loc[too_small, 'State'].tolist()

    # Remove both the row and the corresponding column of every dropped state
    # so that the remaining matrix stays square.
    data = df.loc[~too_small].drop(columns=dropped_states)

    n, m = data.shape
    new_states = data['State'].to_numpy() # Lists the refined states

    if n == 0:
        # Nothing left to rank; an empty matrix has no dominant eigenvector.
        return pd.DataFrame({'States': new_states, 'Quality': np.empty(0)})

    new_pops = data['Population'].to_numpy(dtype=float) # Lists the refined populations

    # W is the trailing n x n block of data, i.e. data without state name or population.
    W = data.iloc[:, m - n:].to_numpy(dtype=float)

    # Normalize by dividing the 'wins' of a state (its row) by the population of the state.
    W = W / new_pops[:, np.newaxis]

    ranking = getPF(W) # Applies Perron-Frobenius to W

    # Build the frame column by column. Stacking the names and the scores into
    # a single array first would coerce the scores to object dtype.
    new_df = pd.DataFrame({'States': new_states, 'Quality': ranking})
    return new_df.sort_values(by='Quality', ascending=False) # Created rankings

In [12]:
# Rank only the states whose 'Population' is at least 3 million.
state_rank(data,low_pop=3e+06)

Unnamed: 0,States,Quality
25,South Carolina,0.312773
3,Colorado,0.273328
20,North Carolina,0.249256
17,Nevada,0.237428
29,Virginia,0.228344
26,Tennessee,0.222006
6,Georgia,0.217366
5,Florida,0.211228
1,Arizona,0.210317
28,Utah,0.188832
