In [None]:
import pandas as pd
import numpy as np
import scipy.stats as stats

In [None]:
# Relative path of the CSV that the next cell loads into `total_df`
filepath = "Resources/final_data.csv"

In [None]:
# Load the source table; every later cell derives its figures from this frame
total_df = pd.read_csv(filepath)

In [None]:
# Let pandas show up to 50 columns before truncating a displayed frame
pd.set_option('display.max_columns', 50)

In [None]:
total_df

## First we calculate how much the U.S. is growing as a whole, with and without migration

In [None]:
# National summary table: one row per year from 2010 through 2019.
# Later cells attach the yearly totals to it as extra columns.
us_totals_df = pd.DataFrame({'Year': list(range(2010, 2020))})

In [None]:
# Add up the POPESTIMATE<year> column over every row to get one nationwide
# population figure per year (2010-2019), then keep the values as a plain list.
us_total_population = total_df[[f'POPESTIMATE{year}' for year in range(2010, 2020)]].sum()
total_population = us_total_population.to_list()

In [None]:
# Add up the HUESTIMATE<year> column over every row to get the nationwide
# housing-unit count per year (2010-2019), then keep the values as a plain list.
us_houses_by_year = total_df[[f'HUESTIMATE{year}' for year in range(2010, 2020)]].sum()
total_houses = us_houses_by_year.to_list()

In [None]:
# Add up the NETMIG<year> column over every row to get nationwide net
# migration per year (2010-2019), then keep the values as a plain list.
us_migrants_by_year = total_df[[f'NETMIG{year}' for year in range(2010, 2020)]].sum()
us_migrants = us_migrants_by_year.to_list()

In [None]:
# Average the '<year> Mean Housing' column over every row for each year (2010-2019).
# This is a simple, unweighted mean of the rows, so each row counts equally.
us_mean_house_cost_by_year = total_df[[f'{year} Mean Housing' for year in range(2010, 2020)]].mean()
mean_house_value = us_mean_house_cost_by_year.to_list()

In [None]:
# Attach the nationwide yearly series as columns, in this order
for column_name, yearly_values in {
    'Total Population per Year': total_population,
    'Total Houses per Year': total_houses,
    'Total Migrants per Year': us_migrants,
}.items():
    us_totals_df[column_name] = yearly_values

In [None]:
us_totals_df

In [None]:
# Population with that year's net migration taken out
us_totals_df['Total Base Population per Year'] = us_totals_df['Total Population per Year'].sub(
    us_totals_df['Total Migrants per Year']
)

In [None]:
# Add the yearly mean housing cost (the list built from the '<year> Mean Housing' columns)
us_totals_df['Mean Housing Cost per Year'] = mean_house_value

In [None]:
#us_totals_df

In [None]:

# Housing units per person for each year
us_totals_df['Houses per Capita'] = us_totals_df['Total Houses per Year'].div(
    us_totals_df['Total Population per Year']
)

In [None]:
#us_totals_df

In [None]:
# Average U.S. family size, hard-coded with one value per row of us_totals_df (2010-2019).
# Source: https://www.statista.com/statistics/183657/average-size-of-a-family-in-the-us/
us_totals_df['Average Family Size'] = [3.16, 3.18, 3.13, 3.12, 3.13, 3.14, 3.14, 3.14, 3.14, 3.14]
#us_totals_df

In [None]:
# Housing units available to an average-sized family:
# units per person multiplied by the number of people in an average family.
us_totals_df['Houses per Family'] = us_totals_df['Houses per Capita'].mul(
    us_totals_df['Average Family Size']
)
#us_totals_df.set_index('Year', inplace=True)
#us_totals_df

In [None]:
# Net migration as a fraction of total population (a ratio, not multiplied by 100)
us_totals_df['Percent Total Migration'] = us_totals_df['Total Migrants per Year'].div(
    us_totals_df['Total Population per Year']
)

In [None]:
us_totals_df

# Gather summary data for the US

In [None]:
# Net migration summed over every year in us_totals_df
us_total_migration = us_totals_df['Total Migrants per Year'].sum()
us_total_migration

In [None]:
# Net migration summed over every year, divided by 51.
# NOTE(review): 51 is presumably the number of rows in total_df (50 states + DC) -- confirm;
# len(total_df) would remove the hard-coded constant.
us_total_mean_migration = us_totals_df['Total Migrants per Year'].sum()/51
us_total_mean_migration

In [None]:
# Net change in total U.S. population over the period: last year minus first year.
# Endpoints are used rather than max - min so that a dip or peak in the middle
# of the series cannot overstate the growth. Rows are in year order (2010..2019).
us_total_pop_change = (
    us_totals_df['Total Population per Year'].iloc[-1]
    - us_totals_df['Total Population per Year'].iloc[0]
)
us_total_pop_change

In [None]:
# Summed net migration divided by the total population change computed above.
# The result is a ratio (not multiplied by 100); the state classification loop
# later uses 1.5x this value as its threshold.
us_migration_percentage_of_growth = us_total_migration/us_total_pop_change
us_migration_percentage_of_growth

In [None]:
# Change in the national mean housing cost from the first year (2010) to the last (2019).
# The column is selected by label because positional column 4 is
# 'Total Base Population per Year', which is added to the frame before
# 'Mean Housing Cost per Year'.
us_change_in_housing_cost = (
    us_totals_df['Mean Housing Cost per Year'].iloc[-1]
    - us_totals_df['Mean Housing Cost per Year'].iloc[0]
)
us_change_in_housing_cost

In [None]:
# Relative change in mean housing cost: the change over the period divided by
# the first-year (2010) value. The denominator is selected by label because
# positional column 4 is 'Total Base Population per Year', not the housing cost.
us_percent_change_in_house_cost = (
    us_change_in_housing_cost / us_totals_df['Mean Housing Cost per Year'].iloc[0]
)
us_percent_change_in_house_cost

In [None]:
# Table for year-over-year absolute changes: one row per year, 2010 through 2019
us_changes_df = pd.DataFrame({'Year': list(range(2010, 2020))})
#us_changes_df

In [None]:
# Year-over-year absolute difference of each national series.
# The first row is NaN because 2010 has no earlier year to compare with.
for change_label, totals_label in {
    'Base Population Change': 'Total Base Population per Year',
    'Change in Houses per Year': 'Total Houses per Year',
    'Change in Houses Cost per Year': 'Mean Housing Cost per Year',
    'Change in Migration per Year': 'Total Migrants per Year',
    'Change in Houses per Capita': 'Houses per Capita',
}.items():
    us_changes_df[change_label] = us_totals_df[totals_label].diff().tolist()

In [None]:
us_changes_df

In [None]:
# Table for year-over-year relative changes: one row per year, 2010 through 2019
us_percent_change_df = pd.DataFrame({'Year': list(range(2010, 2020))})

In [None]:
# Year-over-year fractional change of each national series.
# The first row is NaN because 2010 has no earlier year to compare with.
for change_label, totals_label in {
    'Base Population Change': 'Total Base Population per Year',
    'Change in Houses per Year': 'Total Houses per Year',
    'Change in Houses Cost per Year': 'Mean Housing Cost per Year',
    'Change in Migration per Year': 'Total Migrants per Year',
    'Change in Houses per Capita': 'Houses per Capita',
}.items():
    us_percent_change_df[change_label] = us_totals_df[totals_label].pct_change().tolist()

In [None]:
us_percent_change_df

## Look at the changes from year to year for each state and compare to the U.S. as a whole

## First we look at each state's total migration over the ten years

In [None]:
# Start the per-state migration table from the State column alone (independent copy)
states_mig = total_df[['State']].copy()
#states_mig

In [None]:
# Add each row's yearly net-migration columns (NETMIG2010..NETMIG2019), aligned on the index
states_mig = states_mig.join(total_df[[f'NETMIG{year}' for year in range(2010, 2020)]])
states_mig

In [None]:
# Add each row's yearly population estimates (POPESTIMATE2010..POPESTIMATE2019), aligned on the index
states_mig = states_mig.join(total_df[[f'POPESTIMATE{year}' for year in range(2010, 2020)]])
states_mig

In [None]:
# For every year, express net migration as a fraction of that year's population
for year in range(2010, 2020):
    migrants = states_mig[f'NETMIG{year}']
    residents = states_mig[f'POPESTIMATE{year}']
    states_mig[f'{year} Percent Migration'] = migrants.div(residents)

In [None]:
states_mig

# Now looking at Housing

In [None]:
# Start the per-state housing table from the State column alone (independent copy)
states_housing = total_df[['State']].copy()

In [None]:
# Add each row's '<year> Mean Housing' columns for 2010-2019.
# Note the result is bound to `state_housing` (singular); `states_housing`
# itself is left holding only the State column.
state_housing = states_housing.join(total_df[[f'{year} Mean Housing' for year in range(2010, 2020)]])
#state_housing

In [None]:
# Absolute change in mean housing cost per row between 2010 and 2019
state_housing['House Cost Change'] = state_housing['2019 Mean Housing'].sub(
    state_housing['2010 Mean Housing']
)

In [None]:
# Start the per-state housing percent-change table from the State column alone (independent copy)
states_housing_percent_change = total_df[['State']].copy()

In [None]:
# Three separate lists of state names, filled by the classification loop in the next cell:
# migration more than 50% above the national ratio (inbound), everything else,
# and migration more than 50% above the national ratio (outbound).
high_migration_in_states, average_migration_states, high_migration_out_states = [], [], []

In [None]:
# Sort every state into one of three buckets by comparing the size of its
# migration figure, relative to its population-change figure, against 1.5x the
# national ratio (us_migration_percentage_of_growth).
#
# NOTE(review): `states_pop` is not defined in any cell above this one, so on
# Restart & Run All this cell raises NameError unless it is built elsewhere.
# Positional columns 4 and 5 are presumably net migration and population
# change -- TODO confirm against the cell that constructs `states_pop`.
migration_threshold = 1.5 * us_migration_percentage_of_growth

# Empty the buckets in place so re-running this cell does not append duplicates.
high_migration_in_states.clear()
average_migration_states.clear()
high_migration_out_states.clear()

for row_pos in range(len(states_pop.index)):
    net_migration = states_pop.iloc[row_pos, 4]
    population_change = states_pop.iloc[row_pos, 5]
    # Look the name up by position, like the values above, so a non-default
    # index cannot pair one row's numbers with another row's name.
    state_name = states_pop['State'].iloc[row_pos]

    # A zero or missing population change, or a missing migration figure,
    # leaves no meaningful ratio to compare; report the state and move on.
    if pd.isna(net_migration) or pd.isna(population_change) or population_change == 0:
        print(f'Error in determining {state_name}. Skipping...')
        continue

    # Only the magnitude of the ratio decides whether the state is "high";
    # the sign of the migration figure decides whether it is inbound or outbound.
    if abs(net_migration) / abs(population_change) > migration_threshold:
        if net_migration >= 0:
            high_migration_in_states.append(state_name)
        else:
            high_migration_out_states.append(state_name)
    else:
        average_migration_states.append(state_name)