# Population forecasting

1. Clean population dataframe
2. Stack dataframe (reshape from wide to long)
3. Forecast population using **fb prophet**

## 1. Clean population dataframe

In [1]:
import pandas as pd
import numpy as np



In [2]:
# Load the metro-area population table from a CSV located relative to the
# notebook's working directory.
population = pd.read_csv('metropop_2010_2019.csv')

### Check population csv

In [3]:
# Sanity-check the load: print (rows, columns), then render the first rows.
print(population.shape)
population.head()

(415, 14)


Unnamed: 0,Metro-Area,State,Census,Estimate Base,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,Abilene,TX,165252.0,165252.0,165585.0,166634.0,167442.0,167473.0,168342.0,169688.0,170017.0,170429.0,171150.0,172060.0
1,Akron,OH,703200.0,703196.0,703031.0,703200.0,702109.0,703621.0,704908.0,704382.0,703524.0,703987.0,703855.0,703479.0
2,Albany,GA,153857.0,154033.0,154145.0,154545.0,153976.0,152667.0,151949.0,150387.0,149137.0,148090.0,147840.0,146726.0
3,Albany-Lebanon,OR,116672.0,116681.0,116891.0,118164.0,118273.0,118405.0,119042.0,120236.0,122769.0,125035.0,127451.0,129749.0
4,Albany-Schenectady-Troy,NY,870716.0,870713.0,871082.0,872778.0,874698.0,877065.0,878113.0,879085.0,879792.0,882158.0,882263.0,880381.0


### Combine city and state
- use `explode_str` to split hyphenated metro areas into one row per city
- combine each separated city with its state into a single `City,State` label

In [4]:
def explode_str(population, col='Metro-Area', sep='-'):
    """Split a delimited string column into one row per piece.

    A row whose ``col`` value contains ``sep`` is repeated once per piece.
    All other columns are copied unchanged and the original index label is
    kept, so the result can contain duplicate index labels.

    Parameters
    ----------
    population : pandas.DataFrame
        Frame holding the column to split. It is not modified.
    col : str, default 'Metro-Area'
        Name of the string column to split.
    sep : str, default '-'
        Separator, matched literally (never as a regular expression).

    Returns
    -------
    pandas.DataFrame
        New frame with one row per separated piece in ``col``. An empty
        input yields an empty frame.
    """
    # Split with plain str.split so `sep` is always literal; counting matches
    # with pandas' regex-based .str.count gave wrong repeat counts for
    # separators such as '.' or '|'.
    pieces_per_row = [value.split(sep) for value in population[col]]
    # Explicit integer dtype so an empty input still produces valid repeats.
    repeats = np.array([len(pieces) for pieces in pieces_per_row], dtype=int)
    row_positions = np.arange(len(pieces_per_row)).repeat(repeats)
    flat_pieces = [piece for pieces in pieces_per_row for piece in pieces]
    return population.iloc[row_positions].assign(**{col: flat_pieces})

# Replace the frame with its exploded version, using the defaults
# (col='Metro-Area', sep='-'): one row per hyphen-separated city.
population = explode_str(population)

In [5]:
# Trim leading/trailing whitespace from the city names.
population['Metro-Area'] = population['Metro-Area'].str.strip()

In [6]:
# Build a "City, ST" label by joining each city name with its row's state value.
population['City,State'] = population['Metro-Area'] + ', ' + population['State']

### Drop unused columns

In [7]:
# Keep only the city label and the yearly estimates (2010-2019).
# Selecting by name instead of dropping the unused columns keeps this cell
# re-runnable: drop() raises KeyError once those columns are already gone.
year_columns = [str(year) for year in range(2010, 2020)]
population = population[['City,State'] + year_columns]

In [8]:
# Re-check the frame after cleaning: print (rows, columns), then render the first rows.
print(population.shape)
population.head()

(654, 11)


Unnamed: 0,"City,State",2010,2011,2012,2013,2014,2015,2016,2017,2018,2019
0,"Abilene, TX",165585.0,166634.0,167442.0,167473.0,168342.0,169688.0,170017.0,170429.0,171150.0,172060.0
1,"Akron, OH",703031.0,703200.0,702109.0,703621.0,704908.0,704382.0,703524.0,703987.0,703855.0,703479.0
2,"Albany, GA",154145.0,154545.0,153976.0,152667.0,151949.0,150387.0,149137.0,148090.0,147840.0,146726.0
3,"Albany, OR",116891.0,118164.0,118273.0,118405.0,119042.0,120236.0,122769.0,125035.0,127451.0,129749.0
3,"Lebanon, OR",116891.0,118164.0,118273.0,118405.0,119042.0,120236.0,122769.0,125035.0,127451.0,129749.0


## 2. Stack dataframe 
- Reshape to long format (one row per city and year) so a single `groupby` loop can forecast every city, instead of writing and processing a separate CSV per city.
- https://stackoverflow.com/questions/64179626/stack-unstack-melt-pivot-transpose-what-is-the-simple-method-to-convert-mul

In [None]:
# Reshape wide -> long: every year column becomes its own row, with the year
# label stored in 'ds' and the population value in 'y'.
population_melt = pd.melt(
    population,
    id_vars=['City,State'],
    var_name='ds',
    value_name='y',
)
population_melt = population_melt.reset_index(drop=True)

In [None]:
# Display the long-format frame (columns: 'City,State', 'ds', 'y').
population_melt

## 3. Forecast population using **fb prophet**

In [None]:
from fbprophet import Prophet
from fbprophet.plot import add_changepoints_to_plot

In [None]:
# Group the long-format rows by city label so each city's series can be
# handled separately.
grouped = population_melt.groupby('City,State')

In [None]:
# Empty frame that will hold the combined per-city forecast columns.
final = pd.DataFrame()

In [13]:
# Fit one Prophet model per city and gather the renamed forecast columns.
city_forecasts = []
for g, group in grouped:
    m = Prophet()
    m.fit(group)
    # 'YS' (year start) puts the ten future dates on 1 January, matching how
    # the bare year labels in `ds` are parsed; 'Y' yields 31 December dates.
    future = m.make_future_dataframe(periods=10, freq='YS')
    # `forecast` stays the complete Prophet output (trend, components, ...)
    # so it remains usable with Prophet's plotting helpers after the loop.
    forecast = m.predict(future)
    # Suffix the columns with the city label so they stay distinct once all
    # cities sit side by side in one frame.
    city_forecasts.append(
        forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]
        .rename(columns={'yhat': 'yhat_'+g,
                         'yhat_lower': 'yhat_lower_'+g,
                         'yhat_upper': 'yhat_upper_'+g})
        .set_index('ds')
    )

# Align every city on the date index in a single outer-join concat. Unlike
# merging into `final` inside the loop, this is linear in the number of
# cities and gives the same result when the cell is re-run.
final = pd.concat(city_forecasts, axis=1) if city_forecasts else pd.DataFrame()

INFO:fbprophet:n_changepoints greater than number of observations. Using 7.
INFO:fbprophet:n_changepoints greater than number of observations. Using 15.


In [None]:
# Order the columns city by city: point forecast, then lower and upper bound.
# A comprehension cannot emit three bare comma-separated items per iteration
# (that is a SyntaxError), so the names are flattened with a nested loop.
ordered_columns = [
    prefix + g
    for g in grouped.groups
    for prefix in ('yhat_', 'yhat_lower_', 'yhat_upper_')
]
final = final[ordered_columns]

In [None]:
# Component plot for the last city fitted by the loop above. plot_components
# needs the complete output of m.predict (trend column etc.), not a frame
# reduced to renamed yhat columns, so the prediction is recomputed from that
# city's `future` dates.
fig2 = m.plot_components(m.predict(future))