# Population forecasting

1. Clean population dataframe
2. Transform dataframe
3. Forecast population using **fb prophet**

## 1. Clean population dataframe

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the metro-area population table. Later cells rely on its 'Metro-Area',
# 'State', 'Census', 'Estimate Base' and '2010'..'2019' columns.
population = pd.read_csv('metropop_2010_2019.csv')

### Check population csv

In [None]:
# Sanity check of the raw table: (rows, columns), then a preview of the first rows.
print(population.shape)
population.head()

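### Combine city and state
- use `explode_str` to split hyphen-joined metro areas (several cities in one `Metro-Area` value) into one row per city
- join each separated city with its state into a single `City,State` label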

In [None]:
def explode_str(population, col='Metro-Area', sep='-'):
    """Split a delimited string column into one row per piece.

    A row whose ``col`` value contains ``sep`` is repeated once per piece,
    with ``col`` replaced by that piece; all other columns are copied
    unchanged. Pieces are not stripped of surrounding whitespace.

    Parameters
    ----------
    population : pandas.DataFrame
        Frame containing the column to split. It is not modified.
    col : str, default 'Metro-Area'
        Name of the column holding the delimited strings.
    sep : str, default '-'
        Separator, always matched literally (never as a regular expression).

    Returns
    -------
    pandas.DataFrame
        New frame with one row per piece. Rows are selected positionally, so
        the original index labels are repeated rather than renumbered.
    """
    # Split value by value with str.split so that:
    #   * `sep` is literal ('.' or '|' would miscount if treated as a regex),
    #   * an empty frame yields zero pieces instead of one empty string,
    #   * non-string cells (e.g. NaN) are kept as a single, unsplit row.
    pieces = [
        value.split(sep) if isinstance(value, str) else [value]
        for value in population[col]
    ]
    # Explicit integer dtype keeps np.repeat valid when there are no rows.
    counts = np.fromiter(
        (len(parts) for parts in pieces), dtype=np.intp, count=len(pieces)
    )
    row_positions = np.repeat(np.arange(len(pieces)), counts)
    flattened = [piece for parts in pieces for piece in parts]
    return population.iloc[row_positions].assign(**{col: flattened})

# One row per city. explode_str selects rows positionally, so index labels
# of split rows are repeated rather than renumbered.
population = explode_str(population)

In [None]:
# Remove leading/trailing whitespace from each city name
# (presumably left around the '-' separator by the split above).
population['Metro-Area'] = population['Metro-Area'].str.strip()

In [None]:
# Build the "<city>, <state>" label that identifies each series from here on.
population['City,State'] = population['Metro-Area'] + ', ' + population['State']

### Drop unused columns

In [None]:
# Keep only the series label followed by the yearly columns '2010'..'2019',
# in chronological order; everything else is dropped first.
year_columns = [str(year) for year in range(2010, 2020)]
population = (
    population
    .drop(columns=['Census', 'Estimate Base', 'Metro-Area', 'State'])
    .loc[:, ['City,State', *year_columns]]
)

In [None]:
# Sanity check after cleaning: (rows, columns), then a preview of the first rows.
print(population.shape)
population.head()

## 2. Stack dataframe
- melt the wide table (one column per year) into long format so that every city can be forecast in one pass with `groupby`, instead of writing a separate CSV per city and running each one individually
- https://stackoverflow.com/questions/64179626/stack-unstack-melt-pivot-transpose-what-is-the-simple-method-to-convert-mul

In [None]:
# Wide -> long: one row per (city, year). The year column names become the
# values of 'ds' and the population figures become 'y', which are the
# columns handed to Prophet further down.
population_melt = pd.melt(
    population,
    id_vars=['City,State'],
    var_name='ds',
    value_name='y',
).reset_index(drop=True)

In [None]:
# Display the long-format frame as the cell output.
population_melt

## 3. Forecast population using **fb prophet**

In [None]:
from fbprophet import Prophet
from fbprophet.plot import add_changepoints_to_plot

In [None]:
# Group the long-format rows by city label; each group is one city's series.
grouped = population_melt.groupby('City,State')

In [None]:
# Start from an empty frame; the forecasting loop below fills `final` with
# the per-city forecast columns.
final = pd.DataFrame()

In [None]:
# Fit one Prophet model per city and gather every city's forecast columns
# side by side in `final`, indexed by date ('ds').
city_forecasts = []
for g, group in grouped:
    m = Prophet()
    m.fit(group)
    print(group)
    # The history is yearly and its 'ds' values are bare years, which parse to
    # January 1st. 'YS' (year start) keeps the future dates on that same grid;
    # 'Y' would produce December 31st dates, where the yearly seasonality
    # learned only from January 1st observations is not identified.
    future = m.make_future_dataframe(periods=10, freq='YS')
    forecast = m.predict(future)
    forecast = forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']]
    # Suffix the columns with the city label so they stay unique once combined.
    forecast = forecast.rename(columns={'yhat': 'yhat_'+g,
                                        'yhat_lower': 'yhat_lower_'+g,
                                        'yhat_upper': 'yhat_upper_'+g})
    city_forecasts.append(forecast.set_index('ds'))

# A single outer join on the date index replaces one merge per city.
final = pd.concat(city_forecasts, axis=1) if city_forecasts else pd.DataFrame()

In [None]:
# Order the columns city by city: point forecast, then lower and upper bound.
# A comprehension yields one item per iteration, so the three prefixes are
# produced by a nested loop rather than an unparenthesised tuple (which is a
# syntax error).
final = final[[
    prefix + g
    for g in grouped.groups
    for prefix in ('yhat_', 'yhat_lower_', 'yhat_upper_')
]]

In [None]:
# Component plot for the last city fitted in the loop. `forecast` was reduced
# to renamed 'yhat_*' columns there, so it no longer carries the component
# columns (e.g. 'trend') that plot_components needs; predict again with the
# same model to get the full output. 'YS' keeps the future dates on the
# January 1st grid of the yearly history.
components_forecast = m.predict(m.make_future_dataframe(periods=10, freq='YS'))
fig2 = m.plot_components(components_forecast)