# Population forecasting

1. Clean population dataframe
2. Transform dataframe
3. Forecast population using **fb prophet**

## 1. Clean population dataframe

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Source CSV of metro-area population figures, hosted in the project repository.
POPULATION_CSV_URL = 'https://raw.githubusercontent.com/Lambda-School-Labs/PT17_cityspire-a-ds/main/notebooks/datasets/data/population2010-2019/metropop_2010_2019.csv'
population = pd.read_csv(POPULATION_CSV_URL)

### Check population csv

In [None]:
# Sanity check: print (rows, columns), then preview the first rows as the cell output.
print(population.shape)
population.head()

### Combine city state
- use `explode_str` to split combined metro-area names and combined states into one row per city and state
- join each separated city and state into a single `City,State` label

In [None]:
def explode_str(population, col='Metro-Area', sep='-'):
    """Split the strings in ``col`` on ``sep`` into one row per piece.

    Parameters
    ----------
    population : pandas.DataFrame
        Frame containing the column to split.
    col : str
        Name of the column whose values are split.
    sep : str
        Literal separator (not a regular expression).

    Returns
    -------
    pandas.DataFrame
        A new frame in which every row of ``population`` is repeated once
        per piece of its ``col`` value. ``col`` holds the individual pieces
        (not stripped); all other columns and the original index labels are
        repeated unchanged. Non-string values (e.g. missing values) are kept
        as a single row, and an empty frame yields an empty frame.
    """
    # Split row by row so the number of repeats always matches the number of
    # pieces, whatever characters ``sep`` contains.
    pieces = [value.split(sep) if isinstance(value, str) else [value]
              for value in population[col]]
    # Explicit integer dtype: an empty list would otherwise become float64,
    # which numpy refuses to use as repeat counts.
    repeats = np.array([len(parts) for parts in pieces], dtype=np.intp)
    row_positions = np.arange(len(pieces)).repeat(repeats)
    flat_values = [part for parts in pieces for part in parts]
    return population.iloc[row_positions].assign(**{col: flat_values})

# Split hyphen-joined metro names into one row per city (defaults: col='Metro-Area', sep='-').
population = explode_str(population)

In [None]:
# Trim leading/trailing whitespace from the separated city names.
population['Metro-Area'] = population['Metro-Area'].str.strip()

In [None]:
def explode_str(population, col='State', sep='-'):
    """Split the strings in ``col`` on ``sep`` into one row per piece.

    Same behaviour as the earlier definition of this function; only the
    default column differs (``'State'``).

    Parameters
    ----------
    population : pandas.DataFrame
        Frame containing the column to split.
    col : str
        Name of the column whose values are split.
    sep : str
        Literal separator (not a regular expression).

    Returns
    -------
    pandas.DataFrame
        A new frame in which every row of ``population`` is repeated once
        per piece of its ``col`` value. ``col`` holds the individual pieces
        (not stripped); all other columns and the original index labels are
        repeated unchanged. Non-string values (e.g. missing values) are kept
        as a single row, and an empty frame yields an empty frame.
    """
    # Split row by row so the number of repeats always matches the number of
    # pieces, whatever characters ``sep`` contains.
    pieces = [value.split(sep) if isinstance(value, str) else [value]
              for value in population[col]]
    # Explicit integer dtype: an empty list would otherwise become float64,
    # which numpy refuses to use as repeat counts.
    repeats = np.array([len(parts) for parts in pieces], dtype=np.intp)
    row_positions = np.arange(len(pieces)).repeat(repeats)
    flat_values = [part for parts in pieces for part in parts]
    return population.iloc[row_positions].assign(**{col: flat_values})

# Split hyphen-joined state values into one row per state (defaults: col='State', sep='-').
population = explode_str(population)

In [None]:
# Trim leading/trailing whitespace from the separated state values.
population['State'] = population['State'].str.strip()

In [None]:
# Inspect the separated city names: number of distinct values, then the values themselves.
print(population['Metro-Area'].nunique())
population['Metro-Area'].unique()

In [None]:
# Drop rows whose city name is empty (presumably produced by consecutive
# separators in the raw metro names -- TODO confirm against the data).
# .copy() makes the result an independent frame, so the 'City,State' column
# assigned to it afterwards does not trigger pandas' SettingWithCopyWarning.
population = population[population['Metro-Area'] != ''].copy()

In [None]:
# Inspect the separated state values: number of distinct values, then the values themselves.
print(population['State'].nunique())
population['State'].unique()

In [None]:
# Build the label used to identify each series, formatted as "<city>, <state>".
population['City,State'] = population['Metro-Area'] + ', ' + population['State']

In [None]:
# (rows, columns) after exploding and labelling, shown as the cell output.
population.shape

### Drop duplicate rows and unused columns

In [None]:
# Show every row whose 'City,State' label occurs more than once
# (keep=False flags all occurrences, not just the repeats).
population[population.duplicated(subset=['City,State'], keep=False)]

In [None]:
# For each repeated 'City,State' label keep only its last row, then report the new size.
population = population.drop_duplicates(subset='City,State', keep='last')
print(population.shape)

In [None]:
# Remove the columns that are no longer needed, then put the label first
# followed by the yearly columns 2010..2019.
unused_columns = ['Census', 'Estimate Base', 'Metro-Area', 'State']
year_columns = [str(year) for year in range(2010, 2020)]
population = population.drop(columns=unused_columns)[['City,State', *year_columns]]

In [None]:
# Confirm the cleaned frame: print (rows, columns), then preview the first rows.
print(population.shape)
population.head()

## 2. Stack dataframe 
- Reshape to long format so the cities can be processed by group from a single dataframe, instead of creating a separate CSV for each city and running them one at a time
- https://stackoverflow.com/questions/64179626/stack-unstack-melt-pivot-transpose-what-is-the-simple-method-to-convert-mul

In [None]:
# Wide -> long: one row per (city, year), with the year label in 'ds' and the
# population value in 'y' (the column names used by Prophet).
population_melt = pd.melt(
    population,
    id_vars=['City,State'],
    var_name='ds',
    value_name='y',
)
population_melt = population_melt.reset_index(drop=True)

In [None]:
# Display the long-format frame as the cell output.
population_melt

## 3. Forecast population using **fb prophet**

In [None]:
from fbprophet import Prophet
from fbprophet.plot import add_changepoints_to_plot

In [None]:
# Labels of all cities to forecast, in the row order of the cleaned frame.
cities_list = population['City,State'].tolist()

In [None]:
def rnd_series(city):
    """Return the rows of ``population_melt`` that belong to ``city``.

    Parameters
    ----------
    city : str
        A value of the 'City,State' column.

    Returns
    -------
    pandas.DataFrame
        The matching rows with their columns unchanged. 'ds' is returned
        as stored in ``population_melt``; no datetime conversion is done
        here (Prophet is expected to parse 'ds' itself when fitting --
        NOTE(review): confirm against the installed Prophet version).
    """
    # The previous version also built a datetime frame from the *whole*
    # 'ds' column on every call and then discarded it; that dead work is
    # removed.
    return population_melt[population_melt['City,State'] == city]

In [None]:
# One sub-frame per city, in the same order as cities_list.
series = list(map(rnd_series, cities_list))

In [None]:
# Number of per-city frames (one per entry of cities_list).
len(series)

In [None]:
# Preview the frame of the first city.
series[0]

In [None]:
def run_prophet(series, periods=10, freq='YS'):
    """Fit a Prophet model to one city's series and forecast future years.

    Parameters
    ----------
    series : pandas.DataFrame
        Rows of a single city with the columns 'City,State', 'ds' and 'y'.
        Must contain at least one row (the city label is read from the
        first row).
    periods : int
        Number of future periods to forecast (default 10).
    freq : str
        pandas frequency alias of the future dates. The default 'YS'
        (year start) matches the history, whose 'ds' values are plain year
        labels and therefore parse to 1 January. The previous 'Y'
        (year end) put the first forecast on 31 December of the last
        observed year, giving that year two rows and shifting every
        forecast label by one year.

    Returns
    -------
    pandas.DataFrame
        Columns 'City,State', 'ds', 'yhat', 'yhat_lower', 'yhat_upper'
        for the frame built by ``make_future_dataframe`` (which by default
        includes the historical dates as well as the future ones).
    """
    # All seasonalities are disabled, so the model does not fit any
    # daily/weekly/yearly seasonal component.
    model = Prophet(daily_seasonality=False,
                    weekly_seasonality=False,
                    yearly_seasonality=False)
    model.fit(series)
    future = model.make_future_dataframe(periods=periods, freq=freq)
    prediction = model.predict(future)
    # Take an explicit copy before adding a column; assigning into the
    # column selection directly triggers pandas' SettingWithCopyWarning.
    forecast = prediction[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].copy()
    # Put the city label first so forecasts of different cities can be
    # stacked in one table.
    forecast.insert(0, 'City,State', series['City,State'].iloc[0])

    return forecast

In [None]:
# Try the forecasting function on the first city's frame and preview the result.
f = run_prophet(series[0])
f.head()

In [None]:
from time import time, ctime

In [None]:
# Forecast every city and store all forecasts in a single CSV file.
# time() returns epoch seconds, so the elapsed time can be computed by
# subtraction (ctime() returns strings, which cannot be subtracted).
start = time()

forecasts = [run_prophet(city_series) for city_series in series]
if forecasts:
    # Write once: a single header row, no index column (index=False must be
    # the boolean, the string 'False' is truthy), and the file is overwritten
    # so re-running this cell does not append duplicate rows.
    pd.concat(forecasts, ignore_index=True).to_csv('population_prediction.csv', index=False)

end = time()
print(f'Total time: {end - start:.1f}s = {ctime(end)} - {ctime(start)}')

In [None]:
# Read the forecast CSV back into a dataframe.
predictions = pd.read_csv('population_prediction.csv')

In [None]:
# Show rows whose 'City,State' value is the literal header text, i.e. header
# lines that ended up inside the data (this happens when several frames are
# appended to the same CSV, each with its own header).
# The frame is bound to `predictions`; the previous `prediction` was undefined.
predictions.loc[predictions['City,State'] == 'City,State']