In [1]:
'''
Population data from FRED API

'''

import requests
import pandas as pd
%matplotlib inline
import matplotlib.pyplot as plt
from datetime import datetime
# 'seaborn-whitegrid' was renamed to 'seaborn-v0_8-whitegrid' in matplotlib 3.6
# and the old name was later removed, so use whichever name this install offers.
plt.style.use('seaborn-v0_8-whitegrid'
              if 'seaborn-v0_8-whitegrid' in plt.style.available
              else 'seaborn-whitegrid')
import numpy as np
import os
from dateutil import parser
from collections import defaultdict

ModuleNotFoundError: No module named 'requests'

In [None]:

def do_series_search(search_data):
    '''
    Does a full-text search of FRED data series
    INPUT: search_data -- one search term (string) or a list/tuple of terms;
           terms are lower-cased and joined with '+'
    OUTPUT: json results from FRED (decoded into Python objects)
    '''
    # NOTE(review): the API key is hard-coded in the notebook; it should be
    # moved to an environment variable or another secret store.
    fred_search_url = 'https://api.stlouisfed.org/fred/series/search?api_key=4a35874b8a4526dd50b390e46f396651&search_text={0}&file_type=json'
    # isinstance() is needed here: the earlier comparison against type(list)
    # (which is `type`) was always unequal, so real lists were wrapped a second
    # time and .lower() then failed on the inner list.
    if not isinstance(search_data, (list, tuple)):
        search_data = [search_data]
    search_string = '+'.join([x.lower() for x in search_data])
    print(search_string)
    fred_response = requests.get(fred_search_url.format(search_string))
    result = fred_response.json()
    return result
    
def get_series(series_id):
    '''
    Fetches the observations of one FRED data series
    INPUT: series_id -- FRED series identifier, inserted into the request URL
    OUTPUT: json results from FRED (decoded into Python objects)
    '''
    url_template = ''.join([
        'https://api.stlouisfed.org/fred/series/observations?series_id={0}',
        '&api_key=4a35874b8a4526dd50b390e46f396651&file_type=json',
    ])
    observations_url = url_template.format(series_id)
    return requests.get(observations_url).json()


def get_population_data(country):
    '''
    Get population data from FRED

    Searches FRED for the country and downloads every returned series whose
    title contains the word "population" (case-insensitive).

    INPUT: country (string) -- search text, lower-cased before searching
    OUTPUT: dictionary keyed by series title; each value is a dictionary
            with the keys 'title', 'units', 'notes', 'dates' and 'values'
    '''
    country_series = do_series_search(country.lower())

    series_ids = {}
    series_notes = {}
    # The filter never depended on a list of specific titles, so each search
    # hit is examined exactly once.
    for elem in country_series['seriess']:
        if 'population' in elem['title'].lower():
            series_ids[elem['title']] = elem['id']
            # .get() so a hit without a 'notes' field does not raise KeyError
            series_notes[elem['title']] = elem.get('notes', '')
    print(series_ids)

    ans = {}
    for title, series_id in series_ids.items():
        series_info = get_series(series_id)
        observations = series_info['observations']
        ans[title] = {
            'title': title,
            'units': series_info['units'],
            'notes': series_notes[title],
            'dates': [x['date'] for x in observations],
            'values': [x['value'] for x in observations],
        }

    return ans


def get_series_info_by_title(title, country):
    '''
    Get series data by specific title

    Searches FRED for the country and downloads the series whose title equals
    `title` (case-insensitive) and whose frequency is annual.

    INPUT: title (str) -- exact series title to look for
            country (str) -- search text; 'south korea' is searched as
                             'republic of korea'
    OUTPUT: {country: series info}, keyed by the country exactly as passed in.
            Series info has the keys 'title', 'units', 'notes', 'dates' and
            'values'. The result is empty when no annual series has that
            title; if several match, the last one fetched is kept.
    '''
    print(title)
    print(country)
    save_country = country
    if country.lower() == 'south korea':
        country = 'republic of korea'
    country_series = do_series_search(country.lower())
    series_ids = {}
    series_notes = {}
    wanted_title = title.lower()
    for elem in country_series['seriess']:
        # Read 'frequency' defensively: it used to be indexed in this print
        # before the membership guard ran, so a hit without it raised KeyError.
        frequency = elem.get('frequency', '')
        print(elem['title'], frequency, elem['id'])
        if wanted_title == elem['title'].lower() and frequency.lower() == 'annual':
            print('found one {0}'.format(title))
            print(elem)
            series_ids[elem['title']] = elem['id']
            # .get() so a hit without a 'notes' field does not raise KeyError
            series_notes[elem['title']] = elem.get('notes', '')
    print('series ids', series_ids)
    ans = {}
    # For some reason cannot generically find us data
    # series_id['population, total for united states'] = 'POPTOTUSA647NWDB'
    # series_notes = {'title':'population, total for united states'}
    for series_title, series_id in series_ids.items():
        print('search1', series_title, series_id)
        series_info = get_series(series_id)
        observations = series_info['observations']
        ans[save_country] = {
            'title': series_title,
            'units': series_info['units'],
            'notes': series_notes[series_title],
            'dates': [x['date'] for x in observations],
            'values': [x['value'] for x in observations],
        }
    return ans
    

        
                
    
    
    

In [None]:
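'''
NOTE: the two helpers in this cell look a series up by an exact title built
from the country name. FRED series titles are not worded the same way for
every country, so these lookups do not work reliably.
'''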

def get_total_population_data_for_counry_set(country_set):
    '''
    Look up the total-population series for every country in country_set.

    The title searched for is 'population for <country>' (lower-cased),
    except for 'usa' / 'united states', where it is just 'population'.
    Returns a dictionary mapping each country, spelled as given, to the
    result of get_series_info_by_title for it.
    '''
    us_aliases = {'usa', 'united states'}
    results = {}
    for original_name in country_set:
        lowered = original_name.lower()
        if lowered in us_aliases:
            wanted_title = 'population'
        else:
            wanted_title = 'population for {0}'.format(lowered)
        print(wanted_title)
        results[original_name] = get_series_info_by_title(wanted_title, original_name)
    return results


'''


'''
def get_working_age_percent_for_country_set(country_set):
    '''
    Look up the 'population ages 15 to 64 for <country>' series for every
    country in country_set.

    Returns a dictionary mapping each country, spelled as given, to the
    result of get_series_info_by_title for it.
    '''
    return {
        name: get_series_info_by_title(
            'population ages 15 to 64 for {0}'.format(name.lower()), name)
        for name in country_set
    }
    
    
    

In [None]:
# Trial run of the total-population lookup with a single country
this_set = get_total_population_data_for_counry_set(['United States'])





In [None]:
# Inspect the trial result
this_set

In [None]:
# Countries compared in the rest of the notebook
country_set = ['China', 'Japan', 'Republic of Korea', 'Germany', 'United States']

In [None]:
# Total population per country: {country: {country: series info}}, or
# {country: {}} when no annual series with the expected title was found
total_population_set = get_total_population_data_for_counry_set(country_set)

In [None]:

# NOTE(review): displays the full nested dict; output can be very long
total_population_set

In [None]:
# Share of population aged 15-64 per country, looked up by title
working_population_set = get_working_age_percent_for_country_set(country_set)

In [None]:
# NOTE(review): displays the full nested dict; output can be very long
working_population_set

In [None]:

# Display name -> exact FRED series title used for the working-age population.
# Insertion order is kept because it becomes the column order downstream.
working_age_population_set = dict([
    ('Germany', 'Working Age Population: Aged 15-64: All Persons for Germany'),
    ('South Korea', 'Working Age Population: Aged 15-64: All Persons for the Republic of Korea'),
    ('Japan', 'Working Age Population: Aged 15-64: All Persons for Japan'),
    ('USA', 'Working Age Population: Aged 15-64: All Persons for the United States'),
    # China's series is a ratio -- it has to be multiplied by the population
    ('China', 'Population Ages 15 to 64 for China'),
])

# Display name -> exact FRED series title used for the population ratio.
working_age_population_ratio_set = dict([
    ('Germany', 'Employment to Population Ratio for Germany'),
    ('South Korea', 'Employment to Population Ratio for the Republic of Korea'),
    ('Japan', 'Employment to Population Ratio for Japan'),
    ('USA', 'Employment to Population Ratio for the United States'),
    ('China', 'Population Ages 15 to 64 for China'),
])


In [None]:
# Fetch the working-age series for each country by its exact FRED title
working_age_population = {}
for country, series_title in working_age_population_set.items():
    working_age_population[country] = get_series_info_by_title(series_title, country)

In [None]:
# Fetch the ratio series for each country by its exact FRED title
working_age_population_ratio = {}
for country, series_title in working_age_population_ratio_set.items():
    working_age_population_ratio[country] = get_series_info_by_title(series_title, country)

In [None]:
# Inspect the fetched series: {country: {country: series info}}
working_age_population

In [None]:
# Convert China's working-age share (a percentage of total population) into a
# head count: total population * share / 100. Only dates present in both
# series are kept; a date whose share or total is not numeric gets ''.
china_share = working_age_population['China']['China']
china_total = total_population_set['China']['China']

# date -> total population; setdefault keeps the first entry for a date,
# which is what list.index() used to select
china_total_by_date = {}
for date, value in zip(china_total['dates'], china_total['values']):
    china_total_by_date.setdefault(date, value)

calculated_working_age_population = []
calculated_working_age_dates = []
for date, working_population_value in zip(china_share['dates'], china_share['values']):
    if date not in china_total_by_date:
        continue
    total_population_value = china_total_by_date[date]
    calculated_working_age_dates.append(date)
    # Both operands must be numeric. The earlier check tested the share twice
    # and never validated the total, so float() could fail on the total.
    both_numeric = all(
        text.replace('.', '', 1).isnumeric()
        for text in (total_population_value, working_population_value))
    if both_numeric:
        calculated_working_age_population.append(
            str(float(total_population_value) * (float(working_population_value) / 100)))
    else:
        calculated_working_age_population.append('')
        
        
     

In [None]:
# Inspect the computed head counts (strings; '' where a value was not numeric)
calculated_working_age_population

In [None]:
# Replace China's share series with the computed head counts and their dates.
# NOTE(review): this overwrites the fetched data in place, so re-running the
# calculation cell afterwards would scale already-scaled values; re-fetch
# working_age_population first.
working_age_population['China']['China']['values'] = calculated_working_age_population
working_age_population['China']['China']['dates'] = calculated_working_age_dates

In [None]:
# Confirm the dates that remain for China
working_age_population['China']['China']['dates']

In [None]:
#working_age_population_ratio

In [None]:
from collections import defaultdict

In [None]:
def make_dataframe(dict_info):
    '''
    Build a DataFrame with one row per date and one column per country.

    INPUT: dict_info -- {country: {country: {'dates': [...], 'values': [...]}}};
           a country whose inner dictionary has no series (for example {}
           from a failed lookup) is kept as a column of '' values
    OUTPUT: DataFrame indexed by the sorted union of all dates, with columns
            in the key order of dict_info; '' marks a date a country lacks
    '''
    columns = list(dict_info.keys())

    # One date -> value lookup per country replaces repeated list scans.
    # setdefault keeps the first value for a repeated date, as list.index() did.
    value_by_date = {}
    for country in columns:
        series = dict_info[country].get(country) or {}
        lookup = {}
        for date, value in zip(series.get('dates', []), series.get('values', [])):
            lookup.setdefault(date, value)
        value_by_date[country] = lookup

    dates_list = sorted(set().union(*value_by_date.values()))
    values = {
        date: [value_by_date[country].get(date, '') for country in columns]
        for date in dates_list
    }
    df_ans = pd.DataFrame.from_dict(values, orient='index', columns=columns)
    return df_ans
    

In [None]:
# Working-age population table: one row per date, one column per country
df_countries = make_dataframe(working_age_population)

In [None]:

# Use the same US label as the total-population table
df_countries.rename(columns={'USA':'United States'}, inplace=True)

In [None]:
# Working age population
# NOTE(review): displays the whole frame; the .head() cell below is enough
df_countries

In [None]:
# First rows only
df_countries.head()

In [None]:
# Turn every column into numbers (unparseable entries become NaN) and divide
# all columns except China by one million.
# NOTE(review): presumably China is already in the target unit -- confirm.
for column in df_countries.columns.tolist():
    numeric = pd.to_numeric(df_countries[column], errors='coerce')
    print(column)
    df_countries[column] = numeric if column == 'China' else numeric / 1000000


In [None]:
# Total population table: one row per date, one column per country
df_countries_total = make_dataframe(total_population_set)

In [None]:
# Use the same Korea label as the working-age table
df_countries_total.rename(columns={'Republic of Korea': 'South Korea'}, inplace=True)

In [None]:
# NOTE(review): displays the whole frame; consider .head()
df_countries_total

In [None]:
# Turn every column into numbers (unparseable entries become NaN) and divide
# all columns except China by one thousand.
# NOTE(review): presumably China is already in the target unit -- confirm.
for column in df_countries_total.columns.tolist():
    numeric = pd.to_numeric(df_countries_total[column], errors='coerce')
    print(column)
    df_countries_total[column] = numeric if column == 'China' else numeric / 1000

In [None]:
# Change year labels to only year
df_countries['Year'] = df_countries.index.map(lambda x: x.split('-')[0])
df_countries_total['Year'] = df_countries_total.index.map(lambda x: x.split('-')[0])

In [None]:
# Make the year the row index of both tables
df_countries = df_countries.set_index(['Year'])
df_countries_total = df_countries_total.set_index(['Year'])

In [None]:
# Directory the CSV exports are written to (relative path)
data_dir = '../data'

In [None]:
import os
# File names and full paths of the two CSV exports, both under data_dir
pop_file, work_file = 'FRED_pop.csv', 'FRED_work.csv'
pop_path, work_path = (os.path.join(data_dir, name) for name in (pop_file, work_file))

In [None]:
# Create the target folders first so the export does not fail when the data
# directory is missing, then write both tables with their index column.
for csv_path in (work_path, pop_path):
    os.makedirs(os.path.dirname(csv_path) or '.', exist_ok=True)
df_countries.to_csv(work_path, index=True)
df_countries_total.to_csv(pop_path, index=True)

In [None]:
# List the export directory to confirm the CSV files were written.
# This replaces a shell `ls` of a user-specific absolute path, which is not
# portable to other machines.
sorted(os.listdir(data_dir))

In [None]:
# NOTE(review): displays the whole frame; consider .head()
df_countries

In [None]:
# Working-age population per country, as stored in df_countries
df_countries.plot(title='Working Age Population')

In [None]:
# Work on a copy so df_countries itself stays unchanged
df_temp = df_countries.copy()

In [None]:
# Normalize data
# Min-max scaling per column: each country's series is mapped onto [0, 1]
df_temp2 = (df_temp - df_temp.min()) / (df_temp.max() - df_temp.min())

In [None]:
df_temp2.plot(title='Working Age Population  - normalized')

In [None]:
# Each column as a percentage of its own maximum
df_temp3 = (df_temp / df_temp.max()) * 100

In [None]:
df_temp3.plot(title='Working Age Population  - max = 100%')

In [None]:
# Total population per country, as stored in df_countries_total
df_countries_total.plot(title='Total Population')

In [None]:
# NOTE(review): displays the whole frame; consider .head()
df_countries_total