# Global Budget Spending Analysis
----
<i> Project 1 Team 5 </i><br/> 
Brett Fuller, Diego Jones, Mav Sanchez

In [34]:
#Import necessary package dependencies
%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests
from pprint import pprint
from scipy.stats import linregress

#Import API Key
from config import api_key

#Define directories
source_data_dir = "source_data"
staging_data_dir = "staging_data"
target_data_dir = "target_data"
target_images_dir = "target_images"

### Data extraction
----
#####            API Calls

In [35]:
# Define base URLs for API
base_url_fromwherever ="put main endpoint here"

In [36]:
# Get  data
list_whatever = []
save_to_whatever = []

for a in list_whatever:
    
    query_url = base_url_fromwherever + 'appid=' + api_key + '&param=' + a
    response = requests.get(query_url).json() 
    try:
        list_whatever.append(response) #customize depending on the JSON structure
    except:
        pass

# Save api data to data frames
staging_df = pd.DataFrame(list_whatever)

In [37]:
# Save df to csv so we don't have to perform API calls again
staging_df.to_csv(f"{staging_data_dir}/staging_df.csv")

In [38]:
#####            CSV Load

In [39]:
budget_breakdown = "source_data/budget_breakdown.csv"

budgetbreakdown_df = pd.read_csv(budget_breakdown)
clean_budgetbreakdown = budgetbreakdown_df[['Unnamed: 0', 'Expenditure on general public services',
       'Expenditure on defense', 'Expenditure on public order & safety',
       'Expenditure on economic affairs',
       'Expenditure on environment protection',
       'Expenditure on housing & community amenities', 'Expenditure on health',
       'Expenditure on recreation, culture, & religion',
       'Expenditure on education', 'Expenditure on social protection']]
clean_budgetbreakdown = clean_budgetbreakdown.dropna()

clean_budgetbreakdown = clean_budgetbreakdown.rename(columns = {'Unnamed: 0': 'Country'})


In [40]:
country_rank = "source_data/top_73.csv"
country_ranking = pd.read_csv(country_rank, header=None
                             )
country_ranking = country_ranking.rename(columns = {0: 'Country'})

combined_countrydf = clean_budgetbreakdown.merge(country_ranking, on='Country')
combined_countrydf.columns = ['Country',
               'Expenditure on general public services',
                               'Expenditure on defense',
                 'Expenditure on public order & safety',
                      'Expenditure on economic affairs',
                'Expenditure on environment protection',
         'Expenditure on housing & community amenities',
                                'Expenditure on health',
       'Expenditure on recreation, culture, & religion',
                             'Expenditure on education',
                     'Expenditure on social protection',
                                                      'Overall Ranking',
                                                      'Entrepreneurship Ranking',
                                                      'Adventure Ranking',
                                                      'Citizenship Ranking',
                                                      'Cultural Influence Ranking',
                                                      'Heritage Ranking',
                                                      'Movers Ranking',
                                                      'Open For Business Ranking',
                                                      'Power Ranking',
                                                     'Quality of Life Ranking']

combined_countrydf.head()

Unnamed: 0,Country,Expenditure on general public services,Expenditure on defense,Expenditure on public order & safety,Expenditure on economic affairs,Expenditure on environment protection,Expenditure on housing & community amenities,Expenditure on health,"Expenditure on recreation, culture, & religion",Expenditure on education,...,Overall Ranking,Entrepreneurship Ranking,Adventure Ranking,Citizenship Ranking,Cultural Influence Ranking,Heritage Ranking,Movers Ranking,Open For Business Ranking,Power Ranking,Quality of Life Ranking
0,Austria,6.4,0.6,1.3,5.7,0.4,0.3,8.2,1.2,4.9,...,18,17,22,12,25,20,59,15,27,11
1,Estonia,4.0,2.4,1.9,4.2,0.6,0.3,5.2,2.0,5.7,...,55,40,60,26,61,62,73,26,72,45
2,Finland,8.0,1.3,1.2,4.5,0.2,0.3,7.2,1.4,6.1,...,14,16,27,6,26,43,32,9,35,9
3,France,6.2,1.8,1.6,5.5,0.9,1.1,8.1,1.4,5.5,...,12,15,12,13,2,4,44,29,6,16
4,Germany,5.7,0.9,1.6,3.3,0.6,0.4,7.2,1.0,4.1,...,4,1,50,10,15,19,41,17,4,10


In [41]:
population_breakdown = "source_data/Countries.csv"

populationbreakdown_df = pd.read_csv(population_breakdown)
populationbreakdown_df.columns


doublecombined_countrydf = combined_countrydf.merge(populationbreakdown_df, on='Country')
doublecombined_countrydf.head()

Unnamed: 0,Country,Expenditure on general public services,Expenditure on defense,Expenditure on public order & safety,Expenditure on economic affairs,Expenditure on environment protection,Expenditure on housing & community amenities,Expenditure on health,"Expenditure on recreation, culture, & religion",Expenditure on education,...,Movers Ranking,Open For Business Ranking,Power Ranking,Quality of Life Ranking,GDPPC,Literacy,InfantMortality,Agriculture,Population,NetMigration
0,Austria,6.4,0.6,1.3,5.7,0.4,0.3,8.2,1.2,4.9,...,59,15,27,11,43439,0.98,3.4,0.014,8611000,5.2
1,Estonia,4.0,2.4,1.9,4.2,0.6,0.3,5.2,2.0,5.7,...,73,26,72,45,17295,0.998,3.8,0.034,1312000,-3.2
2,Finland,8.0,1.3,1.2,4.5,0.2,0.3,7.2,1.4,6.1,...,32,9,35,9,41921,1.0,2.5,0.028,5482000,3.0
3,France,6.2,1.8,1.6,5.5,0.9,1.1,8.1,1.4,5.5,...,44,29,6,16,36248,0.99,3.3,0.017,66810000,1.1
4,Germany,5.7,0.9,1.6,3.3,0.6,0.4,7.2,1.0,4.1,...,41,17,4,10,41219,0.99,3.4,0.007,81410000,1.5


In [42]:
# Load raw CSV files to data frame
safety_rank_data = pd.read_csv(f"{source_data_dir}/safety.csv")
safety_rank_data.head()

Unnamed: 0,Rank,Country,Amount spent on Public Safety (in Billions)
0,1,Netherlands,193.4
1,2,Norway,89.84
2,3,Australia,
3,4,Sweden,
4,5,Canada,


In [43]:
health_rank_data = pd.read_csv(f"{source_data_dir}/health.csv")
health_rank_data.head()

Unnamed: 0,Country,Indicator Name,2016,2017
0,"Hong Kong SAR, China","Life expectancy at birth, total (years)",84.226829,84.680488
1,Japan,"Life expectancy at birth, total (years)",83.984878,84.099756
2,"Macao SAR, China","Life expectancy at birth, total (years)",83.854,83.989
3,Switzerland,"Life expectancy at birth, total (years)",83.602439,83.602439
4,Spain,"Life expectancy at birth, total (years)",83.329268,83.329268


### Data cleansing
----

In [44]:
# Options
# Delete nulls
# FillNA with zero, etc
# Reformat data (date format, name format)
# Save only the columns you need

In [45]:
health2016 = pd.merge(health_rank_data, doublecombined_countrydf, on="Country")
health2016

Unnamed: 0,Country,Indicator Name,2016,2017,Expenditure on general public services,Expenditure on defense,Expenditure on public order & safety,Expenditure on economic affairs,Expenditure on environment protection,Expenditure on housing & community amenities,...,Movers Ranking,Open For Business Ranking,Power Ranking,Quality of Life Ranking,GDPPC,Literacy,InfantMortality,Agriculture,Population,NetMigration
0,Japan,"Life expectancy at birth, total (years)",83.984878,84.099756,3.9,0.9,1.2,3.5,1.2,0.7,...,5,25,7,14,32477,0.99,2.0,0.012,127000000,0.0
1,Switzerland,"Life expectancy at birth, total (years)",83.602439,83.602439,4.7,0.8,1.6,4.0,0.6,0.2,...,19,2,13,7,80215,0.99,3.6,0.008,8287000,4.7
2,Spain,"Life expectancy at birth, total (years)",83.329268,83.329268,5.8,1.0,1.9,3.7,0.8,0.4,...,38,28,19,18,25832,0.981,3.3,0.025,46560000,8.0
3,Italy,"Life expectancy at birth, total (years)",83.243902,83.243902,7.9,1.3,1.9,3.9,0.9,0.6,...,21,36,17,21,29847,0.992,3.3,0.022,60800000,3.9
4,Singapore,"Life expectancy at birth, total (years)",82.846341,82.895122,1.0,3.3,1.0,2.7,0.2,1.0,...,7,5,22,20,52889,0.968,2.4,0.0,5535000,13.6
5,Luxembourg,"Life expectancy at birth, total (years)",82.685366,82.685366,5.0,0.4,1.0,5.7,0.8,0.5,...,56,1,30,13,101450,1.0,3.4,0.003,569676,16.3
6,France,"Life expectancy at birth, total (years)",82.52439,82.52439,6.2,1.8,1.6,5.5,0.9,1.1,...,44,29,6,16,36248,0.99,3.3,0.017,66810000,1.1
7,Australia,"Life expectancy at birth, total (years)",82.44878,82.497561,4.2,1.9,1.7,4.0,0.8,0.5,...,16,14,15,5,56328,0.99,3.2,0.025,23780000,5.6
8,Israel,"Life expectancy at birth, total (years)",82.407317,82.602439,3.6,5.7,1.6,2.2,0.6,0.2,...,15,62,8,46,35330,0.978,3.5,0.013,8380000,2.2
9,Norway,"Life expectancy at birth, total (years)",82.407317,82.509756,4.7,1.6,1.2,5.4,0.9,0.9,...,23,10,23,4,74735,1.0,2.5,0.017,5196000,6.6


In [46]:
health2016["2016"] = health2016["2016"].map("{:.2f}".format)
health2016.head()

Unnamed: 0,Country,Indicator Name,2016,2017,Expenditure on general public services,Expenditure on defense,Expenditure on public order & safety,Expenditure on economic affairs,Expenditure on environment protection,Expenditure on housing & community amenities,...,Movers Ranking,Open For Business Ranking,Power Ranking,Quality of Life Ranking,GDPPC,Literacy,InfantMortality,Agriculture,Population,NetMigration
0,Japan,"Life expectancy at birth, total (years)",83.98,84.099756,3.9,0.9,1.2,3.5,1.2,0.7,...,5,25,7,14,32477,0.99,2.0,0.012,127000000,0.0
1,Switzerland,"Life expectancy at birth, total (years)",83.6,83.602439,4.7,0.8,1.6,4.0,0.6,0.2,...,19,2,13,7,80215,0.99,3.6,0.008,8287000,4.7
2,Spain,"Life expectancy at birth, total (years)",83.33,83.329268,5.8,1.0,1.9,3.7,0.8,0.4,...,38,28,19,18,25832,0.981,3.3,0.025,46560000,8.0
3,Italy,"Life expectancy at birth, total (years)",83.24,83.243902,7.9,1.3,1.9,3.9,0.9,0.6,...,21,36,17,21,29847,0.992,3.3,0.022,60800000,3.9
4,Singapore,"Life expectancy at birth, total (years)",82.85,82.895122,1.0,3.3,1.0,2.7,0.2,1.0,...,7,5,22,20,52889,0.968,2.4,0.0,5535000,13.6


In [47]:
reducehealth_df = df.loc[:, ["Country", "2016", "Expenditure on health"]]
reducedhealth_df.head()

NameError: name 'df' is not defined

### Data analysis
----

#### Question 1: (Sample) What our developed countries' most prioritized spending?
Answer here

In [9]:
# Perform statistical analysis
# Put applicable plot depending on your question