In [1]:
import requests
import pandas as pd
import sys
from pandas.io.json import json_normalize
from datetime import datetime
import gc
from google.oauth2 import credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from google.cloud import bigquery

In [2]:
# OAuth scope requested from the user: BigQuery access only.
SCOPES = ['https://www.googleapis.com/auth/bigquery']
project_id = '...'

# Build the installed-app OAuth flow from the client-secrets file and run it
# through a local redirect server to obtain user credentials.
# NOTE: this assignment rebinds the name `credentials`, hiding the
# `google.oauth2.credentials` module imported in the first cell.
flow = InstalledAppFlow.from_client_secrets_file('...', scopes=SCOPES)
credentials = flow.run_local_server(port=0)

# BigQuery client authenticated with those credentials and bound to the project.
client = bigquery.Client(project=project_id, credentials=credentials)

Please visit this URL to authorize this application: https://accounts.google.com/o/oauth2/auth?response_type=code&client_id=853561264369-cg46opisk56diiv2ee4nblkb6jmtland.apps.googleusercontent.com&redirect_uri=http%3A%2F%2Flocalhost%3A55862%2F&scope=https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fbigquery&state=SWyqXB6KasPKC0o2IgIpTjn5Jndpze&access_type=offline


In [3]:
# Reference the public "world_bank_intl_education" dataset.
# Built with DatasetReference(project, dataset_id) instead of the deprecated
# Client.dataset() helper, which emits a deprecation warning.
dataset_ref = bigquery.DatasetReference("bigquery-public-data", "world_bank_intl_education")

# API request - fetch the dataset
dataset = client.get_dataset(dataset_ref)

# Construct a reference to the "international_education" table
table_ref = dataset_ref.table("international_education")

# API request - fetch the table
table = client.get_table(table_ref)

# Preview the first five rows of the "international_education" table
client.list_rows(table, max_results=5).to_dataframe()

Unnamed: 0,country_name,country_code,indicator_name,indicator_code,value,year
0,Chad,TCD,"Enrolment in lower secondary education, both s...",UIS.E.2,321921.0,2012
1,Chad,TCD,"Enrolment in upper secondary education, both s...",UIS.E.3,68809.0,2006
2,Chad,TCD,"Enrolment in upper secondary education, both s...",UIS.E.3,30551.0,1999
3,Chad,TCD,"Enrolment in upper secondary education, both s...",UIS.E.3,79784.0,2007
4,Chad,TCD,"Repeaters in primary education, all grades, bo...",UIS.R.1,282699.0,2006


## SQL Analysis


### Government expenditure on education
Which countries spend the largest fraction of GDP on education?

In [4]:
# Download every row of the table (no max_results cap) into a pandas DataFrame.
full_table_rows = client.list_rows(table)
data = full_table_rows.to_dataframe()

In [5]:
# Display the downloaded frame; the recorded output is elided to its first and last rows.
data

Unnamed: 0,country_name,country_code,indicator_name,indicator_code,value,year
0,Chad,TCD,"Enrolment in lower secondary education, both s...",UIS.E.2,306639.00000,2010
1,Chad,TCD,"Enrolment in lower secondary education, both s...",UIS.E.2,170052.00000,2004
2,Chad,TCD,"Enrolment in upper secondary education, both s...",UIS.E.3,123744.00000,2010
3,Chad,TCD,"Enrolment in upper secondary education, both s...",UIS.E.3,57804.00000,2004
4,Chad,TCD,"Enrolment in upper secondary education, both s...",UIS.E.3,101428.00000,2008
...,...,...,...,...,...,...
5082196,Middle East & North Africa (excluding high inc...,MNA,"Lower secondary completion rate, gender parity...",UIS.AIR.2.GPV.GLAST.GPI,0.59688,1975
5082197,Middle East & North Africa (excluding high inc...,MNA,"Lower secondary completion rate, gender parity...",UIS.AIR.2.GPV.GLAST.GPI,0.75445,1990
5082198,Middle East & North Africa (excluding high inc...,MNA,"Lower secondary completion rate, gender parity...",UIS.AIR.2.GPV.GLAST.GPI,1.02373,2008
5082199,Middle East & North Africa (excluding high inc...,MNA,"Lower secondary completion rate, gender parity...",UIS.AIR.2.GPV.GLAST.GPI,0.94328,2000


In [6]:
# Distinct indicator codes present in the table.
data["indicator_code"].unique()

array(['UIS.E.2', 'UIS.E.3', 'UIS.E.4', ..., 'LO.EGRA.READ.0.KII.2GRD',
       'LO.EGRA.CWPM.ZERO.ICI.2GRD', 'LO.EGRA.CWPM.ZERO.LUV.2GRD'],
      dtype=object)

One interesting indicator code is `SE.XPD.TOTL.GD.ZS`, which corresponds to "Government expenditure on education as % of GDP (%)".

In [7]:
# Keep only the rows whose indicator code is SE.XPD.TOTL.GD.ZS.
is_edu_spend = data["indicator_code"].eq("SE.XPD.TOTL.GD.ZS")
data[is_edu_spend]

Unnamed: 0,country_name,country_code,indicator_name,indicator_code,value,year
2212,Chad,TCD,Government expenditure on education as % of GD...,SE.XPD.TOTL.GD.ZS,2.20764,2012
2213,Chad,TCD,Government expenditure on education as % of GD...,SE.XPD.TOTL.GD.ZS,2.84971,2013
6856,Cuba,CUB,Government expenditure on education as % of GD...,SE.XPD.TOTL.GD.ZS,7.70486,2000
6857,Cuba,CUB,Government expenditure on education as % of GD...,SE.XPD.TOTL.GD.ZS,9.94078,2003
6858,Cuba,CUB,Government expenditure on education as % of GD...,SE.XPD.TOTL.GD.ZS,7.07477,1993
...,...,...,...,...,...,...
5067322,Zambia,ZMB,Government expenditure on education as % of GD...,SE.XPD.TOTL.GD.ZS,5.94579,1972
5067323,Zambia,ZMB,Government expenditure on education as % of GD...,SE.XPD.TOTL.GD.ZS,1.24076,2007
5067324,Zambia,ZMB,Government expenditure on education as % of GD...,SE.XPD.TOTL.GD.ZS,5.15487,1982
5067325,Zambia,ZMB,Government expenditure on education as % of GD...,SE.XPD.TOTL.GD.ZS,3.33899,1986


In [15]:
# Per-country mean of indicator SE.XPD.TOTL.GD.ZS over the years 2010-2017
# (both bounds inclusive), sorted from the highest mean to the lowest.
# NOTE(review): the alias `average_GDP` holds the averaged indicator value
# (expenditure as % of GDP, per the notebook text), not a GDP figure —
# consider renaming it once downstream uses are confirmed.
query = """ 
        SELECT country_name, avg(value) as average_GDP
        FROM `bigquery-public-data.world_bank_intl_education.international_education`
        WHERE indicator_code = 'SE.XPD.TOTL.GD.ZS' and year >= 2010 and year<=2017
        GROUP BY country_name
        ORDER BY average_GDP DESC
        """

In [16]:
# Submit the SQL held in `query` through the BigQuery client, then convert the
# job's result into a pandas DataFrame.
query_job = client.query(query)
country_GDP = query_job.to_dataframe()

In [17]:
# Display the per-country averages returned by the query (one row per country_name).
country_GDP

Unnamed: 0,country_name,average_GDP
0,Cuba,12.837270
1,"Micronesia, Fed. Sts.",12.467750
2,Solomon Islands,10.001080
3,Moldova,8.372153
4,Namibia,8.349610
...,...,...
152,Cambodia,1.706404
153,West Bank and Gaza,1.503760
154,South Sudan,1.409726
155,Monaco,1.409606


### Identify interesting indicator codes to explore

In [26]:
# Count rows per (indicator_code, indicator_name) pair, keep the pairs with at
# least 175 rows, and list the most frequent first.
# NOTE(review): the 175-row cutoff is not explained in the notebook — confirm
# the rationale or lift it into a named constant.
query2 = """ 
        SELECT indicator_name, indicator_code, COUNT(1) as num_rows
        FROM `bigquery-public-data.world_bank_intl_education.international_education`
        GROUP BY indicator_code,indicator_name
        HAVING num_rows >= 175
        ORDER BY num_rows DESC
        """

In [27]:
# Submit the SQL held in `query2` through the BigQuery client, then convert the
# job's result into a pandas DataFrame.
query_job2 = client.query(query2)
country_indicator_check = query_job2.to_dataframe()

In [28]:
# Display the indicator row counts returned by the query, largest first.
country_indicator_check

Unnamed: 0,indicator_name,indicator_code,num_rows
0,"Population, total",SP.POP.TOTL,11155
1,Population growth (annual %),SP.POP.GROW,11149
2,"Population, ages 15-64 (% of total)",SP.POP.1564.TO.ZS,10243
3,"Population, ages 0-14 (% of total)",SP.POP.0014.TO.ZS,10233
4,"Population, female (% of total)",SP.POP.TOTL.FE.ZS,10233
...,...,...,...
2074,TIMSS: Fourth grade students reaching the low ...,LO.TIMSS.SCI4.LOW,183
2075,TIMSS: Fourth grade students reaching the inte...,LO.TIMSS.SCI4.INT,183
2076,Percentage of teachers in post-secondary non-t...,UIS.TRTP.4,182
2077,Percentage of graduates from tertiary ISCED 6 ...,UIS.FGP.6,180
