In [1]:
import os
os.chdir('../../')

In [2]:
import pandas as pd
import re
from google.oauth2 import service_account
from google.cloud import bigquery

In [3]:
pd.options.display.max_rows = 100
pd.options.display.max_columns = 100

In [4]:
SERVICE_ACCOUNT = 'private/bigquery-write-access.json'
DATASET = 'student_counts'
credentials = service_account.Credentials.from_service_account_file(SERVICE_ACCOUNT)

In [5]:
bqclient = bigquery.Client(project=credentials.project_id, credentials=credentials)

## Read Data

In [6]:
query_string = \
"""
SELECT * 
FROM `int-stud-mobility-drivers.datasets_for_modelling.students_by_year_countrly_all_predictors` 
"""
dataframe = bqclient.query(query_string).result().to_dataframe()

In [7]:
country_codes = {
    "Australia": 'aus', 
    'Canada': 'can', 
    'United Kingdom': 'uk',
    'United States of America': 'us',
    'New Zealand': 'nz'
}
destination_col = 'destination_code'
dataframe[destination_col] = dataframe.destination_country.apply(lambda x: country_codes[x])

In [8]:
#dataframe = dataframe[dataframe.source_country == 'China']
#dataframe = dataframe[dataframe.destination_country == 'Canada']

In [9]:
index_cols = ['source_country', 'year']
dst_cols = [col for col in dataframe.columns if col.startswith('dst')]
src_cols = [col for col in dataframe.columns if col.startswith('src')]

## Transform Destination Country Variables 

In [10]:
dest_index = index_cols + [destination_col]
dest_df = dataframe[dest_index + dst_cols].copy()
dest_df.set_index(dest_index, inplace=True)
dest_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,dst_distance,dst_students_count,dst_average_wage,dst_cost_of_living,dst_rent_cost_of_liv,dst_rent_index,dst_groceries_index,dst_purchasing_power_index,dst_resraurant_price_index,dst_rate_of_one_usd,dst_qoe_top20,dst_qoe_top100,dst_qoe_top200,dst_qoe_top500
source_country,year,destination_code,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
China,2017,aus,7470,128498,53507.0,80.66,62.39,42.54,76.87,101.94,77.12,1.304758,0.0,6.0,10.0,23.0
China,2016,aus,7470,112329,53972.0,78.45,60.43,40.95,74.68,147.25,74.85,1.345214,0.0,6.0,8.0,23.0
China,2015,aus,7470,97387,53946.0,99.32,74.96,49.47,94.31,110.39,90.63,1.33109,0.0,4.0,8.0,20.0
China,2014,aus,7470,90245,54392.0,108.51,83.95,57.97,105.03,104.2,97.44,1.109363,0.0,4.0,8.0,19.0
China,2011,aus,7470,90175,53906.0,107.83,87.43,51.03,112.03,117.1,88.67,0.969463,0.0,4.0,7.0,19.0


In [11]:
dest_df = dest_df.unstack(level=destination_col)
dest_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,dst_distance,dst_distance,dst_distance,dst_distance,dst_distance,dst_students_count,dst_students_count,dst_students_count,dst_students_count,dst_students_count,dst_average_wage,dst_average_wage,dst_average_wage,dst_average_wage,dst_average_wage,dst_cost_of_living,dst_cost_of_living,dst_cost_of_living,dst_cost_of_living,dst_cost_of_living,dst_rent_cost_of_liv,dst_rent_cost_of_liv,dst_rent_cost_of_liv,dst_rent_cost_of_liv,dst_rent_cost_of_liv,dst_rent_index,dst_rent_index,dst_rent_index,dst_rent_index,dst_rent_index,dst_groceries_index,dst_groceries_index,dst_groceries_index,dst_groceries_index,dst_groceries_index,dst_purchasing_power_index,dst_purchasing_power_index,dst_purchasing_power_index,dst_purchasing_power_index,dst_purchasing_power_index,dst_resraurant_price_index,dst_resraurant_price_index,dst_resraurant_price_index,dst_resraurant_price_index,dst_resraurant_price_index,dst_rate_of_one_usd,dst_rate_of_one_usd,dst_rate_of_one_usd,dst_rate_of_one_usd,dst_rate_of_one_usd,dst_qoe_top20,dst_qoe_top20,dst_qoe_top20,dst_qoe_top20,dst_qoe_top20,dst_qoe_top100,dst_qoe_top100,dst_qoe_top100,dst_qoe_top100,dst_qoe_top100,dst_qoe_top200,dst_qoe_top200,dst_qoe_top200,dst_qoe_top200,dst_qoe_top200,dst_qoe_top500,dst_qoe_top500,dst_qoe_top500,dst_qoe_top500,dst_qoe_top500
Unnamed: 0_level_1,destination_code,aus,can,nz,uk,us,aus,can,nz,uk,us,aus,can,nz,uk,us,aus,can,nz,uk,us,aus,can,nz,uk,us,aus,can,nz,uk,us,aus,can,nz,uk,us,aus,can,nz,uk,us,aus,can,nz,uk,us,aus,can,nz,uk,us,aus,can,nz,uk,us,aus,can,nz,uk,us,aus,can,nz,uk,us,aus,can,nz,uk,us
source_country,year,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2,Unnamed: 51_level_2,Unnamed: 52_level_2,Unnamed: 53_level_2,Unnamed: 54_level_2,Unnamed: 55_level_2,Unnamed: 56_level_2,Unnamed: 57_level_2,Unnamed: 58_level_2,Unnamed: 59_level_2,Unnamed: 60_level_2,Unnamed: 61_level_2,Unnamed: 62_level_2,Unnamed: 63_level_2,Unnamed: 64_level_2,Unnamed: 65_level_2,Unnamed: 66_level_2,Unnamed: 67_level_2,Unnamed: 68_level_2,Unnamed: 69_level_2,Unnamed: 70_level_2,Unnamed: 71_level_2
China,1998,7470.0,9380.0,11160.0,7775.0,,4132.0,2820.0,78.0,2877.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.591828,1.483505,1.868249,0.603824,,,,,,,,,,,,,,,,,,,,,
China,1999,7470.0,9380.0,11160.0,7775.0,11640.0,4578.0,3489.0,247.0,4250.0,46949.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.54995,1.485705,1.889614,0.618057,1.0,,,,,,,,,,,,,,,,,,,,
China,2000,7470.0,9380.0,11160.0,7775.0,11640.0,5008.0,4701.0,1133.0,6158.0,50281.0,45459.0,39380.0,31806.0,38908.0,53904.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.724827,1.485394,2.201149,0.660931,1.0,,,,,,,,,,,,,,,,,,,,
China,2001,,9380.0,11160.0,7775.0,11640.0,,6972.0,3338.0,10388.0,51986.0,,39218.0,32391.0,40840.0,54360.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.54884,2.378751,0.694655,1.0,,,,,,,,,,,,,,,,,,,,
China,2002,7470.0,9380.0,11160.0,7775.0,11640.0,17343.0,10176.0,8481.0,17483.0,63211.0,46270.0,38800.0,33005.0,41517.0,54788.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.840563,1.570343,2.162191,0.667223,1.0,,,,,,,,,,,,,,,,,,,,


In [12]:
dest_df.columns = ['{}_{}'.format(var, code) for var, code in dest_df.columns.values]

In [13]:
dest_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,dst_distance_aus,dst_distance_can,dst_distance_nz,dst_distance_uk,dst_distance_us,dst_students_count_aus,dst_students_count_can,dst_students_count_nz,dst_students_count_uk,dst_students_count_us,dst_average_wage_aus,dst_average_wage_can,dst_average_wage_nz,dst_average_wage_uk,dst_average_wage_us,dst_cost_of_living_aus,dst_cost_of_living_can,dst_cost_of_living_nz,dst_cost_of_living_uk,dst_cost_of_living_us,dst_rent_cost_of_liv_aus,dst_rent_cost_of_liv_can,dst_rent_cost_of_liv_nz,dst_rent_cost_of_liv_uk,dst_rent_cost_of_liv_us,dst_rent_index_aus,dst_rent_index_can,dst_rent_index_nz,dst_rent_index_uk,dst_rent_index_us,dst_groceries_index_aus,dst_groceries_index_can,dst_groceries_index_nz,dst_groceries_index_uk,dst_groceries_index_us,dst_purchasing_power_index_aus,dst_purchasing_power_index_can,dst_purchasing_power_index_nz,dst_purchasing_power_index_uk,dst_purchasing_power_index_us,dst_resraurant_price_index_aus,dst_resraurant_price_index_can,dst_resraurant_price_index_nz,dst_resraurant_price_index_uk,dst_resraurant_price_index_us,dst_rate_of_one_usd_aus,dst_rate_of_one_usd_can,dst_rate_of_one_usd_nz,dst_rate_of_one_usd_uk,dst_rate_of_one_usd_us,dst_qoe_top20_aus,dst_qoe_top20_can,dst_qoe_top20_nz,dst_qoe_top20_uk,dst_qoe_top20_us,dst_qoe_top100_aus,dst_qoe_top100_can,dst_qoe_top100_nz,dst_qoe_top100_uk,dst_qoe_top100_us,dst_qoe_top200_aus,dst_qoe_top200_can,dst_qoe_top200_nz,dst_qoe_top200_uk,dst_qoe_top200_us,dst_qoe_top500_aus,dst_qoe_top500_can,dst_qoe_top500_nz,dst_qoe_top500_uk,dst_qoe_top500_us
source_country,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1
China,1998,7470.0,9380.0,11160.0,7775.0,,4132.0,2820.0,78.0,2877.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.591828,1.483505,1.868249,0.603824,,,,,,,,,,,,,,,,,,,,,
China,1999,7470.0,9380.0,11160.0,7775.0,11640.0,4578.0,3489.0,247.0,4250.0,46949.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.54995,1.485705,1.889614,0.618057,1.0,,,,,,,,,,,,,,,,,,,,
China,2000,7470.0,9380.0,11160.0,7775.0,11640.0,5008.0,4701.0,1133.0,6158.0,50281.0,45459.0,39380.0,31806.0,38908.0,53904.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.724827,1.485394,2.201149,0.660931,1.0,,,,,,,,,,,,,,,,,,,,
China,2001,,9380.0,11160.0,7775.0,11640.0,,6972.0,3338.0,10388.0,51986.0,,39218.0,32391.0,40840.0,54360.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.54884,2.378751,0.694655,1.0,,,,,,,,,,,,,,,,,,,,
China,2002,7470.0,9380.0,11160.0,7775.0,11640.0,17343.0,10176.0,8481.0,17483.0,63211.0,46270.0,38800.0,33005.0,41517.0,54788.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.840563,1.570343,2.162191,0.667223,1.0,,,,,,,,,,,,,,,,,,,,


## Source Country Variables

In [14]:
source_df = dataframe[index_cols + src_cols].copy().drop_duplicates()
source_df.set_index(index_cols, inplace=True)
source_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,src_population,src_qoe_top20,src_qoe_top100,src_qoe_top200,src_qoe_top500,src_gni,src_gdp,src_rate_of_one_usd,src_tertiary_enrolment
source_country,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
China,2017,1386395000,0.0,2.0,13.0,57.0,8630.0,12100000000000.0,6.758755,51.006741
China,2016,1378665000,0.0,2.0,12.0,54.0,8210.0,11100000000000.0,6.644478,48.441051
China,2015,1371220000,0.0,0.0,10.0,44.0,7910.0,11000000000000.0,6.227489,45.350719
China,2014,1364270000,0.0,0.0,9.0,44.0,7500.0,10400000000000.0,6.143434,41.277729
China,2011,1344130000,0.0,0.0,3.0,35.0,5050.0,7550000000000.0,6.461461,25.292101


## Join source and destination predictors

In [15]:
dataset_df = pd.concat([source_df, dest_df], sort=False, axis=1)
dataset_df

Unnamed: 0_level_0,Unnamed: 1_level_0,src_population,src_qoe_top20,src_qoe_top100,src_qoe_top200,src_qoe_top500,src_gni,src_gdp,src_rate_of_one_usd,src_tertiary_enrolment,dst_distance_aus,dst_distance_can,dst_distance_nz,dst_distance_uk,dst_distance_us,dst_students_count_aus,dst_students_count_can,dst_students_count_nz,dst_students_count_uk,dst_students_count_us,dst_average_wage_aus,dst_average_wage_can,dst_average_wage_nz,dst_average_wage_uk,dst_average_wage_us,dst_cost_of_living_aus,dst_cost_of_living_can,dst_cost_of_living_nz,dst_cost_of_living_uk,dst_cost_of_living_us,dst_rent_cost_of_liv_aus,dst_rent_cost_of_liv_can,dst_rent_cost_of_liv_nz,dst_rent_cost_of_liv_uk,dst_rent_cost_of_liv_us,dst_rent_index_aus,dst_rent_index_can,dst_rent_index_nz,dst_rent_index_uk,dst_rent_index_us,dst_groceries_index_aus,dst_groceries_index_can,dst_groceries_index_nz,dst_groceries_index_uk,dst_groceries_index_us,dst_purchasing_power_index_aus,dst_purchasing_power_index_can,dst_purchasing_power_index_nz,dst_purchasing_power_index_uk,dst_purchasing_power_index_us,dst_resraurant_price_index_aus,dst_resraurant_price_index_can,dst_resraurant_price_index_nz,dst_resraurant_price_index_uk,dst_resraurant_price_index_us,dst_rate_of_one_usd_aus,dst_rate_of_one_usd_can,dst_rate_of_one_usd_nz,dst_rate_of_one_usd_uk,dst_rate_of_one_usd_us,dst_qoe_top20_aus,dst_qoe_top20_can,dst_qoe_top20_nz,dst_qoe_top20_uk,dst_qoe_top20_us,dst_qoe_top100_aus,dst_qoe_top100_can,dst_qoe_top100_nz,dst_qoe_top100_uk,dst_qoe_top100_us,dst_qoe_top200_aus,dst_qoe_top200_can,dst_qoe_top200_nz,dst_qoe_top200_uk,dst_qoe_top200_us,dst_qoe_top500_aus,dst_qoe_top500_can,dst_qoe_top500_nz,dst_qoe_top500_uk,dst_qoe_top500_us
source_country,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1
China,1998,1241935000,,,,,800.0,1030000000000.0,8.278958,5.95095,7470.0,9380.0,11160.0,7775.0,,4132.0,2820.0,78.0,2877.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.591828,1.483505,1.868249,0.603824,,,,,,,,,,,,,,,,,,,,,
China,1999,1252735000,,,,,860.0,1090000000000.0,8.27825,6.45695,7470.0,9380.0,11160.0,7775.0,11640.0,4578.0,3489.0,247.0,4250.0,46949.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.54995,1.485705,1.889614,0.618057,1.0,,,,,,,,,,,,,,,,,,,,
China,2000,1262645000,,,,,940.0,1210000000000.0,8.278504,7.62093,7470.0,9380.0,11160.0,7775.0,11640.0,5008.0,4701.0,1133.0,6158.0,50281.0,45459.0,39380.0,31806.0,38908.0,53904.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.724827,1.485394,2.201149,0.660931,1.0,,,,,,,,,,,,,,,,,,,,
China,2001,1271850000,,,,,1010.0,1340000000000.0,8.277068,9.76759,,9380.0,11160.0,7775.0,11640.0,,6972.0,3338.0,10388.0,51986.0,,39218.0,32391.0,40840.0,54360.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.54884,2.378751,0.694655,1.0,,,,,,,,,,,,,,,,,,,,
China,2002,1280400000,,,,,1110.0,1470000000000.0,8.276957,12.50686,7470.0,9380.0,11160.0,7775.0,11640.0,17343.0,10176.0,8481.0,17483.0,63211.0,46270.0,38800.0,33005.0,41517.0,54788.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.840563,1.570343,2.162191,0.667223,1.0,,,,,,,,,,,,,,,,,,,,
China,2003,1288400000,,,,,1280.0,1660000000000.0,8.277037,15.24153,7470.0,9380.0,11160.0,7775.0,11640.0,23448.0,14592.0,16479.0,30690.0,92774.0,46811.0,38802.0,34211.0,42652.0,55460.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.541914,1.401015,1.722099,0.612472,1.0,,,,,,,,,,,,,,,,,,,,
China,2004,1296075000,0.0,0.0,1.0,16.0,1510.0,1960000000000.0,8.276801,17.440081,7470.0,9380.0,11160.0,7775.0,11640.0,28309.0,18141.0,24215.0,47738.0,87943.0,48267.0,39848.0,35420.0,43663.0,56490.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.359752,1.301282,1.508681,0.54618,1.0,0.0,0.0,0.0,2.0,17.0,2.0,4.0,0.0,11.0,51.0,6.0,9.0,0.0,18.0,90.0,14.0,23.0,3.0,42.0,170.0
China,2005,1303720000,0.0,0.0,2.0,18.0,1760.0,2290000000000.0,8.194317,18.849369,7470.0,9380.0,11160.0,7775.0,11640.0,37344.0,19752.0,23260.0,52677.0,92370.0,48891.0,41566.0,36022.0,43597.0,56557.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.309473,1.211405,1.420273,0.549998,1.0,0.0,0.0,0.0,2.0,17.0,2.0,4.0,0.0,11.0,53.0,6.0,8.0,0.0,19.0,90.0,14.0,23.0,5.0,40.0,168.0
China,2006,1311020000,0.0,0.0,3.0,19.0,2060.0,2750000000000.0,7.973438,20.0445,7470.0,9380.0,,7775.0,11640.0,42008.0,12279.0,,50753.0,93672.0,49208.0,42908.0,,44590.0,57454.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.327973,1.134345,,0.543487,1.0,0.0,0.0,,2.0,17.0,2.0,4.0,,11.0,54.0,6.0,8.0,,22.0,87.0,16.0,22.0,,43.0,167.0
China,2007,1317885000,0.0,0.0,2.0,25.0,2510.0,3550000000000.0,7.607532,20.46084,7470.0,9380.0,11160.0,7775.0,11640.0,50418.0,21081.0,13535.0,49594.0,98958.0,50565.0,43969.0,38388.0,45882.0,58597.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.195072,1.074046,1.360675,0.499772,1.0,0.0,0.0,0.0,2.0,17.0,2.0,4.0,0.0,11.0,54.0,7.0,7.0,0.0,23.0,88.0,17.0,22.0,5.0,42.0,166.0


In [17]:
table_full_name = '{}.{}'.format('datasets_for_modelling', 'students_by_year_countrly_transformed')
dataset_df.to_gbq(table_full_name, credentials.project_id, credentials=credentials, if_exists='replace')

1it [00:06,  6.34s/it]


In [16]:
dataset_df.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 80 entries, (China, 1998) to (Malaysia, 2017)
Data columns (total 79 columns):
src_population                    80 non-null int64
src_qoe_top20                     35 non-null float64
src_qoe_top100                    35 non-null float64
src_qoe_top200                    35 non-null float64
src_qoe_top500                    35 non-null float64
src_gni                           80 non-null float64
src_gdp                           80 non-null float64
src_rate_of_one_usd               80 non-null float64
src_tertiary_enrolment            76 non-null float64
dst_distance_aus                  76 non-null float64
dst_distance_can                  76 non-null float64
dst_distance_nz                   76 non-null float64
dst_distance_uk                   76 non-null float64
dst_distance_us                   72 non-null float64
dst_students_count_aus            76 non-null float64
dst_students_count_can            76 non-null float64
dst_stude