# Imports

In [1]:
from bs4 import BeautifulSoup
import requests
import numpy as np
import pandas as pd

This notebook contains code use to scrape additional data from wikipedia that would be used to supplement the original data set.  Scraped data includes divorce rate, education indices, GDP per capita, life expectancy and tertiary education attainment by country. 

# Divorce

In [2]:
url = 'https://en.wikipedia.org/wiki/Divorce_demography'

response = requests.get(url)
page = response.text

soup = BeautifulSoup(page,"lxml")

In [3]:
table = soup.find('table')
table

<table class="wikitable sortable">
<tbody><tr>
<th rowspan="2">Country/region
</th>
<th rowspan="2">Continent
</th>
<th colspan="2">Crude rate
</th>
<th colspan="2">Ratio
</th>
<th rowspan="2">Data Source Year
</th></tr>
<tr>
<th>Marriage</th>
<th>Divorce
</th>
<th>Actual</th>
<th>Percent
</th></tr>
<tr>
<td align="left"><span class="datasortkey" data-sort-value="Albania"><span class="flagicon"><img alt="" class="thumbborder" data-file-height="700" data-file-width="980" decoding="async" height="15" src="//upload.wikimedia.org/wikipedia/commons/thumb/3/36/Flag_of_Albania.svg/21px-Flag_of_Albania.svg.png" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/3/36/Flag_of_Albania.svg/32px-Flag_of_Albania.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/3/36/Flag_of_Albania.svg/42px-Flag_of_Albania.svg.png 2x" width="21"/> </span><a href="/wiki/Albania" title="Albania">Albania</a></span>
</td>
<td>Europe
</td>
<td>7.8
</td>
<td>1.9
</td>
<td>4.11
</td>
<td>24.36
</td>
<td>(201

In [4]:
rows = [row for row in table.find_all('tr')]  # tr tag is for rows

In [5]:
rows[2]

<tr>
<td align="left"><span class="datasortkey" data-sort-value="Albania"><span class="flagicon"><img alt="" class="thumbborder" data-file-height="700" data-file-width="980" decoding="async" height="15" src="//upload.wikimedia.org/wikipedia/commons/thumb/3/36/Flag_of_Albania.svg/21px-Flag_of_Albania.svg.png" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/3/36/Flag_of_Albania.svg/32px-Flag_of_Albania.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/3/36/Flag_of_Albania.svg/42px-Flag_of_Albania.svg.png 2x" width="21"/> </span><a href="/wiki/Albania" title="Albania">Albania</a></span>
</td>
<td>Europe
</td>
<td>7.8
</td>
<td>1.9
</td>
<td>4.11
</td>
<td>24.36
</td>
<td>(2016)<sup class="reference" id="cite_ref-Eurostat2018_1-0"><a href="#cite_note-Eurostat2018-1">[1]</a></sup>
</td></tr>

In [6]:
divorce = {}

for row in rows[2:]:
    items = row.find_all('td')
    link = items[0].find('a')
    country = link.text
    continent = items[1].text
    divorce_rate = items[5].text
    divorce[country] = [continent] + [divorce_rate]
    
divorce

{'Albania': ['Europe\n', '24.36\n'],
 'Algeria': ['Africa\n', '15.84\n'],
 'Armenia': ['Europe\n', '16.67\n'],
 'Australia': ['Oceania\n', '43.48\n'],
 'Austria': ['Europe\n', '35.29\n'],
 'Azerbaijan': ['Asia\n', '15.46\n'],
 'Bahamas': ['North America\n', '16.39\n'],
 'Bangladesh': ['Asia\n', '34.48\n'],
 'Belarus': ['Europe\n', '44.57\n'],
 'Belgium': ['Europe\n', '53.85\n'],
 'Bermuda': ['\n', '25.47\n'],
 'Bosnia and Herzegovina': ['Europe\n', '12.5\n'],
 'Brazil': ['South America\n', '21.21\n'],
 'Bulgaria': ['Europe\n', '39.47\n'],
 'Canada': ['North America\n', '47.73\n'],
 'Chile': ['South America\n', '21.21\n'],
 'China': ['Asia\n', '30.11\n'],
 'Colombia': ['South America\n', '30.43\n'],
 'Costa Rica': ['North America\n', '47.17\n'],
 'Croatia': ['Europe\n', '34.69\n'],
 'Cuba': ['North America\n', '55.77\n'],
 'Cyprus': ['Asia\n', '30.67\n'],
 'Czech Republic': ['Europe\n', '50\n'],
 'Denmark': ['Europe\n', '55.56\n'],
 'Dominican Republic': ['North America\n', '40.91\n'],


In [7]:
divorce_df = pd.DataFrame.from_dict(data = divorce, orient = 'index', columns = ['continent', 'divorce_rate']).reset_index()
divorce_df = divorce_df.rename({'index':'country'}, axis = 1)
divorce_df = divorce_df.replace('\n','', regex = True)
divorce_df

Unnamed: 0,country,continent,divorce_rate
0,Albania,Europe,24.36
1,Algeria,Africa,15.84
2,Armenia,Europe,16.67
3,Australia,Oceania,43.48
4,Austria,Europe,35.29
...,...,...,...
100,United States,North America,42.03
101,Uruguay,South America,
102,Uzbekistan,Asia,14.1
103,Venezuela,South America,36.36


In [8]:
divorce_df.to_csv('divorce_df.csv')

# Education Index

In [9]:
url2 = 'https://en.wikipedia.org/wiki/Education_Index'

response2 = requests.get(url2)
page2 = response2.text

soup2 = BeautifulSoup(page2,"lxml")

In [10]:
table2 = soup2.findAll('table')[1]
print(table2.prettify())

<table class="wikitable sortable mw-collapsible" style="text-align:right;">
 <caption>
  2015 Education Index
  <sup class="reference" id="cite_ref-5">
   <a href="#cite_note-5">
    [5]
   </a>
  </sup>
 </caption>
 <tbody>
  <tr>
   <th>
    Rank
   </th>
   <th>
    Country
   </th>
   <th>
    Education
    <br/>
    Index
   </th>
   <th>
    Expected years
    <br/>
    of schooling
   </th>
   <th>
    Mean years
    <br/>
    of schooling
   </th>
   <th>
    HDI rank
   </th>
   <th>
    Continent
   </th>
  </tr>
  <tr>
   <td>
    1
   </td>
   <td align="left">
    <span class="flagicon">
     <img alt="" class="thumbborder" data-file-height="640" data-file-width="1280" decoding="async" height="12" src="//upload.wikimedia.org/wikipedia/commons/thumb/8/88/Flag_of_Australia_%28converted%29.svg/23px-Flag_of_Australia_%28converted%29.svg.png" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/8/88/Flag_of_Australia_%28converted%29.svg/35px-Flag_of_Australia_%28converted%29.

In [11]:
rows2 = [row for row in table2.find_all('tr')]  # tr tag is for rows

In [12]:
rows2[0]

<tr>
<th>Rank</th>
<th>Country</th>
<th>Education<br/>Index</th>
<th>Expected years<br/>of schooling</th>
<th>Mean years<br/> of schooling</th>
<th>HDI rank</th>
<th>Continent
</th></tr>

In [13]:
edu_exp = {}

for row in rows2[1:]:
    items = row.find_all('td')
    link = items[1].find('a')
    country = link.text
    exp_yr = items[3].text
    mean_yr = items[4].text
    continent = items[6].text
    edu_exp[country] =  [continent] + [exp_yr] + [mean_yr]
    
edu_exp

{'Australia': ['Oceania\n', '20.4', '13.2'],
 'Denmark': ['Europe\n', '19.2', '12.7'],
 'New Zealand': ['Oceania\n', '19.2', '12.5'],
 'Norway': ['Europe\n', '17.7', '12.7'],
 'Germany': ['Europe\n', '17.1', '13.2'],
 'Ireland': ['Europe\n', '18.6', '12.3'],
 'Iceland': ['Europe\n', '19.0', '12.2'],
 'United States': ['North America\n', '16.5', '13.2'],
 'Netherlands': ['Europe\n', '18.1', '11.9'],
 'United Kingdom': ['Europe\n', '16.3', '13.3'],
 'Switzerland': ['Europe\n', '16.0', '13.4'],
 'Canada': ['North America\n', '16.3', '13.1'],
 'Slovenia': ['Europe\n', '17.3', '12.1'],
 'Lithuania': ['Europe\n', '16.5', '12.7'],
 'Czech Republic': ['Europe\n', '16.8', '12.3'],
 'Estonia': ['Europe\n', '16.5', '12.5'],
 'Israel': ['Asia\n', '16.0', '12.8'],
 'South Korea': ['Asia\n', '16.6', '12.2'],
 'Sweden': ['Europe\n', '16.1', '12.3'],
 'Poland': ['Europe\n', '16.4', '11.9'],
 'Finland': ['Europe\n', '17.0', '11.2'],
 'Japan': ['Asia\n', '15.3', '12.5'],
 'Belgium': ['Europe\n', '16.6',

In [14]:
edu_exp_df = pd.DataFrame.from_dict(data = edu_exp, orient = 'index', columns = ['continent', 'exp_yr', 'mean_yr']).reset_index()
edu_exp_df = edu_exp_df.rename({'index':'country'}, axis = 1)
edu_exp_df = edu_exp_df.replace('\n','', regex = True)
edu_exp_df

Unnamed: 0,country,continent,exp_yr,mean_yr
0,Australia,Oceania,20.4,13.2
1,Denmark,Europe,19.2,12.7
2,New Zealand,Oceania,19.2,12.5
3,Norway,Europe,17.7,12.7
4,Germany,Europe,17.1,13.2
...,...,...,...,...
183,South Sudan,Africa,4.9,4.8
184,Chad,Africa,7.3,2.3
185,Eritrea,Africa,5.0,3.9
186,Burkina Faso,Africa,7.7,1.4


In [15]:
edu_exp_df.to_csv('edu_exp_df.csv')

# GDP Per Capita

In [16]:
url3 = 'https://www.worldometers.info/gdp/gdp-per-capita/'

response3 = requests.get(url3)
page3 = response3.text

soup3 = BeautifulSoup(page3,"lxml")

In [17]:
table3 = soup3.findAll('table')[0]
print(table3.prettify())

<table cellspacing="0" class="table table-striped table-bordered" id="example2" text-align:left="">
 <thead>
  <tr>
   <th>
    #
   </th>
   <th>
    Country
   </th>
   <th>
    GDP (PPP)
    <br/>
    per capita
    <br/>
    (2017)
   </th>
   <th>
    GDP (nominal)
    <br/>
    per capita
    <br/>
    (2017)
   </th>
   <th>
    vs. World PPP
    <br/>
    GDP per capita
    <br/>
    ($17,100)
   </th>
  </tr>
 </thead>
 <tbody>
  <tr>
   <td>
    1
   </td>
   <td style="font-weight: bold; font-size:17px; text-align:left; padding-left:5px; padding-top:10px; padding-bottom:10px">
    <a href="/gdp/qatar-gdp/">
     Qatar
    </a>
   </td>
   <td style="font-weight: bold;">
    $128,647
   </td>
   <td style="font-weight: bold;">
    $61,264
   </td>
   <td style="font-weight: bold;">
    752%
   </td>
  </tr>
  <tr>
   <td>
    2
   </td>
   <td style="font-weight: bold; font-size:17px; text-align:left; padding-left:5px; padding-top:10px; padding-bottom:10px">
    <a href="/gdp

In [18]:
rows3 = [row for row in table3.find_all('tr')]  # tr tag is for rows

In [19]:
print(rows3[0].prettify())

<tr>
 <th>
  #
 </th>
 <th>
  Country
 </th>
 <th>
  GDP (PPP)
  <br/>
  per capita
  <br/>
  (2017)
 </th>
 <th>
  GDP (nominal)
  <br/>
  per capita
  <br/>
  (2017)
 </th>
 <th>
  vs. World PPP
  <br/>
  GDP per capita
  <br/>
  ($17,100)
 </th>
</tr>



In [20]:
gdp_pc = {}

for row in rows3[1:]:
    items = row.find_all('td')
    country = items[1].text
    gdppc = items[3].text
    gdp_pc[country] =  gdppc
    
gdp_pc

{'Qatar': '$61,264',
 'Macao': '$80,890',
 'Luxembourg': '$105,280',
 'Singapore': '$56,746',
 'Brunei ': '$28,572',
 'Ireland': '$69,727',
 'United Arab Emirates': '$40,325',
 'Kuwait': '$29,616',
 'Switzerland': '$80,296',
 'San Marino': '$48,495',
 'Norway': '$75,428',
 'Hong Kong': '$46,733',
 'United States': '$59,939',
 'Iceland': '$73,233',
 'Netherlands': '$48,796',
 'Denmark': '$57,545',
 'Saudi Arabia': '$20,747',
 'Austria': '$47,261',
 'Germany': '$44,680',
 'Sweden': '$54,075',
 'Australia': '$53,831',
 'Belgium': '$43,325',
 'Bahrain': '$23,715',
 'Canada': '$44,841',
 'Finland': '$45,778',
 'United Kingdom': '$39,532',
 'France': '$39,827',
 'Japan': '$38,214',
 'Oman': '$15,170',
 'Italy': '$32,038',
 'Malta': '$28,585',
 'New Zealand': '$43,415',
 'Aruba': '$25,630',
 'Spain': '$28,175',
 'Israel': '$42,852',
 'South Korea': '$29,958',
 'Czech Republic (Czechia)': '$20,291',
 'Slovenia': '$23,488',
 'Cyprus': '$18,695',
 'Estonia': '$20,170',
 'Lithuania': '$16,709',
 

In [21]:
gdp_pc_df = pd.DataFrame.from_dict(data = gdp_pc, orient = 'index', columns = ['gdppc']).reset_index()
gdp_pc_df = gdp_pc_df.rename({'index':'country'}, axis = 1)
gdp_pc_df = gdp_pc_df.replace('\n','', regex = True)
gdp_pc_df.gdppc = gdp_pc_df.gdppc.apply(lambda x: x.strip('$'))
gdp_pc_df

Unnamed: 0,country,gdppc
0,Qatar,61264
1,Macao,80890
2,Luxembourg,105280
3,Singapore,56746
4,Brunei,28572
...,...,...
184,American Samoa,11399
185,Andorra,39128
186,Guam,35665
187,Cuba,8541


In [22]:
gdp_pc_df.to_csv('gdp_df.csv')

# Life Expectancy

In [23]:
url4 = 'https://en.wikipedia.org/wiki/List_of_countries_by_life_expectancy'

response4 = requests.get(url4)
page4 = response4.text

soup4 = BeautifulSoup(page4,"lxml")

In [24]:
table4 = soup4.findAll('table')[0]
print(table4.prettify())

<table class="wikitable sortable">
 <caption>
  Countries by life expectancy at birth in 2018 (2019 report)
  <sup class="reference" id="cite_ref-UNDP2019_7-0">
   <a href="#cite_note-UNDP2019-7">
    [7]
   </a>
  </sup>
  <sup class="reference" id="cite_ref-8">
   <a href="#cite_note-8">
    [8]
   </a>
  </sup>
 </caption>
 <tbody>
  <tr>
   <th rowspan="2">
    Rank
   </th>
   <th rowspan="2">
    Country/Region
   </th>
   <th colspan="3">
    Life expectancy at birth (in years)
   </th>
  </tr>
  <tr>
   <th>
    Overall
   </th>
   <th>
    Female
   </th>
   <th>
    Male
   </th>
  </tr>
  <tr>
   <td>
    1
   </td>
   <td>
    <span class="flagicon">
     <img alt="" class="thumbborder" data-file-height="600" data-file-width="900" decoding="async" height="15" src="//upload.wikimedia.org/wikipedia/commons/thumb/5/5b/Flag_of_Hong_Kong.svg/23px-Flag_of_Hong_Kong.svg.png" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/5/5b/Flag_of_Hong_Kong.svg/35px-Flag_of_Hong_Kong.sv

In [25]:
rows4 = [row for row in table4.find_all('tr')]  # tr tag is for rows
print(rows4[2].prettify())

<tr>
 <td>
  1
 </td>
 <td>
  <span class="flagicon">
   <img alt="" class="thumbborder" data-file-height="600" data-file-width="900" decoding="async" height="15" src="//upload.wikimedia.org/wikipedia/commons/thumb/5/5b/Flag_of_Hong_Kong.svg/23px-Flag_of_Hong_Kong.svg.png" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/5/5b/Flag_of_Hong_Kong.svg/35px-Flag_of_Hong_Kong.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/5/5b/Flag_of_Hong_Kong.svg/45px-Flag_of_Hong_Kong.svg.png 2x" width="23"/>
  </span>
  <a href="/wiki/Hong_Kong" title="Hong Kong">
   Hong Kong
  </a>
 </td>
 <td>
  84.7
 </td>
 <td>
  87.6
 </td>
 <td>
  81.8
 </td>
</tr>



In [26]:
life_exp = {}

for row in rows4[2:]:
    items = row.find_all('td')
    country = items[1].find('a').text
    overall = items[2].text
    female = items[3].text
    male = items[4].text
    life_exp[country] =  [overall] + [female] + [male]

life_exp

{'Hong Kong': ['84.7\n', '87.6\n', '81.8\n'],
 'Japan': ['84.5\n', '87.5\n', '81.3\n'],
 'Singapore': ['83.7\n', '85.7\n', '81.4\n'],
 'Switzerland': ['83.6\n', '85.5\n', '81.7\n'],
 'Italy': ['83.4\n', '85.4\n', '81.1\n'],
 'Spain': ['83.4\n', '86.1\n', '80.7\n'],
 'Australia': ['83.3\n', '85.3\n', '81.3\n'],
 'Iceland': ['82.9\n', '84.4\n', '81.3\n'],
 'Israel': ['82.8\n', '84.4\n', '81.1\n'],
 'South Korea': ['82.8\n', '85.8\n', '79.7\n'],
 'Sweden': ['82.7\n', '84.4\n', '80.9\n'],
 'France': ['82.5\n', '85.4\n', '79.6\n'],
 'Malta': ['82.4\n', '84.1\n', '80.5\n'],
 'Canada': ['82.3\n', '84.3\n', '80.3\n'],
 'Norway': ['82.3\n', '84.3\n', '80.3\n'],
 'Luxembourg': ['82.1\n', '84.2\n', '80.0\n'],
 'Ireland': ['82.1\n', '83.7\n', '80.4\n'],
 'New Zealand': ['82.1\n', '83.9\n', '80.4\n'],
 'Netherlands': ['82.1\n', '83.8\n', '80.4\n'],
 'Greece': ['82.1\n', '84.5\n', '79.6\n'],
 'Portugal': ['81.9\n', '84.7\n', '78.8\n'],
 'Andorra': ['81.8\n', '\n', '\n'],
 'Finland': ['81.7\n', '84.6

In [27]:
life_exp_df = pd.DataFrame.from_dict(data = life_exp, orient = 'index', columns = ['overall_life', 'female_life', 'male_life']).reset_index()
life_exp_df = life_exp_df.rename({'index':'country'}, axis = 1)
life_exp_df = life_exp_df.replace('\n','', regex = True)
life_exp_df

Unnamed: 0,country,overall_life,female_life,male_life
0,Hong Kong,84.7,87.6,81.8
1,Japan,84.5,87.5,81.3
2,Singapore,83.7,85.7,81.4
3,Switzerland,83.6,85.5,81.7
4,Italy,83.4,85.4,81.1
...,...,...,...,...
186,Nigeria,54.3,55.2,53.5
187,Sierra Leone,54.3,55.1,53.5
188,Chad,54.0,55.4,52.6
189,Lesotho,53.7,57.0,50.6


In [28]:
life_exp_df.to_csv('life_exp_df.csv')

Teriary Edu: universities as well as institutions that teach specific capacities of higher learning such as colleges, technical training institutes, community colleges, nursing schools, research laboratories, centers of excellence, and distance learning centers


# Tertiary Education Attainment

In [29]:
url5 = 'https://en.wikipedia.org/wiki/List_of_countries_by_tertiary_education_attainment'

response5 = requests.get(url5)
page5 = response5.text

soup5 = BeautifulSoup(page5,"lxml")

In [30]:
table5 = soup5.findAll('table')[0]
print(table5.prettify())

<table class="wikitable sortable" style="text-align:center">
 <tbody>
  <tr style="background:#ececec; vertical-align:top;">
   <th rowspan="2">
    <br/>
    Country
   </th>
   <th rowspan="2">
    Age
    <br/>
    25–64
    <br/>
    (%)
   </th>
   <th colspan="4">
    Age
   </th>
   <th rowspan="2">
    <br/>
    Year
   </th>
   <th rowspan="2">
    <br/>
    Non-OECD
   </th>
  </tr>
  <tr>
   <th>
    25–34
    <br/>
    (%)
   </th>
   <th>
    35–44
    <br/>
    (%)
   </th>
   <th>
    45–54
    <br/>
    (%)
   </th>
   <th>
    55–64
    <br/>
    (%)
   </th>
  </tr>
  <tr>
   <td align="left">
    <span class="flagicon">
     <img alt="" class="thumbborder" data-file-height="640" data-file-width="1280" decoding="async" height="12" src="//upload.wikimedia.org/wikipedia/commons/thumb/8/88/Flag_of_Australia_%28converted%29.svg/23px-Flag_of_Australia_%28converted%29.svg.png" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/8/88/Flag_of_Australia_%28converted%29.svg/

In [31]:
rows5 = [row for row in table5.find_all('tr')]  # tr tag is for rows
print(rows5[2].prettify())

<tr>
 <td align="left">
  <span class="flagicon">
   <img alt="" class="thumbborder" data-file-height="640" data-file-width="1280" decoding="async" height="12" src="//upload.wikimedia.org/wikipedia/commons/thumb/8/88/Flag_of_Australia_%28converted%29.svg/23px-Flag_of_Australia_%28converted%29.svg.png" srcset="//upload.wikimedia.org/wikipedia/commons/thumb/8/88/Flag_of_Australia_%28converted%29.svg/35px-Flag_of_Australia_%28converted%29.svg.png 1.5x, //upload.wikimedia.org/wikipedia/commons/thumb/8/88/Flag_of_Australia_%28converted%29.svg/46px-Flag_of_Australia_%28converted%29.svg.png 2x" width="23"/>
  </span>
  <a href="/wiki/Australia" title="Australia">
   Australia
  </a>
 </td>
 <td>
  42
 </td>
 <td>
  48
 </td>
 <td>
  46
 </td>
 <td>
  38
 </td>
 <td>
  33
 </td>
 <td>
  2014
 </td>
 <td>
 </td>
</tr>



In [32]:
tert_edu = {}

for row in rows5[2:]:
    items = row.find_all('td')
    country = items[0].find('a').text
    _25_64 = items[1].text
    _25_34 = items[2].text
    _35_44 = items[3].text
    _45_54 = items[4].text
    _55_64 = items[5].text
    tert_edu[country] =  [_25_64] + [_25_34] + [_35_44] + [_45_54] + [_55_64]

tert_edu

{'Australia': ['42', '48', '46', '38', '33'],
 'Austria': ['30', '38', '33', '27', '21'],
 'Belgium': ['37', '44', '42', '34', '26'],
 'Brazil': ['14', '15', '14', '14', '11'],
 'Canada': ['54', '58', '61', '51', '45'],
 'Chile': ['21', '27', '24', '17', '14'],
 'China': ['17', '27', '15', '7', '2'],
 'Colombia': ['22', '28', '23', '18', '16'],
 'Costa Rica': ['18', '21', '19', '17', '17'],
 'Czech Republic': ['22', '30', '21', '20', '15'],
 'Denmark': ['36', '42', '41', '33', '29'],
 'Estonia': ['38', '40', '39', '35', '36'],
 'Finland': ['42', '40', '50', '44', '34'],
 'France': ['32', '44', '39', '26', '20'],
 'Germany': ['27', '28', '29', '26', '25'],
 'Greece': ['28', '39', '27', '26', '21'],
 'Hungary': ['23', '32', '25', '20', '17'],
 'Iceland': ['37', '41', '42', '36', '29'],
 'Indonesia': ['8', '10', '9', '8', '4'],
 'Ireland': ['41', '51', '49', '34', '24'],
 'Israel': ['49', '46', '53', '48', '47'],
 'Italy': ['17', '24', '19', '13', '12'],
 'Japan': ['48', '59', '53', '47',

In [33]:
tert_edu_df = pd.DataFrame.from_dict(data = tert_edu, orient = 'index', columns = ['_25_64', '_25_34', '_35_44', '_45_54', '_55_64']).reset_index()
tert_edu_df = tert_edu_df.rename({'index':'country'}, axis = 1)
tert_edu_df

Unnamed: 0,country,_25_64,_25_34,_35_44,_45_54,_55_64
0,Australia,42,48,46,38,33
1,Austria,30,38,33,27,21
2,Belgium,37,44,42,34,26
3,Brazil,14,15,14,14,11
4,Canada,54,58,61,51,45
5,Chile,21,27,24,17,14
6,China,17,27,15,7,2
7,Colombia,22,28,23,18,16
8,Costa Rica,18,21,19,17,17
9,Czech Republic,22,30,21,20,15


In [34]:
tert_edu_df.to_csv('tert_edu_df.csv')