# Gapminder data
## The data in this lesson was obtained from the site gapminder.org. The variables included are:

- Aged 15+ Employment Rate (%)
- Life Expectancy (years)
- GDP/capita (US$, inflation adjusted)
- Primary school completion (% of boys)
- Primary school completion (% of girls)

### 1. Importing libraries

In [2]:
# importing numpy and pandas

import numpy as np
import pandas as pd
print('All imports completed!')

All imports completed!


### 2. Reading files

In [3]:
# Load each Gapminder indicator from its CSV file with pandas.
# The paths are relative, so the files are looked up in the current
# working directory. Later cells use these names directly.

employment_above_15 = pd.read_csv('employment_above_15.csv')
life_expectancy = pd.read_csv('life_expectancy.csv')
gdp_per_capita = pd.read_csv('gdp_per_capita.csv')
female_completion_rate = pd.read_csv('female_completion_rate.csv')
male_completion_rate = pd.read_csv('male_completion_rate.csv')
print('All data is loaded!')

All data is loaded!


### 3. Check out some values!

In [27]:
# print the first 20 countries in the employment file:

print(employment_above_15['Country'][:20])

0                Afghanistan
1                    Albania
2                    Algeria
3                     Angola
4                  Argentina
5                    Armenia
6                  Australia
7                    Austria
8                 Azerbaijan
9                    Bahamas
10                   Bahrain
11                Bangladesh
12                  Barbados
13                   Belarus
14                   Belgium
15                    Belize
16                     Benin
17                    Bhutan
18                   Bolivia
19    Bosnia and Herzegovina
Name: Country, dtype: object


In [30]:
# print the first 20 employment values for the year 2007 in the employment file:

print(employment_above_15['2007'][:20])

0     55.700001
1     51.400002
2     50.500000
3     75.699997
4     58.400002
5     40.099998
6     61.500000
7     57.099998
8     60.900002
9     66.599998
10    60.400002
11    68.099998
12    66.900002
13    53.400002
14    48.599998
15    56.799999
16    71.599998
17    58.400002
18    70.400002
19    41.200001
Name: 2007, dtype: float64


### 4. Operating with datasets.

#### 4.1. Employment data.

In [28]:
def print_max_country_employment(data, size, year):
    """Print the country with the highest employment among the first rows.

    Only the first ``size`` rows of ``data`` are considered. Missing (NaN)
    values are skipped when searching for the maximum.

    Args:
        data: DataFrame with a 'Country' column and one column per year.
        size: Number of leading rows to look at.
        year: Label of the year column to inspect, e.g. '2007'.

    Raises:
        ValueError: If the selected rows are empty or contain only NaN.
    """
    subset = data.iloc[:size]
    values = subset[year].values
    # nanargmax ignores missing values; a plain argmax would return the
    # position of the first NaN and report "nan" as the maximum.
    position = np.nanargmax(values)
    # argmax yields a position, not an index label, so look the row up
    # positionally; this keeps working when the index is not 0..n-1.
    country = subset['Country'].iloc[position]
    value = values[position]
    print('Country with maximum employment({}) is {}'.format(value, country))

In [29]:
# print the maximum 2007 employment value and its country among the first 20 rows.
print_max_country_employment(employment_above_15, 20, '2007')

Country with maximum employment(75.6999969482) is Angola


#### 4.2. Completion data.

In [35]:
def overall_completion_rate(female_completion, male_completion):
    """Return the element-wise average of two completion-rate sequences.

    Both arguments are converted to NumPy arrays and must list the
    countries in the same order. The result is a NumPy array holding,
    for each position, the mean of the female and male values; a NaN in
    either input produces NaN at that position.
    """
    return (np.array(female_completion) + np.array(male_completion)) / 2

In [37]:
print(overall_completion_rate(female_completion_rate['2007'][:20], male_completion_rate['2007'][:20]))

[       nan        nan        nan        nan  96.416025        nan
        nan        nan        nan        nan 102.644275 101.41129
  93.316285        nan 103.455575  98.148215 102.35113         nan
        nan  91.77855 ]
