# Exercícios Práticos

Crie um dataframe único com a informação do GDP e quantidade de matrículas por nível de educação por ano.

O aumento das matrículas na educação primária, secundária e terciária acompanhou o aumento do GDP per capita nos últimos 15 anos na Índia?

# Importação da biblioteca pandas e leitura do csv de Education

In [None]:
import pandas as pd

# Location of the education table; it is fetched over HTTP when read below.
educ = 'https://raw.githubusercontent.com/abnr/ml-data/main/SYB61_T07_Education.csv'

# header=1 takes the column names from the second line of the file (the first
# line is skipped); fields are split on tab characters.
# NOTE(review): 'unicode_escape' also interprets backslash escapes in the
# data -- confirm the file's real encoding (e.g. latin-1 or utf-8).
df_educ = pd.read_csv(
    educ,
    sep='\t',
    header=1,
    encoding='unicode_escape',
)

# Apresenta o DataFrame construído a partir do CSV de Education

In [None]:
# Print a summary of the education DataFrame (columns, non-null counts and
# dtypes) to check how the file was parsed.
df_educ.info()

# Filtra o DataFrame Education, selecionando a Índia e as séries de alunos matriculados.

In [None]:
# Give the unnamed second column a usable label; it is compared against the
# country name below.
df_educ = df_educ.rename(columns={'Unnamed: 1': 'Country'})

# Enrollment indicators to keep, one per education level.
series = ['Students enrolled in primary education (thousands)',
          'Students enrolled in secondary education (thousands)',
          'Students enrolled in tertiary education (thousands)']

# Build both row filters up front and apply them in a single selection.
is_india = df_educ['Country'] == 'India'
is_enrollment = df_educ['Series'].isin(series)

# .copy() makes the result an independent frame, so later cells that assign
# to or modify it do not raise SettingWithCopyWarning.
df_educ = df_educ[is_india & is_enrollment].copy()
df_educ

# Tratamento do DataFrame Education, retirando a vírgula

In [None]:
# Convert 'Value' to integers: cast to text, strip the commas (presumably
# thousands separators -- confirm against the raw file) and parse as int.
# regex=False makes the comma a literal match on every pandas version.
# assign() returns a new frame instead of writing into what may be a filtered
# slice, which avoids SettingWithCopyWarning.
df_educ = df_educ.assign(
    Value=df_educ['Value'].astype(str).str.replace(',', '', regex=False).astype(int)
)
df_educ

# Tratamento do DataFrame Education, removendo colunas desnecessárias

In [None]:
# Remove, in place, every column that is not needed for the yearly totals.
df_educ.drop(
    columns=['Region/Country/Area', 'Country', 'Source', 'Footnotes', 'Series'],
    inplace=True,
)
df_educ

# Tratamento do DataFrame Education, agrupando por ano

In [None]:
# Collapse to one row per year by summing the remaining columns, then cast
# the totals to int. 'Year' becomes the index of the result.
df_educ = (
    df_educ
    .groupby('Year')
    .sum()
    .astype(int)
)
df_educ

# Importação da biblioteca pandas e leitura do csv de India World Bank

In [None]:
import pandas as pd

# Location of the India World Bank table; fetched over HTTP when read below.
gdp = 'https://raw.githubusercontent.com/abnr/ml-data/main/India_World_Bank_Info.csv'

# The first four lines of the file are skipped before the header row; fields
# are split on tabs and a malformed line raises instead of being dropped.
df_gdp = pd.read_csv(
    gdp,
    skiprows=4,
    sep='\t',
    on_bad_lines='error',
)

# Apresenta o DataFrame construído a partir do CSV de India World Bank

In [None]:
# Print a summary of the World Bank DataFrame (columns, non-null counts and
# dtypes) to check how the file was parsed.
df_gdp.info()

In [None]:
# Keep only the rows whose indicator name contains the GDP-per-capita label.
# regex=False matches the text literally, so the parentheses and '$' need no
# escaping. na=False treats a missing indicator name as "no match", keeping
# the mask strictly boolean so the row selection cannot fail on NaN.
filtro = df_gdp['Indicator Name'].str.contains(
    'GDP per capita (current US$)', regex=False, na=False
)
df_gdp = df_gdp[filtro]
df_gdp

In [None]:
# Year columns to keep: 2003 through 2017 inclusive.
colunas = range(2003, 2018)

# Reshape from one column per year to one row per year. The column labels
# are looked up as strings, so the resulting 'Year' values are strings too.
df_gdp = df_gdp.melt(
    id_vars=['Indicator Name'],
    value_vars=[str(ano) for ano in colunas],
    var_name='Year',
    value_name='Value',
)

# Total 'Value' per year, indexed by 'Year'.
df_gdp = df_gdp.groupby('Year')[['Value']].sum()

# Round to the nearest whole number and store as integers.
df_gdp['Value'] = df_gdp['Value'].round().astype('int64')
df_gdp

In [None]:
# Cast the GDP year labels to int before joining.
# NOTE(review): assumes df_educ's 'Year' index is already integer -- confirm.
df_gdp.index = df_gdp.index.astype(int)

# Outer join on 'Year' keeps the years present in either table; the two
# 'Value' columns are told apart by the '_gdp' / '_educ' suffixes.
df = pd.merge(
    df_gdp,
    df_educ,
    how='outer',
    on='Year',
    suffixes=('_gdp', '_educ'),
)
df

In [None]:
# Fill the missing enrollment values with Krogh interpolation, then divide
# by 10.
df['Value_educ'] = df['Value_educ'].interpolate(method='krogh').div(10)

# Multiply GDP by 10; together with the division above this rescales the two
# series before they are plotted against the same axis.
df['Value_gdp'] = df['Value_gdp'].mul(10)

# Store the enrollment figures as integers (fractional part dropped).
df['Value_educ'] = df['Value_educ'].astype(int)
df

In [None]:
import matplotlib.pyplot as plt


plt.figure(figsize=(15, 10))

# The x axis is the DataFrame index; the two y series are the scaled columns.
x = df.index
y1 = df['Value_educ']
y2 = df['Value_gdp']

# Draw both curves first, then annotate every point with its value, so the
# artists are created in the same order as before.
for valores, rotulo in ((y1, 'Education'), (y2, 'GDP')):
    plt.plot(x, valores, label=rotulo)
for valores in (y1, y2):
    for posicao, valor in zip(x, valores):
        plt.text(posicao, valor, str(valor), ha='center')

# Axis cosmetics: one tick per index value, fixed y range, grid and labels.
plt.title('GDP x Education')
plt.xlabel('Year')
plt.ylabel('Value')
plt.xticks(x)
plt.ylim(5000, 37000)
plt.grid()

plt.legend()
plt.show()
df