In [12]:
import pandas as pd
import warnings
# Suppress every warning category for the rest of the session.
# NOTE(review): this also hides pandas diagnostics raised by later cells
# (e.g. chained-assignment or dtype-deprecation warnings) — consider
# narrowing the filter to a specific category once the notebook is stable.
warnings.filterwarnings("ignore")

In [13]:
# Load the raw dataset from the CSV that sits next to the notebook.
df = pd.read_csv(filepath_or_buffer="espectativa_vida.csv")

In [14]:
# Build a working frame with short snake_case column names.
# Each pair is (new name, original header); the original headers are copied
# verbatim, including their stray leading/trailing/double spaces.
novo_df = pd.DataFrame({
    novo_nome: df[nome_original]
    for novo_nome, nome_original in (
        ('country', 'Country'),
        ('life_expectancy', 'Life expectancy '),
        ('year', 'Year'),
        ('status', 'Status'),
        ('adult_mortality', 'Adult Mortality'),
        ('inf_death', 'infant deaths'),
        ('alcohol', 'Alcohol'),
        ('hepatitisB', 'Hepatitis B'),
        ('measles', 'Measles '),
        ('bmi', ' BMI '),
        ('polio', 'Polio'),
        ('diphtheria', 'Diphtheria '),
        ('hiv', ' HIV/AIDS'),
        ('gdp', 'GDP'),
        ('total_expenditure', 'Total expenditure'),
        ('thinness_till19', ' thinness  1-19 years'),
        ('thinness_till9', ' thinness 5-9 years'),
        ('school', 'Schooling'),
        ('population', ' Population'),
    )
})

In [15]:
def filtra_anos(data, years):
    """
    Keep only the rows whose 'year' is in ``years`` and overwrite every numeric
    column (except 'year') with that country's mean over the kept rows.

    Rows, index labels, column order and the 'year' values are preserved, so a
    country still has one row per selected year, all carrying the same averages.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a 'year' column and a 'country' column.
    years : list-like
        Year values to keep (forwarded to ``Series.isin``).

    Returns
    -------
    pandas.DataFrame
        A new frame; ``data`` itself is never modified. Averaged columns come
        back as floating point, even if they were integer in ``data``.
        Rows whose 'country' is missing belong to no group and keep their
        original values.
    """
    # Explicit copy so the writes below can never reach (or warn about) `data`.
    filtered_data = data.loc[data['year'].isin(years)].copy()

    # Numeric columns to average; 'year' must survive untouched and the
    # grouping key itself is never averaged.
    numeric_columns = [
        col
        for col in filtered_data.select_dtypes(include='number').columns
        if col not in ('year', 'country')
    ]
    if filtered_data.empty or not numeric_columns:
        return filtered_data

    # One vectorised pass: every row receives the mean of its own country,
    # aligned on the filtered frame's index.
    country_means = (
        filtered_data.groupby('country')[numeric_columns].transform('mean')
    )

    # groupby skips rows with a missing key (they come back as NaN); restore
    # their original values instead of blanking them.
    no_country = filtered_data['country'].isna()
    if no_country.any():
        country_means.loc[no_country, :] = filtered_data.loc[
            no_country, numeric_columns
        ]

    # Replace whole columns at once: integer columns become float cleanly,
    # without relying on element-wise upcasting during assignment.
    filtered_data[numeric_columns] = country_means
    return filtered_data


In [16]:
# Years to keep; each country's numeric values are averaged over them.
anos_desejados = list(range(2013, 2016))
resultado = filtra_anos(data=novo_df, years=anos_desejados)

In [17]:
# Display the averaged frame as the cell output (the recorded rendering
# below is elided in the middle with a "..." row).
resultado

Unnamed: 0,country,life_expectancy,year,status,adult_mortality,inf_death,alcohol,hepatitisB,measles,bmi,polio,diphtheria,hiv,gdp,total_expenditure,thinness_till19,thinness_till9,school,population
0,Afghanistan,61.600000,2015,Developing,267.333333,64.000000,0.010000,63.666667,692.000000,18.600000,42.000000,63.666667,0.100000,609.566900,8.156667,17.466667,17.500000,10.000000,2.193192e+07
1,Afghanistan,61.600000,2014,Developing,267.333333,64.000000,0.010000,63.666667,692.000000,18.600000,42.000000,63.666667,0.100000,609.566900,8.156667,17.466667,17.500000,10.000000,2.193192e+07
2,Afghanistan,61.600000,2013,Developing,267.333333,64.000000,0.010000,63.666667,692.000000,18.600000,42.000000,63.666667,0.100000,609.566900,8.156667,17.466667,17.500000,10.000000,2.193192e+07
16,Albania,77.500000,2015,Developing,55.333333,0.000000,4.623333,98.666667,0.000000,57.233333,98.666667,98.666667,0.100000,4314.904919,5.846667,1.233333,1.333333,14.200000,2.024597e+05
17,Albania,77.500000,2014,Developing,55.333333,0.000000,4.623333,98.666667,0.000000,57.233333,98.666667,98.666667,0.100000,4314.904919,5.846667,1.233333,1.333333,14.200000,2.024597e+05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2907,Zambia,61.966667,2014,Developing,225.000000,28.000000,1.210000,58.000000,17.666667,22.833333,53.666667,58.000000,4.400000,1079.521735,4.990000,6.333333,6.166667,12.500000,1.079961e+06
2908,Zambia,61.966667,2013,Developing,225.000000,28.000000,1.210000,58.000000,17.666667,22.833333,53.666667,58.000000,4.400000,1079.521735,4.990000,6.333333,6.166667,12.500000,1.079961e+06
2922,Zimbabwe,61.400000,2015,Developing,368.666667,23.333333,6.445000,91.000000,0.000000,22.300000,91.666667,91.000000,6.433333,119.131949,6.660000,5.900000,5.733333,10.333333,1.044819e+07
2923,Zimbabwe,61.400000,2014,Developing,368.666667,23.333333,6.445000,91.000000,0.000000,22.300000,91.666667,91.000000,6.433333,119.131949,6.660000,5.900000,5.733333,10.333333,1.044819e+07


In [18]:
# Persist the averaged rows; the index labels are not written to the file.
resultado.to_csv(path_or_buf='analise_2013_a_2015.csv', index=False)