In [84]:
import pandas as pd
import numpy as np
import scipy.stats as st
import plotly.express as px
import plotly.graph_objects as go
import psycopg2
import random

In [85]:
def loadData(url):
    try:
        dataset = pd.read_csv(url,delimiter='\t' )
        print(dataset.shape)
        return dataset;
    except:
        print("Oops!", sys.exc_info()[0], "occurred.")

In [86]:
path_file = '../Dataset/inep_saeb_merge_fatorial_2019.csv'
df = loadData(path_file)
df.head()

(62668, 197)


Unnamed: 0,ID_SAEB,ID_REGIAO,ID_UF,CO_ENTIDADE,NU_PRESENTES_5EF,NU_PRESENTES_9EF,NU_PRESENTES_EMT,NU_PRESENTES_EMI,NU_PRESENTES_EM,MEDIA_5EF_LP,...,IN_ESP_EXCLUSIVA_FUND_AF,IN_ESP_EXCLUSIVA_MEDIO_MEDIO,Estrutura,PED,LibrasBraile,TratamentoLixo,PCD,Acessibilidade,Internet,Transporte
0,2019,1,11,11024666,18.0,13.0,,,,184.83,...,0.0,0.0,-1.09,-0.29,-0.28,-0.22,0.29,-1.51,0.14,0.32
1,2019,1,11,11024682,149.0,,,,,223.45,...,0.0,0.0,-0.97,-0.28,-0.5,0.73,1.53,-1.18,0.58,0.16
2,2019,1,11,11024828,21.0,14.0,,,,158.06,...,0.0,0.0,-1.06,-0.29,-0.14,-0.23,-0.5,-1.4,0.04,0.28
3,2019,1,11,11024968,,,66.0,,66.0,,...,0.0,0.0,-1.01,-0.29,-0.17,-0.24,-0.7,-1.49,0.55,-0.17
4,2019,1,11,11025077,13.0,9.0,,,,173.45,...,0.0,0.0,-1.09,-0.3,-0.29,-0.22,0.08,-1.36,-0.03,0.33


In [96]:
dict_uf= {11:'RO',12:'AC',13:'AM',14:'RR',15:'PA',16:'AP',17:'TO',21:'MA',22:'PI',23:'CE',24:'RN',
              25:'PB',26:'PE',27:'AL',28:'SE',29:'BA',31:'MG',32:'ES',33:'RJ',35:'SP',41:'PR',42:'SC',43:'RS',50:'MS',
              51:'MT',52:'GO',53:'DF',}
df = df.replace({"ID_UF": dict_uf})
df.head()

Unnamed: 0,ID_SAEB,ID_REGIAO,ID_UF,CO_ENTIDADE,NU_PRESENTES_5EF,NU_PRESENTES_9EF,NU_PRESENTES_EMT,NU_PRESENTES_EMI,NU_PRESENTES_EM,MEDIA_5EF_LP,...,IN_ESP_EXCLUSIVA_FUND_AF,IN_ESP_EXCLUSIVA_MEDIO_MEDIO,Estrutura,PED,LibrasBraile,TratamentoLixo,PCD,Acessibilidade,Internet,Transporte
0,2019,1,RO,11024666,18.0,13.0,,,,184.83,...,0.0,0.0,-1.09,-0.29,-0.28,-0.22,0.29,-1.51,0.14,0.32
1,2019,1,RO,11024682,149.0,,,,,223.45,...,0.0,0.0,-0.97,-0.28,-0.5,0.73,1.53,-1.18,0.58,0.16
2,2019,1,RO,11024828,21.0,14.0,,,,158.06,...,0.0,0.0,-1.06,-0.29,-0.14,-0.23,-0.5,-1.4,0.04,0.28
3,2019,1,RO,11024968,,,66.0,,66.0,,...,0.0,0.0,-1.01,-0.29,-0.17,-0.24,-0.7,-1.49,0.55,-0.17
4,2019,1,RO,11025077,13.0,9.0,,,,173.45,...,0.0,0.0,-1.09,-0.3,-0.29,-0.22,0.08,-1.36,-0.03,0.33


In [88]:
#Agrupar por Nacional
#Veririfcar Estado com maior/menor pontuacao 
cols = ['Estrutura','PED','LibrasBraile','TratamentoLixo','PCD','Acessibilidade','Internet','Transporte']
#data_brasil_fatores = df[cols].mean()

data_brasil_fatores = pd.DataFrame(df[cols].mean().to_dict(),index=[df[cols].index.values[-1]])

data_brasil_fatores.head()

Unnamed: 0,Estrutura,PED,LibrasBraile,TratamentoLixo,PCD,Acessibilidade,Internet,Transporte
62667,-6e-06,-5e-06,9e-06,1.4e-05,2e-06,-7e-06,5e-06,-1.1e-05


In [97]:
#Agrupar por Estado
#Veririfcar Estado com maior/menor pontuacao 
data_estado_fatores = df.groupby(['ID_UF',]).agg({"Estrutura": ["mean"],
                                                  "PED": ["mean"],
                                                  "LibrasBraile": ["mean"],
                                                 "TratamentoLixo": ["mean"],
                                                 "PCD": ["mean"],
                                                 "Acessibilidade": ["mean"],
                                                  "Internet": ["mean"],
                                                  "Transporte": ["mean"],
                                                  
                                                 },
                                                                                 split_out=4)

data_estado_fatores = data_estado_fatores.reset_index()
data_estado_fatores.columns = ['Estado','Estrutura','PED',
                               'LibrasBraile','TratamentoLixo','PCD','Acessibilidade','Internet','Transporte']
data_estado_fatores.sort_values(['Estrutura'], ascending=[True])
data_estado_fatores.head()

Unnamed: 0,Estado,Estrutura,PED,LibrasBraile,TratamentoLixo,PCD,Acessibilidade,Internet,Transporte
0,AC,-1.071921,-0.114177,-0.026646,-0.059726,1.140488,-1.238963,0.11997,-0.669451
1,AL,-1.093893,-0.156753,-0.172262,-0.165973,0.34398,-0.507392,0.165563,0.046525
2,AM,-0.54328,-0.044762,-0.292696,-0.143387,0.014355,-1.562389,0.090154,-0.531306
3,AP,-1.095925,-0.136644,-0.117432,-0.070548,0.746507,-1.359897,0.187192,-0.738425
4,BA,0.292942,0.058104,0.134164,0.222685,-0.17082,-0.63163,-0.734361,0.203798


In [111]:
#Agrupar por Regiao
#Veririfcar Estado com maior/menor pontuacao 
data_regiao_fatores = df.groupby(['ID_REGIAO',]).agg({"Estrutura": ["mean"],
                                                  "PED": ["mean"],
                                                  "LibrasBraile": ["mean"],
                                                 "TratamentoLixo": ["mean"],
                                                 "PCD": ["mean"],
                                                 "Acessibilidade": ["mean"],
                                                  "Internet": ["mean"],
                                                  "Transporte": ["mean"],
                                                  
                                                 },
                                                                                 split_out=4)

data_regiao_fatores = data_regiao_fatores.reset_index()
data_regiao_fatores.columns = ['Regiao','Estrutura','PED',
                               'LibrasBraile','TratamentoLixo','PCD','Acessibilidade','Internet','Transporte']
data_regiao_fatores.sort_values(['Estrutura'], ascending=[True])
data_regiao_fatores.head()

Unnamed: 0,Regiao,Estrutura,PED,LibrasBraile,TratamentoLixo,PCD,Acessibilidade,Internet,Transporte
0,1,-0.692924,-0.054526,0.049831,-0.075289,0.353805,-1.49383,-0.014121,-0.372056
1,2,-0.324657,-0.033245,-0.072131,0.04731,-0.028739,-0.615854,-0.242183,0.051074
2,3,1.027076,0.07014,-0.023575,-0.002644,-0.008133,0.234359,0.076328,-0.010067
3,4,-0.536984,0.00979,0.02649,-0.010544,-0.157284,1.160835,0.310241,0.128019
4,5,-0.992818,-0.100813,0.361023,-0.098031,-0.007505,1.872333,0.249869,0.070062


In [113]:
#Agrupar por Escola
#Veririfcar Estado com maior/menor pontuacao 
nome_Escola='E M E F PROFESSORA DALILA LEAO'
filter = ((df['ID_UF'] == 'PA') & (df.NO_ENTIDADE == nome_Escola))
data_escola_ = df[filter]

cols = ['Estrutura','PED','LibrasBraile','TratamentoLixo','PCD','Acessibilidade','Internet','Transporte']
#data_brasil_fatores = df[cols].mean()

data_escola_fatores = pd.DataFrame(data_escola_[cols].mean().to_dict(),index=[data_escola_[cols].index.values[-1]])

data_escola_fatores.head()


Unnamed: 0,Estrutura,PED,LibrasBraile,TratamentoLixo,PCD,Acessibilidade,Internet,Transporte
3423,-0.52,-0.29,-0.1,-0.23,-0.46,-1.36,-0.2,-1.76


In [109]:
def plotRadarCensoSaeb():
   
    #Processamento no contexto nacional
    
    
    dimensoes = list(data_brasil_fatores.columns)
    dimensoes = [*dimensoes, dimensoes[0]]

    esc = data_brasil_fatores.values.flatten().tolist()
    esc = [*esc, esc[0]]
    
    
    #Processamento no contexto Estadual
    
    dimensoes_estado = list(data_estado_fatores[data_estado_fatores.Estado == 'PA'].columns)
    dimensoes_estado = [*dimensoes_estado, dimensoes_estado[0]]

    esc_estado = data_estado_fatores[data_estado_fatores.Estado == 'PA'].values.flatten().tolist()
    esc_estado = [*esc_estado, esc_estado[0]]    
    
    
    #Processamento no contexto Escolar    
    
       

    dimensoes_escola = list(data_escola_fatores.columns)
    dimensoes_escola = [*dimensoes_escola, dimensoes_escola[0]]

    esc_escola = data_escola_fatores.values.flatten().tolist()
    esc_escola = [*esc_escola, esc_escola[0]]
    
    #Processamento no contexto do Aluno
    #Pode usar id_aluno ou nome do aluno
    
    
    fig = go.Figure(
        data=[
            go.Scatterpolar(r=esc_estado, theta=dimensoes_estado, name='Estado'),
            go.Scatterpolar(r=esc_escola, theta=dimensoes_escola, name='Escola'),
            go.Scatterpolar(r=esc, theta=dimensoes, name='Nacional'),
           
        ],
        layout=go.Layout(
            title_y=0.98,title_x=0.25,
            title=go.layout.Title(text='Fatores SAEB-CENSO - '+nome_Escola),
            polar={'radialaxis': {'visible': False}},
            showlegend=True
        )
    )
    return fig

In [110]:
fig = plotRadarCensoSaeb()
fig.show()