# Análise de Performance de Estudantes

#### Importações 

In [9]:
from repository.loader_datasource import load_csv_data
from services.dataframe_formatter import header_formatter

import plotly.graph_objs as go
import plotly.express as px

#### Carregamento e Tratativa dos Dados

In [10]:
# Relative path of the CSV file used as the datasource
path = '../datasource/StudentsPerformance.csv'

# Load the data from that path first, then pass the result through the
# header formatter to obtain the working DataFrame
raw_data = load_csv_data(path)
df = header_formatter(raw_data)

In [11]:
# Per-student final score: row-wise mean of the three test scores,
# rounded to zero decimal places and stored as a new column
score_columns = ['math_score', 'reading_score', 'writing_score']
df['final_score_avg'] = df[score_columns].mean(axis=1).round()

#### Análise Gráfica

In [12]:
# Mean of final_score_avg for every (gender, race_ethnicity) combination
df_final_avg_grouped_by_gender = (
    df.groupby(['gender', 'race_ethnicity'])['final_score_avg']
    .mean()
    .reset_index()
)

# Grouped bar chart: ethnic groups on the x-axis, one coloured bar per gender
fig = px.bar(
    df_final_avg_grouped_by_gender,
    x='race_ethnicity',
    y='final_score_avg',
    color='gender',
    barmode='group',
    text='final_score_avg',
    title='Comparação de Médias Finais por Gênero e Etnia',
    labels={'final_score_avg': 'Média de Notas Finais', 'gender': 'Gênero'},
)

# Render each bar's value inside the bar using the '.2s' format specifier
fig.update_traces(texttemplate='%{text:.2s}', textposition='inside')

fig.show()

#### Média de Pontuação Final por Gênero e Grupo Étnico
##### Em uma análise inicial, é possível perceber que, de maneira geral, em todas as etnias o gênero feminino se destaca em relação ao gênero masculino.

| gender | race_ethnicity | final_score_avg |
|--------|----------------|-----------------|
| female | group A        | 65.083333       |
| female | group B        | 67.548077       |
| female | group C        | 68.588889       |
| female | group D        | 71.441860       |
| female | group E        | 74.014493       |
| male   | group A        | 61.509434       |
| male   | group B        | 62.988372       |
| male   | group C        | 65.223022       |
| male   | group D        | 66.984962       |
| male   | group E        | 71.450704       |




#### Análise Gráfica de Distribuição das Notas

In [13]:
# Scatter plot of each student's reading score against their final average,
# coloured by gender. The y-axis uses 'reading_score' so the plotted data
# agrees with the chart title and with the 'Nota de Leitura' axis label
# (the previous y='math_score' contradicted both and left the
# 'reading_score' label unused).
fig = px.scatter(
    df,
    x='final_score_avg',
    y='reading_score',
    color='gender',
    title='Distribuição de Notas de Leitura por Média Final',
    labels={'final_score_avg': 'Média de Notas Finais', 'reading_score': 'Nota de Leitura'},
)

fig.show()

In [14]:
# Box plot of final_score_avg for each ethnic group, split by gender
fig = px.box(
    df,
    x="race_ethnicity",
    y="final_score_avg",
    color="gender",
    points="all",  # also draw the individual points alongside each box
    title="Distribuição da Pontuação Final por Gênero e Grupo Étnico",
    labels={
        "race_ethnicity": "Grupo Étnico",
        "final_score_avg": "Média da Pontuação Final",
    },
)

fig.show()

In [15]:
# Median of final_score_avg for every (gender, test_preparation_course) pair
# NOTE(review): the aggregation is a median while the y-axis label below
# reads 'Média' — confirm which one is intended.
df_gender_groupeb_test_preparation = (
    df.groupby(['gender', 'test_preparation_course'])['final_score_avg']
    .median()
    .reset_index()
)

# Grouped bar chart with the 'none' category placed before 'completed'
fig = px.bar(
    df_gender_groupeb_test_preparation,
    x='test_preparation_course',
    y='final_score_avg',
    color='gender',
    barmode='group',
    category_orders={'test_preparation_course': ['none', 'completed']},
    title='Distribuição da Pontuação Final por Gênero e Preparação para o Teste',
    labels={'final_score_avg': 'Média da Pontuação Final'},
)

fig.show()

In [16]:
# Show the working DataFrame (including the added final_score_avg column) as the cell output
df

Unnamed: 0,gender,race_ethnicity,parental_level_of_education,lunch,test_preparation_course,math_score,reading_score,writing_score,final_score_avg
0,female,group B,bachelor's degree,standard,none,72,72,74,73.0
1,female,group C,some college,standard,completed,69,90,88,82.0
2,female,group B,master's degree,standard,none,90,95,93,93.0
3,male,group A,associate's degree,free/reduced,none,47,57,44,49.0
4,male,group C,some college,standard,none,76,78,75,76.0
...,...,...,...,...,...,...,...,...,...
995,female,group E,master's degree,standard,completed,88,99,95,94.0
996,male,group C,high school,free/reduced,none,62,55,55,57.0
997,female,group C,high school,free/reduced,completed,59,71,65,65.0
998,female,group D,some college,standard,completed,68,78,77,74.0


In [20]:
# Mean of each test score and of the final average per parental education level
df_grouped_by_parental_level_of_education = (
    df.groupby(['parental_level_of_education'])[
        ['math_score', 'reading_score', 'writing_score', 'final_score_avg']
    ]
    .mean()
    .reset_index()
)

# Leave the aggregated frame as the last expression so the cell displays it
df_grouped_by_parental_level_of_education

Unnamed: 0,parental_level_of_education,math_score,reading_score,writing_score,final_score_avg
0,associate's degree,67.882883,70.927928,69.896396,69.558559
1,bachelor's degree,69.389831,73.0,73.381356,71.949153
2,high school,62.137755,64.704082,62.44898,63.122449
3,master's degree,69.745763,75.372881,75.677966,73.576271
4,some college,67.128319,69.460177,68.840708,68.446903
5,some high school,63.497207,66.938547,64.888268,65.072626


In [30]:
# Grouped bar chart of the per-education-level means: one cluster per parental
# education level, one bar per score column, with values printed on the bars
fig = px.bar(
    df_grouped_by_parental_level_of_education,
    x='parental_level_of_education',
    y=['math_score', 'reading_score', 'writing_score', 'final_score_avg'],
    barmode='group',
    text_auto=True,
    title='Comparação de Médias por Nível de Educação dos Pais',
    labels={
        'value': 'Média de Notas',
        'variable': 'Area',
        'parental_level_of_education': 'Nível de Educação dos Pais',
    },
)

fig.show()

In [39]:
# Show the relationship between math and reading scores, one point per
# student, coloured by parental education level. A scatter plot is used
# because a bar chart over the raw rows stacks the reading scores for each
# math score value and cannot reveal a correlation.
fig = px.scatter(
    df,
    x='math_score',
    y='reading_score',
    color='parental_level_of_education',
    title='Correlação entre as Notas de Matemática e Leitura x Nivel de Educação dos Pais',
    labels={'math_score': 'Nota de Matemática', 'reading_score': 'Nota de Leitura'},
)
fig.show()