# 3. Focused Analysis: Study Habits & Lifestyle

This notebook explores specific relationships detailed in the "Hábitos de Estudo" and "Estilo de Vida" sections of the portfolio.

In [None]:
# Placeholder cell: every line here is a comment, so nothing is executed.
# NOTE(review): the cells below use `df`, `pd`, `plt` and `sns`, none of which is
# defined in the visible part of this notebook -- presumably they come from an
# earlier cell or notebook; confirm before relying on Restart & Run All.
# If we saved the cleaned data, we could load it here instead:
# df = pd.read_parquet('cleaned_student_data.parquet')

## Análise: Hábitos de Estudo vs. Desempenho

### Horas de Estudo/Dia vs. Nota do Exame

In [None]:
# Daily study time against exam score: individual students as points,
# with a fitted regression line drawn on the same axes.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(data=df, x='study_hours_per_day', y='exam_score', alpha=0.6, color='teal', ax=ax)
# scatter=False: only add the fitted line, the points are already drawn above.
sns.regplot(data=df, x='study_hours_per_day', y='exam_score', scatter=False, color='darkred', ax=ax)
ax.set(
    title='Horas de Estudo/Dia vs. Nota do Exame',
    xlabel='Horas de Estudo por Dia',
    ylabel='Nota do Exame',
)
plt.show()

### Frequência às Aulas vs. Nota do Exame

In [None]:
# Class attendance (%) against exam score: individual students as points,
# with a fitted regression line drawn on the same axes.
fig, ax = plt.subplots(figsize=(10, 6))
sns.scatterplot(data=df, x='attendance_percentage', y='exam_score', alpha=0.6, color='purple', ax=ax)
# scatter=False: only add the fitted line, the points are already drawn above.
sns.regplot(data=df, x='attendance_percentage', y='exam_score', scatter=False, color='darkred', ax=ax)
ax.set(
    title='Frequência às Aulas (%) vs. Nota do Exame',
    xlabel='Frequência às Aulas (%)',
    ylabel='Nota do Exame',
)
plt.show()

### Desempenho por Faixas de Horas de Estudo

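The portfolio's HTML page used the bins '0-2h', '2.1-4h', '4.1-6h' and '>6h'. Let's recreate them with `pd.cut`, using right-closed intervals (for example, '2.1-4h' covers values greater than 2 and up to 4).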

In [None]:
# Bucket daily study hours into the four ranges named by `study_labels`, then
# compare exam scores across the buckets.
#
# The last edge is open-ended (+inf) instead of `max + 0.1`: a data-derived edge
# stops being greater than 6 whenever the observed maximum is <= 5.9 (and is NaN
# for an all-NaN column), and pd.cut rejects bin edges that do not increase
# monotonically. With +inf every value above 6 still lands in '>6h'.
study_bins = [0, 2, 4, 6, float('inf')]
study_labels = ['0-2h', '2.1-4h', '4.1-6h', '>6h']
df['study_hours_bin'] = pd.cut(
    df['study_hours_per_day'],
    bins=study_bins,
    labels=study_labels,
    right=True,           # intervals are (a, b]
    include_lowest=True,  # ...except the first one, which also contains its left edge (0)
)

plt.figure(figsize=(10, 6))
sns.boxplot(x='study_hours_bin', y='exam_score', data=df, palette='viridis')
plt.title('Desempenho por Faixas de Horas de Estudo')
plt.xlabel('Faixa de Horas de Estudo por Dia')
plt.ylabel('Nota do Exame')
plt.show()

# Display counts per bin for context (sort_index keeps the bins in label order)
print(df['study_hours_bin'].value_counts().sort_index())

## Análise: Estilo de Vida e Bem-Estar vs. Desempenho

Factors mentioned in the HTML: `sleep_hours`, `mental_health_rating`, `diet_quality`, `parental_education_level`, `extracurricular_participation`, `part_time_job`.

In [None]:
# Maps each lifestyle column to the kind of plot it gets in the loop below:
# 'numerical' -> scatter plot with a regression line,
# any type starting with 'categorical' -> box plot of exam scores per category.
lifestyle_factors = dict(
    sleep_hours='numerical',
    mental_health_rating='numerical',  # Technically ordinal, but scatter used in HTML
    diet_quality='categorical_ordered',
    parental_education_level='categorical_ordered',
    extracurricular_participation='categorical_binary',
    part_time_job='categorical_binary',
)

# One figure per factor, always with exam_score on the y axis.
for factor, f_type in lifestyle_factors.items():
    axis_label = factor.replace("_", " ").title()
    fig, ax = plt.subplots(figsize=(10, 6))

    if f_type == 'numerical':
        sns.scatterplot(data=df, x=factor, y='exam_score', alpha=0.5, ax=ax)
        # scatter=False: only the fitted line is added on top of the points.
        sns.regplot(data=df, x=factor, y='exam_score', scatter=False, color='darkred', ax=ax)
        ax.set_title(f'{axis_label} vs. Nota do Exame')
    elif f_type.startswith('categorical'):
        # Only an ordered pandas categorical dictates the box order;
        # otherwise seaborn is left to choose it (order=None).
        is_ordered_cat = df[factor].dtype.name == 'category' and df[factor].cat.ordered
        order = df[factor].cat.categories if is_ordered_cat else None
        sns.boxplot(data=df, x=factor, y='exam_score', order=order, palette='plasma', ax=ax)
        ax.set_title(f'Nota do Exame por {axis_label}')
        plt.xticks(rotation=45, ha='right')

    ax.set_xlabel(axis_label)
    ax.set_ylabel('Nota do Exame')
    fig.tight_layout()
    plt.show()