import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy.stats import norm

# Data Preparation
# Raw per-student records; each key becomes one DataFrame column.
data = {
    "Student": range(1, 31),
    "Study Hours": [17.5, 14.3, 18.2, 22.6, 13.8, 13.8, 22.9, 18.8, 12.7, 17.7, 12.7, 12.7, 16.2, 5.4, 6.4, 12.2, 9.9, 16.6, 10.5, 7.9, 22.3, 13.9, 15.3, 7.9, 12.3, 15.6, 9.2, 16.9, 12.0, 13.5],
    "Final Exam Score": [69.0, 93.5, 74.9, 64.4, 83.2, 62.8, 77.1, 55.4, 61.7, 77.0, 82.4, 76.7, 73.8, 72.0, 60.2, 67.8, 70.4, 85.6, 78.4, 57.4, 78.2, 71.1, 68.2, 81.1, 85.3, 84.3, 66.6, 71.9, 78.3, 84.8],
    "Class Attendance": ['Rarely', 'Never', 'Frequently', 'Occasionally', 'Occasionally', 'Rarely', 'Frequently', 'Never', 'Occasionally', 'Frequently', 'Frequently', 'Rarely', 'Occasionally', 'Occasionally', 'Never', 'Occasionally', 'Never', 'Occasionally', 'Rarely', 'Occasionally', 'Never', 'Never', 'Rarely', 'Occasionally', 'Rarely', 'Occasionally', 'Occasionally', 'Never', 'Occasionally'],
    "Extracurricular Participation": ['Yes', 'No', 'No', 'No', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'Yes', 'No', 'Yes', 'No', 'No', 'No', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'Yes', 'No', 'Yes', 'No', 'Yes', 'Yes', 'Yes', 'No', 'No'],
    "Sleep Hours": [8.4, 7.5, 6.2, 7.8, 7.1, 8.5, 5.9, 6.5, 6.4, 4.8, 7.4, 7.4, 7.0, 6.6, 4.9, 6.4, 6.5, 5.8, 6.8, 7.6, 9.8, 7.3, 7.4, 6.9, 4.1, 7.0, 7.1, 10.7, 6.7, 7.5],
    "Socioeconomic Status": ['Medium', 'Low', 'Medium', 'Medium', 'High', 'High', 'High', 'High', 'Low', 'High', 'Medium', 'Low', 'Medium', 'Medium', 'Medium', 'High', 'High', 'Low', 'Low', 'High', 'Medium', 'Low', 'High', 'High', 'High', 'Medium', 'High', 'High', 'High', 'High']
}

# "Class Attendance" has 29 entries while every other column has 30, so
# pd.DataFrame(data) raises ValueError (all arrays must be of same length).
# Wrapping each column in a Series makes pandas align on the positional index
# and fill the short column with a missing value instead of crashing.
# NOTE(review): this treats the LAST student's attendance as the missing one;
# the real position of the dropped value is unknown -- confirm against the
# data source and insert it in the list above.
df = pd.DataFrame({column: pd.Series(values) for column, values in data.items()})

# Set the style
# set_theme is the preferred entry point; sns.set is only a legacy alias for it.
sns.set_theme(style="whitegrid")

# Distribution plots for each variable, laid out on a 3x2 grid
plt.figure(figsize=(14, 10))

# Numeric variables: 10-bin histogram with a KDE overlay.
numeric_panels = [
    ('Study Hours', 'Distribution of Study Hours per Week'),
    ('Final Exam Score', 'Distribution of Final Exam Scores'),
    ('Sleep Hours', 'Distribution of Sleep Hours per Night'),
]
for position, (column, title) in enumerate(numeric_panels, start=1):
    plt.subplot(3, 2, position)
    sns.histplot(df[column], bins=10, kde=True)
    plt.title(title)

# Categorical variables: count plot instead of a histogram of integer category
# codes. The codes hid the category names behind 0..3 on the x-axis, ordered
# the levels alphabetically, and would turn a missing value into a bogus -1
# bar. An explicit `order` keeps the levels in their natural ordinal sequence.
categorical_panels = [
    ('Class Attendance',
     ['Never', 'Rarely', 'Occasionally', 'Frequently'],
     'Distribution of Class Attendance'),
    ('Socioeconomic Status',
     ['Low', 'Medium', 'High'],
     'Distribution of Socioeconomic Status'),
]
for position, (column, levels, title) in enumerate(categorical_panels, start=4):
    plt.subplot(3, 2, position)
    sns.countplot(x=column, data=df, order=levels)
    plt.title(title)

plt.tight_layout()
plt.show()

# Scatter Plots
# Two side-by-side panels, each relating one numeric predictor to the final
# exam score.
plt.figure(figsize=(14, 6))

scatter_panels = (
    ('Study Hours', 'Study Hours vs Final Exam Score'),
    ('Sleep Hours', 'Sleep Hours vs Final Exam Score'),
)
for panel_index, (predictor, panel_title) in enumerate(scatter_panels, start=1):
    plt.subplot(1, 2, panel_index)
    sns.scatterplot(data=df, x=predictor, y='Final Exam Score')
    plt.title(panel_title)

plt.tight_layout()
plt.show()

# Box Plot for Final Exam Score by Class Attendance
# One box per attendance level, drawn on a fresh 10x6 figure.
plt.figure(figsize=(10, 6))
attendance_ax = sns.boxplot(data=df, x='Class Attendance', y='Final Exam Score')
attendance_ax.set_title('Final Exam Scores by Class Attendance')
plt.show()

# CDF Plot for Study Hours per Week
# Empirical CDF: the i-th smallest observation (1-based) is plotted at
# height i / n, so the last point sits at exactly 1.0.
sorted_hours = np.sort(df['Study Hours'].to_numpy())
n_observations = sorted_hours.size
cdf = np.arange(1, n_observations + 1) / n_observations

plt.figure(figsize=(8, 6))
# Markers only: the ECDF is a step function, so points are not joined.
plt.plot(sorted_hours, cdf, linestyle='none', marker='.')
plt.xlabel('Study Hours per Week')
plt.ylabel('CDF')
plt.title('CDF of Study Hours per Week')
plt.grid(True)
plt.show()

# Fit Normal Distribution to Final Exam Scores
# Density-scaled histogram with the PDF of a normal distribution overlaid,
# parameterised by the column's mean and its standard deviation as returned
# by Series.std().
exam_scores = df['Final Exam Score']

plt.figure(figsize=(10, 6))
sns.histplot(exam_scores, bins=10, kde=False, stat='density', color='blue', alpha=0.6)

# Evaluate the curve across the x-range the histogram ended up with.
xmin, xmax = plt.gca().get_xlim()
x = np.linspace(xmin, xmax, num=100)
p = norm.pdf(x, loc=exam_scores.mean(), scale=exam_scores.std())
plt.plot(x, p, color='k', linewidth=2)

plt.xlabel('Final Exam Score')
plt.ylabel('Density')
plt.title('Normal Distribution Fit to Final Exam Scores')
plt.show()
