In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy as sp
import seaborn as sns

# Categorical columns that are converted to numeric codes in this cell.
ENUMDATA = [
    "Access_to_Resources",
    "Parental_Involvement",
    "Motivation_Level",
    "Teacher_Quality",
    "Family_Income",
    "Extracurricular_Activities",
    "Internet_Access",
    "Learning_Disabilities",
    "Peer_Influence",
    "Parental_Education_Level",
    "Gender",
    "Distance_from_Home",
    "School_Type",
]

# Parse the CSV once. `raw_data` keeps the original string labels; `data` is an
# independent copy that receives the numeric encodings below.
raw_data = pd.read_csv("StudentPerformanceFactors.csv")
data = raw_data.copy()

# NOTE: Series.map() with a dict turns every value that is not a key of the
# dict into NaN, so missing or unexpected labels end up as NaN after encoding.

# Ordinal three-level scales.
LOWMEDHIGH = {"Low":1,"Medium":2,"High":3}
for column in ["Access_to_Resources","Parental_Involvement","Motivation_Level","Teacher_Quality","Family_Income"]:
    data[column] = data[column].map(LOWMEDHIGH)

# Binary yes/no flags.
NOYES = {"No":0,"Yes":1}
for column in ["Extracurricular_Activities","Internet_Access","Learning_Disabilities"]:
    data[column] = data[column].map(NOYES)

# Signed scale centred on "Neutral".
PEERMAP = {"Positive":1,"Negative":-1,"Neutral":0}
data["Peer_Influence"] = data["Peer_Influence"].map(PEERMAP)

# The NaN key is intended to send missing education levels to 0
# (NOTE(review): relies on pandas matching NaN keys in map() - confirm on the
# pandas version in use).
EDULVLMAP = {"College":2,"Postgraduate":3,"High School":1, float('nan'):0}
data["Parental_Education_Level"] = data["Parental_Education_Level"].map(EDULVLMAP)

GENDERMAP = {"Male":0,"Female":1}
data["Gender"] = data["Gender"].map(GENDERMAP)

# Higher code = closer to school.
NEARMODFAR = {"Near":3,"Moderate":2,"Far":1}
data["Distance_from_Home"] = data["Distance_from_Home"].map(NEARMODFAR)

SCHOOLTYPEMAP = {"Public":0,"Private":1}
data["School_Type"] = data["School_Type"].map(SCHOOLTYPEMAP)

In [None]:
# Distribution of exam scores: a horizontal box plot with quartile markers
# (left panel) next to the empirical cumulative distribution (right panel).
scores = raw_data["Exam_Score"]

# `scores` is already a pandas Series; the wrapper only keeps this name bound.
scores_series = pd.Series(scores)

# Quartiles of the exam scores.
percentiles = [25, 50, 75]
percentile_values = np.percentile(scores, percentiles)

# One colour per quartile, shared by both panels so the two legends agree and
# the three box-plot lines can be told apart.
percentile_colors = ['r', 'g', 'orange']

fig, (ax_box, ax_cdf) = plt.subplots(1, 2, figsize=(12, 6))

# --- Left panel: box plot with a dashed vertical line at each quartile ---
ax_box.boxplot(scores_series, vert=False)
ax_box.set_title('Box Plot of Exam Scores')
ax_box.set_xlabel('Scores')
for perc, value, color in zip(percentiles, percentile_values, percentile_colors):
    ax_box.axvline(x=value, color=color, linestyle='--', label=f'{perc}th Percentile: {value}')
ax_box.legend()

# --- Right panel: empirical CDF ---
# After sorting, the k-th smallest score has cumulative probability k / n.
sorted_scores = np.sort(scores)
cdf = np.arange(1, len(sorted_scores) + 1) / len(sorted_scores)

ax_cdf.plot(sorted_scores, cdf, marker='o', linestyle='-', color='b')
ax_cdf.set_title('Cumulative Distribution Function (CDF)')
ax_cdf.set_xlabel('Scores')
ax_cdf.set_ylabel('Cumulative Probability')

# Dashed horizontal guides at the quartile probability levels.
cdf_guides = [
    (0.25, '25th Percentile'),
    (0.50, '50th Percentile (Median)'),
    (0.75, '75th Percentile'),
]
for (level, guide_label), color in zip(cdf_guides, percentile_colors):
    ax_cdf.axhline(y=level, color=color, linestyle='--', label=guide_label)
ax_cdf.legend()

# Show the plots
fig.tight_layout()
plt.show()

In [None]:
# Donut chart of how many students fall into each teacher-quality rating.
df = raw_data

# Occurrences of each distinct value in the 'Teacher_Quality' column
# (value_counts() leaves out missing entries by default).
quality_counts = df['Teacher_Quality'].value_counts()
# Original (template) name, kept bound in case another cell still refers to it.
fruit_counts = quality_counts

# Labels that combine each category with its count, e.g. "Medium (123)".
labels = [f'{level} ({count})' for level, count in quality_counts.items()]

# Pie chart with the category labels placed outside the wedges.
fig, ax = plt.subplots(figsize=(6, 6))
ax.pie(
    quality_counts,
    labels=labels,
    autopct='%1.1f%%',
    startangle=90,
    pctdistance=0.86,     # Position of the percentage text
    labeldistance=1.1     # Position of the labels (further out to avoid overlap)
)

# A white circle over the centre turns the pie into a donut.
centre_circle = plt.Circle((0, 0), 0.70, fc='white')
ax.add_artist(centre_circle)

# Title matches the column that is actually plotted.
ax.set_title('Teacher Quality Distribution')
plt.show()