In [None]:
import pandas as pd

# Read the student performance data set from the CSV file next to the notebook.
file_path = 'student_performance.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the first five rows with the notebook's rich display.
display(df.head(5))

In [None]:
# Tally the missing entries in every column and print the per-column totals.
missing_counts = df.isna().sum()
print(missing_counts)

In [None]:
# Convert Distance_from_Home to numerical values (Near=1, Moderate=2, Far=3).
distance_mapping = {"Near": 1, "Moderate": 2, "Far": 3}
# Map only while the column still holds text labels. Re-running this cell on an
# already-converted column would look numbers up in a string-keyed mapping and
# silently turn every value into NaN.
if not pd.api.types.is_numeric_dtype(df["Distance_from_Home"]):
    # Labels that are not keys of distance_mapping become NaN here and are
    # filled with the median below.
    df["Distance_from_Home"] = df["Distance_from_Home"].map(distance_mapping)

# Fill missing values for numerical columns with median
numerical_cols = ["Distance_from_Home"]
for col in numerical_cols:
    # Assign the result back rather than calling fillna(inplace=True) on
    # df[col]: the in-place call acts on an intermediate Series, which pandas
    # warns about and which leaves df untouched under Copy-on-Write.
    df[col] = df[col].fillna(df[col].median())

# Fill missing values for categorical columns with mode
categorical_cols = ["Teacher_Quality", "Parental_Education_Level"]
for col in categorical_cols:
    modes = df[col].mode()
    # mode() is empty when the column has no non-missing values; in that case
    # there is nothing to fill with, so the column is left as it is.
    if not modes.empty:
        df[col] = df[col].fillna(modes.iloc[0])

# Check if all missing values are handled
print(df.isnull().sum())


In [None]:
import pandas as pd
import plotly.express as px

# Keep only the students whose exam score is 70 or higher.
high_scorers = df.loc[df["Exam_Score"].ge(70)]

# Tally the high scorers per gender as a two-column frame (Gender, Count).
gender_counts = (
    high_scorers["Gender"]
    .value_counts()
    .rename_axis("Gender")
    .reset_index(name="Count")
)

# Bar chart: one bar per gender, coloured by gender, labelled with its count.
fig = px.bar(
    data_frame=gender_counts,
    x="Gender",
    y="Count",
    color="Gender",
    text="Count",
    title="📊 Number of High Scorers (Exam Score ≥ 70) by Gender",
    labels={"Count": "Number of Students", "Gender": "Student Gender"},
    template="plotly",
)

# Put the count labels above the bars, outline the bars in black,
# then centre the title and set the axis titles.
fig.update_traces(
    textposition="outside",
    marker_line_width=1,
    marker_line_color="black",
).update_layout(
    title_font_size=18,
    title_x=0.5,
    xaxis_title="Gender",
    yaxis_title="Count",
)

# Render the figure.
fig.show()

In [None]:
import pandas as pd
import plotly.express as px


# Keep only the students whose exam score is 70 or higher.
high_scorers = df.loc[df["Exam_Score"].ge(70)]

# Tally the high scorers per parental education level
# as a two-column frame (Parental_Education_Level, Count).
education_counts = (
    high_scorers["Parental_Education_Level"]
    .value_counts()
    .rename_axis("Parental_Education_Level")
    .reset_index(name="Count")
)

# Bar chart: one bar per education level, labelled with its count.
fig = px.bar(
    data_frame=education_counts,
    x="Parental_Education_Level",
    y="Count",
    color="Parental_Education_Level",
    text="Count",
    title="📊 Influence of Parental Education on High Exam Scores (≥ 70)",
    labels={
        "Count": "Number of High Scoring Students",
        "Parental_Education_Level": "Parental Education",
    },
    template="plotly",
)

# Put the count labels above the bars, outline the bars in black,
# then centre the title and set the axis titles.
fig.update_traces(
    textposition="outside",
    marker_line_width=1,
    marker_line_color="black",
).update_layout(
    title_font_size=18,
    title_x=0.5,
    xaxis_title="Parental Education Level",
    yaxis_title="Count",
)

# Render the figure.
fig.show()


In [None]:
import pandas as pd
import plotly.express as px

# Express every exam score as a percentage of the highest exam score in df.
df["Exam_Score_Percentage"] = df["Exam_Score"].div(df["Exam_Score"].max()).mul(100)

# Mean percentage score for each school type, as a flat two-column frame.
school_scores = (
    df.groupby("School_Type")["Exam_Score_Percentage"]
    .mean()
    .reset_index()
)

# Bar chart: one bar per school type, labelled with its mean percentage.
fig = px.bar(
    data_frame=school_scores,
    x="School_Type",
    y="Exam_Score_Percentage",
    color="School_Type",
    text="Exam_Score_Percentage",
    title="📊 Effect of School Type on Exam Scores (Percentage)",
    labels={
        "Exam_Score_Percentage": "Average Exam Score (%)",
        "School_Type": "Type of School",
    },
    template="plotly",
)

# Show the bar labels with one decimal and a percent sign, placed above the
# black-outlined bars; then centre the title and set the axis titles.
fig.update_traces(
    texttemplate="%{text:.1f}%",
    textposition="outside",
    marker_line_width=1,
    marker_line_color="black",
).update_layout(
    title_font_size=18,
    title_x=0.5,
    xaxis_title="School Type",
    yaxis_title="Avg Exam Score (%)",
)

# Render the figure.
fig.show()

In [None]:
import pandas as pd
import plotly.express as px

# Express every exam score as a percentage of the highest exam score in df.
df["Exam_Score_Percentage"] = df["Exam_Score"].div(df["Exam_Score"].max()).mul(100)

# Mean percentage score for each family income level, as a flat two-column frame.
income_scores = (
    df.groupby("Family_Income")["Exam_Score_Percentage"]
    .mean()
    .reset_index()
)

# Bar chart: one bar per income level, labelled with its mean percentage.
fig = px.bar(
    data_frame=income_scores,
    x="Family_Income",
    y="Exam_Score_Percentage",
    color="Family_Income",
    text="Exam_Score_Percentage",
    title="💰 Relationship Between Family Income and Exam Scores (Percentage)",
    labels={
        "Family_Income": "Family Income Level",
        "Exam_Score_Percentage": "Average Exam Score (%)",
    },
    template="plotly",
)

# Show the bar labels with one decimal and a percent sign, placed above the
# black-outlined bars; then centre the title and set the axis titles.
fig.update_traces(
    texttemplate="%{text:.1f}%",
    textposition="outside",
    marker_line_width=1,
    marker_line_color="black",
).update_layout(
    title_font_size=18,
    title_x=0.5,
    xaxis_title="Family Income Level",
    yaxis_title="Avg Exam Score (%)",
)

# Render the figure.
fig.show()

In [None]:
import pandas as pd
import plotly.express as px

# Express every exam score as a percentage of the highest exam score in df.
df["Exam_Score_Percentage"] = df["Exam_Score"].div(df["Exam_Score"].max()).mul(100)

# Mean percentage score per extracurricular-participation value,
# as a flat two-column frame.
activity_scores = (
    df.groupby("Extracurricular_Activities")["Exam_Score_Percentage"]
    .mean()
    .reset_index()
)

# Bar chart: one bar per participation value, labelled with its mean percentage.
fig = px.bar(
    data_frame=activity_scores,
    x="Extracurricular_Activities",
    y="Exam_Score_Percentage",
    color="Extracurricular_Activities",
    text="Exam_Score_Percentage",
    title="🎭 Impact of Extracurricular Activities on Exam Scores (Percentage)",
    labels={
        "Extracurricular_Activities": "Participation in Activities",
        "Exam_Score_Percentage": "Average Exam Score (%)",
    },
    template="plotly",
)

# Show the bar labels with one decimal and a percent sign, placed above the
# black-outlined bars; then centre the title and set the axis titles.
fig.update_traces(
    texttemplate="%{text:.1f}%",
    textposition="outside",
    marker_line_width=1,
    marker_line_color="black",
).update_layout(
    title_font_size=18,
    title_x=0.5,
    xaxis_title="Extracurricular Activities",
    yaxis_title="Avg Exam Score (%)",
)

# Render the figure.
fig.show()

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go


# Interactive scatter plots of Exam_Score against each study-habit column.
# Each spec is (x column, x-axis label, figure title, extra hover columns);
# the hover columns are the two study-habit columns not plotted on the x-axis.
scatter_specs = [
    (
        "Hours_Studied",
        "Hours Studied",
        "Impact of Hours Studied on Exam Score",
        ["Sleep_Hours", "Previous_Scores"],
    ),
    (
        "Sleep_Hours",
        "Sleep Hours",
        "Impact of Sleep Hours on Exam Score",
        ["Hours_Studied", "Previous_Scores"],
    ),
    (
        "Previous_Scores",
        "Previous Scores",
        "Impact of Previous Scores on Exam Score",
        ["Hours_Studied", "Sleep_Hours"],
    ),
]

scatter_figs = []
for x_col, x_label, plot_title, hover_cols in scatter_specs:
    scatter_fig = px.scatter(
        data_frame=df,
        x=x_col,
        y="Exam_Score",
        title=plot_title,
        labels={x_col: x_label, "Exam_Score": "Exam Score"},
        trendline="ols",  # overlay an ordinary-least-squares trendline
        hover_data=hover_cols,
    )
    # Show each figure as soon as it is built, in the order listed above.
    scatter_fig.show()
    scatter_figs.append(scatter_fig)

# Keep the individual figure names available to the rest of the notebook.
fig1, fig2, fig3 = scatter_figs

# Interactive 3D scatter: the three study-habit columns span the axes,
# while Exam_Score drives both the marker colour and the hover heading.
labels_3d = {
    "Hours_Studied": "Hours Studied",
    "Sleep_Hours": "Sleep Hours",
    "Previous_Scores": "Previous Scores",
    "Exam_Score": "Exam Score",
}
fig4 = px.scatter_3d(
    data_frame=df,
    x="Hours_Studied",
    y="Sleep_Hours",
    z="Previous_Scores",
    color="Exam_Score",
    hover_name="Exam_Score",
    title="3D Visualization of Study Habits and Exam Scores",
    labels=labels_3d,
)
fig4.show()

# Interactive parallel-coordinates plot: one axis per listed column,
# with each line coloured by its Exam_Score value.
parallel_dimensions = ["Hours_Studied", "Sleep_Hours", "Previous_Scores", "Exam_Score"]
parallel_labels = {
    "Hours_Studied": "Hours Studied",
    "Sleep_Hours": "Sleep Hours",
    "Previous_Scores": "Previous Scores",
    "Exam_Score": "Exam Score",
}
fig5 = px.parallel_coordinates(
    data_frame=df,
    dimensions=parallel_dimensions,
    color="Exam_Score",
    title="Parallel Coordinates Plot of Study Habits and Exam Scores",
    labels=parallel_labels,
)
fig5.show()