In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats

# 1. Load the data
# The CSV must be named 'student_combined_data.csv'; the path below is relative,
# so the file has to be in the directory the notebook is run from.
df = pd.read_csv('student_combined_data.csv')


In [None]:
import pandas as pd

# --- 1. Load Data ---
df = pd.read_csv('student_combined_data.csv')

# --- 2. Summary statistics for both score columns ---
# A single .agg call produces one row per statistic and one column per score.
score_columns = ['performance_score', 'aptitude_score']
overall_stats = df[score_columns].agg(['mean', 'median', 'min', 'max'])

# --- 3. Spread (range = max - min) ---
# Appended as an extra row derived from the 'max' and 'min' rows computed above.
overall_stats.loc['range'] = overall_stats.loc['max'].sub(overall_stats.loc['min'])

# --- 4. Reader-friendly row labels ---
overall_stats = overall_stats.rename(
    index={
        'mean': 'Average (Mean)',
        'median': 'Middle Value (Median)',
        'min': 'Lowest Score',
        'max': 'Highest Score',
        'range': 'Spread (Range)',
    }
)

# --- 5. Formatting ---
# Show every float with a thousands separator and two decimals.
# This is a global pandas display option, so it stays in effect for later cells.
pd.set_option('display.float_format', '{:,.2f}'.format)

# --- 6. Display ---
print(overall_stats)

In [None]:
import pandas as pd

# --- 1. Load Data ---
df = pd.read_csv('student_combined_data.csv')

# --- 2. Calculate Basic Statistics ---
# One row per course level with count, mean, min, max and standard deviation
# of 'performance_score'.
# NOTE: the table is deliberately not called `stats`. The notebook's first cell
# binds that name to `scipy.stats`; rebinding it to a DataFrame here would make a
# later `stats.pearsonr(...)` call fail unless the module is re-imported first.
performance_by_level = (
    df.groupby('course_level')['performance_score']
      .agg(['count', 'mean', 'min', 'max', 'std'])
)

# --- 3. Calculate "Typical Range" (Mean +/- SD) ---
# Lower and upper bounds: one standard deviation either side of the mean
performance_by_level['lower'] = performance_by_level['mean'] - performance_by_level['std']
performance_by_level['upper'] = performance_by_level['mean'] + performance_by_level['std']

# Combine them into a single string column like "2.85 - 3.62"
performance_by_level['Typical Range'] = (
    performance_by_level['lower'].map('{:.2f}'.format) + ' - ' +
    performance_by_level['upper'].map('{:.2f}'.format)
)

# --- 4. Formatting & Cleaning ---
# Rename columns to match the Table 2 headers
performance_by_level = performance_by_level.rename(columns={
    'count': 'Students',
    'mean': 'Average',
    'min': 'Lowest',
    'max': 'Highest'
})

# Select only the columns we want to display
final_table = performance_by_level[['Students', 'Average', 'Lowest', 'Highest', 'Typical Range']]

# Reorder the rows (Advanced -> Intermediate -> Foundation).
# reindex keeps exactly these labels: a level missing from the data shows up as
# a row of NaN, and any level not listed here is dropped from the table.
final_table = final_table.reindex(['Advanced', 'Intermediate', 'Foundation'])

# Apply formatting to the float columns (2 decimal places).
# 'Students' holds integer counts and is unaffected, provided every listed
# level exists in the data (a NaN row would turn the column into floats).
pd.options.display.float_format = '{:.2f}'.format

# --- 5. Display ---
print("Table 2: Performance Scores Across Course Levels")
print(final_table)

In [None]:
import pandas as pd

# --- 1. Load Data ---
df = pd.read_csv('student_combined_data.csv')

# --- 2. Calculate Basic Statistics ---
# One row per course level with count, mean, min, max and standard deviation
# of 'aptitude_score'.
# NOTE: the table is deliberately not called `stats`. The notebook's first cell
# binds that name to `scipy.stats`; rebinding it to a DataFrame here would make a
# later `stats.pearsonr(...)` call fail unless the module is re-imported first.
aptitude_by_level = (
    df.groupby('course_level')['aptitude_score']
      .agg(['count', 'mean', 'min', 'max', 'std'])
)

# --- 3. Calculate "Typical Range" (Mean +/- Standard Deviation) ---
# The range is shown as whole numbers (e.g., "48 - 86"), so the bounds are rounded.
# The nullable 'Int64' dtype is used instead of plain int: a level with a single
# student has std = NaN, and casting NaN to a plain int raises an error.
aptitude_by_level['lower'] = (
    (aptitude_by_level['mean'] - aptitude_by_level['std']).round(0).astype('Int64')
)
aptitude_by_level['upper'] = (
    (aptitude_by_level['mean'] + aptitude_by_level['std']).round(0).astype('Int64')
)

# Create the "Int - Int" string format
aptitude_by_level['Typical Range'] = (
    aptitude_by_level['lower'].astype(str) + ' - ' +
    aptitude_by_level['upper'].astype(str)
)

# --- 4. Formatting & Cleaning ---
# Rename columns to match Table 3
aptitude_by_level = aptitude_by_level.rename(columns={
    'count': 'Students',
    'mean': 'Average',
    'min': 'Lowest',
    'max': 'Highest'
})

# Reorder rows (Advanced -> Intermediate -> Foundation).
# reindex keeps exactly these labels: a level missing from the data shows up as
# a row of missing values, and any level not listed here is dropped.
aptitude_by_level = aptitude_by_level.reindex(['Advanced', 'Intermediate', 'Foundation'])

# Select only the columns shown in the table
final_table = aptitude_by_level[['Students', 'Average', 'Lowest', 'Highest', 'Typical Range']]

# --- 5. Final Display Settings ---
# Ensure 'Average' displays with 2 decimal places
pd.options.display.float_format = '{:.2f}'.format

print("Table 3: Aptitude Scores Across Course Levels")
print(final_table)

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# --- 1. Load Data ---
df = pd.read_csv('student_combined_data.csv')

# --- 2. Create the Visualization ---

# Grey background with a white grid
sns.set_style("darkgrid")

# Create the figure size
plt.figure(figsize=(8, 5))

# Draw the Boxplot
sns.boxplot(
    data=df,
    x='course_level',
    # This figure is titled and labelled "Performance Score", so it must plot the
    # performance column. (It previously plotted 'aptitude_score', which made it
    # a mislabelled duplicate of the aptitude boxplot in the next cell.)
    y='performance_score',
    order=['Advanced', 'Foundation', 'Intermediate'],
    boxprops=dict(facecolor=(0,0,0,0)),  # fully transparent box fill
    width=0.5,
    linewidth=1.2,
    fliersize=6         # Slightly larger outlier markers for visibility
)

# --- 3. Customization ---
# Labels are set by hand so they read nicely (capitalized) even though the CSV
# column names are lowercase.
plt.title('Performance Score Distribution by Course Level', fontsize=14)
plt.ylabel('Performance Score')
plt.xlabel('Course Level')
# Add white vertical grid lines at the category positions
plt.grid(axis='x', color='white', linestyle='-', linewidth=1.5)
# Show the plot
plt.show()

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# --- 1. Load Data ---
df = pd.read_csv('student_combined_data.csv')

# --- 2. Figure setup ---
# Grey background with a white grid
sns.set_style("darkgrid")

# 8 x 6 inch canvas with a single axes to draw on
fig, ax = plt.subplots(figsize=(8, 6))

# --- 3. Boxplot of aptitude score per course level ---
level_order = ['Advanced', 'Foundation', 'Intermediate']
transparent_fill = {'facecolor': (0, 0, 0, 0)}  # boxes drawn as outlines only

sns.boxplot(
    data=df,
    x='course_level',
    y='aptitude_score',
    order=level_order,
    boxprops=transparent_fill,
    width=0.5,
    linewidth=1.2,
    fliersize=6,  # slightly larger outlier markers
    ax=ax,
)

# --- 4. Titles, labels and ticks ---
# Larger fonts than the matplotlib defaults for readability
ax.set_title('Aptitude Score Distribution by Course Level', fontsize=18)
ax.set_ylabel('Aptitude Score', fontsize=14)
ax.set_xlabel('Course Level', fontsize=14)

# White vertical grid lines at the category positions
ax.grid(axis='x', color='white', linestyle='-', linewidth=1.5)

# Bigger tick labels on both axes
ax.tick_params(axis='both', which='major', labelsize=13)

plt.tight_layout()
plt.show()

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# --- 1. Load Data ---
df = pd.read_csv('student_combined_data.csv')

# --- 2. Figure setup ---
sns.set_style("darkgrid")  # grey background with white grid lines
fig, ax = plt.subplots(figsize=(10, 5))

# --- 3. Violin plot of performance score per course level ---
level_order = ['Advanced', 'Intermediate', 'Foundation']
violin_pink = '#F6848F'  # one uniform colour for every violin

sns.violinplot(
    data=df,
    x='course_level',
    y='performance_score',
    order=level_order,
    color=violin_pink,
    inner='box',      # miniature box plot drawn inside each violin
    linewidth=1.2,
    ax=ax,
)

# --- 4. Labels and Title ---
ax.set_title('Performance Score Distribution (Violin Plot)', fontsize=14)
ax.set_ylabel('Performance Score')
ax.set_xlabel('Course Level')

# White vertical grid lines at the category positions
ax.grid(axis='x', color='white', linestyle='-', linewidth=1.5)

plt.tight_layout()
plt.show()

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# --- 1. Load Data ---
df = pd.read_csv('student_combined_data.csv')

# --- 2. Figure setup ---
sns.set_style("darkgrid")
fig, ax = plt.subplots(figsize=(10, 5))

# --- 3. Violin plot of aptitude score per course level ---
level_order = ['Advanced', 'Intermediate', 'Foundation']
violin_pink = '#F6848F'  # same uniform pink as the performance violin plot

sns.violinplot(
    data=df,
    x='course_level',
    y='aptitude_score',
    order=level_order,
    color=violin_pink,
    inner='box',      # miniature box plot drawn inside each violin
    linewidth=1.2,
    ax=ax,
)

# --- 4. Labels and Title ---
ax.set_title('Aptitude Distribution (Violin Plot)', fontsize=14)
ax.set_ylabel('Aptitude Score')
ax.set_xlabel('Course Level')

# White vertical grid lines at the category positions
ax.grid(axis='x', color='white', linestyle='-', linewidth=1.5)

plt.tight_layout()
plt.show()

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# --- 1. Load Data ---
# 'student_combined_data.csv' is read via a relative path
df = pd.read_csv('student_combined_data.csv')

# --- 2. Figure setup ---
sns.set_style("darkgrid")
fig, ax = plt.subplots(figsize=(8, 5))

# --- 3. Bar plot of mean performance score with standard-deviation error bars ---
level_order = ['Advanced', 'Foundation', 'Intermediate']

# Black, slightly thick error-bar lines
error_bar_style = {'color': 'black', 'linewidth': 1.5}

sns.barplot(
    data=df,
    x='course_level',
    y='performance_score',
    order=level_order,
    width=0.5,             # slimmer bars than seaborn's default
    errorbar='sd',         # error bars span one standard deviation
    capsize=0.1,           # horizontal caps on the error bars
    hue='course_level',    # one colour per course level ...
    legend=False,          # ... without a redundant legend
    err_kws=error_bar_style,
    ax=ax,
)

# --- 4. Customization ---
ax.set_title('Mean Performance Score by Level (with SD)', fontsize=14)
ax.set_ylabel('Mean Performance Score')
ax.set_xlabel('Course Level')

# White vertical grid lines at the category positions
ax.grid(axis='x', color='white', linestyle='-', linewidth=1.5)

# Tilt the x-axis labels by 45 degrees
plt.xticks(rotation=45)

plt.tight_layout()
plt.show()

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# --- 1. Load Data ---
# 'student_combined_data.csv' is read via a relative path
df = pd.read_csv('student_combined_data.csv')

# --- 2. Styling Setup ---
sns.set_style("darkgrid")

# Create Figure
plt.figure(figsize=(8, 5))

# --- 3. Draw the Bar Plot with Standard Deviation ---
sns.barplot(
    data=df,
    x='course_level',
    y='aptitude_score',

    # Bar thickness; lower values make the bars slimmer than seaborn's default.
    width=0.5,

    # 1. Order of the categories along the x-axis
    order=['Advanced', 'Foundation', 'Intermediate'],

    # 2. Error Bars: 'sd' = Standard Deviation
    errorbar='sd',

    # 3. Capsize: Adds the horizontal "caps" to the error bars
    capsize=0.1,

    # 4. Coloring: one colour per course level, without a redundant legend
    hue='course_level',
    legend=False,

    # Black, slightly thick error bars
    err_kws={'color': 'black', 'linewidth': 1.5}
)

# --- 4. Customization ---
# The bars show 'aptitude_score', so the title and y-label must say "Aptitude"
# (they were previously copied unchanged from the performance bar plot).
plt.title('Mean Aptitude Score by Level (with SD)', fontsize=14)
plt.ylabel('Mean Aptitude Score')
plt.xlabel('Course Level')
# White vertical grid lines at the category positions
plt.grid(axis='x', color='white', linestyle='-', linewidth=1.5)
# Rotate x-axis labels 45 degrees
plt.xticks(rotation=45)

plt.tight_layout()
plt.show()

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats

# --- 1. Load Data ---
df = pd.read_csv('student_combined_data.csv')

# --- 2. Calculate Correlation ---
# Pearson's r is displayed in the plot title.
# It is computed on complete (performance, aptitude) pairs only: a missing value
# in either column would otherwise propagate into r (or raise, depending on the
# SciPy version), while the regression line below is fitted with missing rows
# dropped (seaborn's regplot default), so title and line could disagree.
paired_scores = df[['performance_score', 'aptitude_score']].dropna()
r, p_value = stats.pearsonr(
    paired_scores['performance_score'],
    paired_scores['aptitude_score']
)

# --- 3. Styling Setup ---
sns.set_style("darkgrid")
plt.figure(figsize=(10, 5))

# --- 4. Create the Visualization ---

# Layer 1: Scatter Plot (points coloured by course level)
sns.scatterplot(
    data=df,
    x='performance_score',
    y='aptitude_score',
    hue='course_level',  # Colors points by level
    s=80,                # Marker size
    alpha=0.7,           # Transparency (allows seeing overlapping points)
    palette='tab10'      # matplotlib's tab10 palette (blue/orange/green come first)
)

# Layer 2: Regression Line (red dashed line)
# scatter=False prevents drawing the points twice
# ci=None removes the shaded confidence interval area
sns.regplot(
    data=df,
    x='performance_score',
    y='aptitude_score',
    scatter=False,
    ci=None,
    line_kws={"color": "red", "ls": "--", "linewidth": 2.5} # Red, Dashed, Thick
)

# --- 5. Customization ---
plt.title(f'Performance vs Aptitude Score (r = {r:.3f})', fontsize=14)
plt.ylabel('Aptitude Score', fontsize=11)
plt.xlabel('Performance Score', fontsize=11)

# Place the legend in the upper left; frameon=False removes the box around it
plt.legend(loc='upper left', frameon=False)

plt.tight_layout()
plt.show()

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# --- 1. Load Data ---
df = pd.read_csv('student_combined_data.csv')

# --- 2. Styling Setup ---
sns.set_style("darkgrid")
plt.figure(figsize=(10, 5))

# --- 3. Define Custom Colors ---
# One colour per course level (pink, tan, green); the dict order is also the
# drawing order and therefore the legend order.
colors = {
    'Advanced': '#F49AC2',      # Pastel Pink
    'Intermediate': '#C4B083',  # Tan/Mustard
    'Foundation': '#93C47D'     # Soft Green
}
# --- 4. Plot Overlaid Histograms ---
# Each level is drawn as its own semi-transparent histogram on the same axes.
for level, color in colors.items():
    sns.histplot(
        data=df[df['course_level'] == level],
        x='performance_score',
        bins=15,
        alpha=0.6,
        color=color,
        label=level,       # legend text comes from here
        edgecolor=None,
        linewidth=0
    )

# --- 5. Customization ---
plt.title('Performance Score Histogram by Level', fontsize=14)
plt.ylabel('Frequency')
plt.xlabel('Performance Score')  # matches the plotted column, 'performance_score'

# Build the legend from the label attached to each histogram above.
# Passing a separate `labels=[...]` list would re-label the drawn artists purely
# by position, so a level with no rows would silently shift the names onto the
# wrong colours.
plt.legend()

plt.tight_layout()
plt.show()

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# --- 1. Load Data ---
df = pd.read_csv('student_combined_data.csv')

# --- 2. Styling Setup ---
sns.set_style("darkgrid")
plt.figure(figsize=(10, 5))

# --- 3. Define Custom Colors ---
# One colour per course level (pink, tan, green); the dict order is also the
# drawing order and therefore the legend order.
colors = {
    'Advanced': '#F49AC2',      # Pastel Pink
    'Intermediate': '#C4B083',  # Tan/Mustard
    'Foundation': '#93C47D'     # Soft Green
}
# --- 4. Plot Overlaid Histograms ---
# Each level is drawn as its own semi-transparent histogram on the same axes.
for level, color in colors.items():
    sns.histplot(
        data=df[df['course_level'] == level],
        x='aptitude_score',
        bins=15,
        alpha=0.6,
        color=color,
        label=level,       # legend text comes from here
        edgecolor=None,
        linewidth=0
    )

# --- 5. Customization ---
plt.title('Aptitude Score Histogram by Level', fontsize=14)
plt.ylabel('Frequency')
plt.xlabel('Aptitude Score')  # matches the plotted column, 'aptitude_score'

# Build the legend from the label attached to each histogram above.
# Passing a separate `labels=[...]` list would re-label the drawn artists purely
# by position, so a level with no rows would silently shift the names onto the
# wrong colours.
plt.legend()

plt.tight_layout()
plt.show()