In [None]:
import pandas as pd
import matplotlib.pyplot as plt
from mlxtend.frequent_patterns import apriori, association_rules

In [None]:
# Location of the exam records CSV; a relative path, so it is resolved
# against the notebook's current working directory.
file_path = "graded_exams.csv"

# Read the CSV into the DataFrame that the cells below operate on.
df = pd.read_csv(filepath_or_buffer=file_path)


In [None]:
# Preview the first few rows to confirm the file loaded as expected.
# Ending the cell with the bare expression (instead of print) lets Jupyter
# render the frame as a rich, aligned table rather than wrapped plain text.
df.head()

In [None]:
# Columns fed into association rule mining.
# NOTE(review): the three grade columns are assumed to hold categorical
# labels (e.g. letter grades) rather than numeric scores -- confirm.
apriori_columns = [
    'gender',
    'race/ethnicity',
    'parental level of education',
    'lunch',
    'test preparation course',
    'math grade',
    'reading grade',
    'writing grade',
]

# Keep only those columns, in the order listed above.
df_apriori = df[apriori_columns]

In [None]:
# Mining thresholds, named so they can be tuned in one place.
MIN_SUPPORT = 0.05     # minimum support passed to apriori
MIN_CONFIDENCE = 0.6   # minimum confidence a rule must reach to be kept

# One-hot encode the categorical columns into indicator columns.
# dtype=bool is requested explicitly: older pandas releases default to uint8
# indicators, and mlxtend's apriori warns about (and is slower on) non-boolean
# input frames.
df_apriori_encoded = pd.get_dummies(df_apriori, dtype=bool)

# Find every itemset whose support meets the threshold; use_colnames=True keeps
# the readable "column_value" labels instead of positional column indices.
frequent_itemsets = apriori(
    df_apriori_encoded,
    min_support=MIN_SUPPORT,
    use_colnames=True,
)

# Derive association rules from the frequent itemsets, filtered by confidence.
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=MIN_CONFIDENCE,
)

# Order rules from highest to lowest lift so the strongest associations come first.
rules_sorted = rules.sort_values(by="lift", ascending=False)

In [None]:
# Render the mined rules as a formatted table.
from IPython.display import display

# Columns shown for each rule: both sides of the rule plus its key metrics.
rule_columns = ['antecedents', 'consequents', 'support', 'confidence', 'lift']

# Show every rule, already ordered by descending lift.
display(rules_sorted.loc[:, rule_columns])

In [None]:
# Show only the ten rules with the highest lift (rules_sorted is ordered by
# descending lift, so these are its first ten rows).
top_rules = rules_sorted.head(10)
display(top_rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']])