In [1]:
import pandas as pd

from pprint import pprint
from mlxtend.frequent_patterns import apriori, association_rules

In [2]:
# Load the dataset from 'Student Depression Dataset.csv'; the path is relative,
# so the file must be in the kernel's working directory.
df = pd.read_csv('Student Depression Dataset.csv')

def classify_academic_pressure(pressure):
    """Bucket an academic-pressure score into 'Low', 'Medium' or 'High'.

    Args:
        pressure: Numeric academic-pressure score, or a missing value
            (None / NaN).

    Returns:
        'Low' for scores below 2.0, 'Medium' for scores in [2.0, 4.0),
        'High' for scores of 4.0 and above, and None when the score is
        missing.
    """
    # NaN fails every `<` comparison, so without this guard a missing score
    # would fall through to the last branch and be labelled 'High'.
    if pd.isna(pressure):
        return None
    if pressure < 2.0:
        return 'Low'
    if pressure < 4.0:
        return 'Medium'
    return 'High'

def classify_study_satisfaction(satisfaction):
    """Bucket a study-satisfaction score into 'Low', 'Medium' or 'High'.

    Args:
        satisfaction: Numeric study-satisfaction score, or a missing value
            (None / NaN).

    Returns:
        'Low' for scores below 2.0, 'Medium' for scores in [2.0, 4.0),
        'High' for scores of 4.0 and above, and None when the score is
        missing.
    """
    # NaN fails every `<` comparison, so without this guard a missing score
    # would fall through to the last branch and be labelled 'High'.
    if pd.isna(satisfaction):
        return None
    if satisfaction < 2.0:
        return 'Low'
    if satisfaction < 4.0:
        return 'Medium'
    return 'High'

def classify_financial_stress(stress):
    """Bucket a financial-stress score into 'Low', 'Medium' or 'High'.

    Args:
        stress: Numeric financial-stress score, or a missing value
            (None / NaN).

    Returns:
        'Low' for scores below 2.0, 'Medium' for scores in [2.0, 4.0),
        'High' for scores of 4.0 and above, and None when the score is
        missing.
    """
    # NaN fails every `<` comparison, so without this guard a missing score
    # would fall through to the last branch and be labelled 'High'.
    if pd.isna(stress):
        return None
    if stress < 2.0:
        return 'Low'
    if stress < 4.0:
        return 'Medium'
    return 'High'

def classify_work_study_hours(hours):
    """Bucket daily work/study hours into 'Low', 'Medium' or 'High'.

    Args:
        hours: Numeric number of work/study hours, or a missing value
            (None / NaN).

    Returns:
        'Low' for values below 5.0, 'Medium' for values in [5.0, 10.0),
        'High' for values of 10.0 and above, and None when the value is
        missing.
    """
    # NaN fails every `<` comparison, so without this guard a missing value
    # would fall through to the last branch and be labelled 'High'.
    if pd.isna(hours):
        return None
    if hours < 5.0:
        return 'Low'
    if hours < 10.0:
        return 'Medium'
    return 'High'

def classify_cgpa(cgpa):
    """Bucket a CGPA value into 'Low', 'Medium' or 'High'.

    Args:
        cgpa: Numeric CGPA, or a missing value (None / NaN).

    Returns:
        'Low' for values below 5.0, 'Medium' for values in [5.0, 7.0),
        'High' for values of 7.0 and above, and None when the value is
        missing.
    """
    # NaN fails every `<` comparison, so without this guard a missing CGPA
    # would fall through to the last branch and be labelled 'High'.
    if pd.isna(cgpa):
        return None
    if cgpa < 5.0:
        return 'Low'
    if cgpa < 7.0:
        return 'Medium'
    return 'High'

def classify_age(age):
    """Bucket an age into 'Teenager', 'Young Adult' or 'Adult'.

    Args:
        age: Numeric age, or a missing value (None / NaN).

    Returns:
        'Teenager' for ages below 20, 'Young Adult' for ages in [20, 30),
        'Adult' for ages of 30 and above, and None when the age is missing.
    """
    # NaN fails every `<` comparison, so without this guard a missing age
    # would fall through to the last branch and be labelled 'Adult'.
    if pd.isna(age):
        return None
    if age < 20:
        return 'Teenager'
    if age < 30:
        return 'Young Adult'
    return 'Adult'

# Append one categorical "<feature> Class" column per numeric feature, then
# drop the raw numeric columns together with the fields that are not used
# further ('id', 'Work Pressure', 'Job Satisfaction', 'City', 'Profession').
df = (
    df
    .assign(**{
        'Academic Pressure Class': df['Academic Pressure'].apply(classify_academic_pressure),
        'Study Satisfaction Class': df['Study Satisfaction'].apply(classify_study_satisfaction),
        'Financial Stress Class': df['Financial Stress'].apply(classify_financial_stress),
        'Work/Study Hours Class': df['Work/Study Hours'].apply(classify_work_study_hours),
        'CGPA Class': df['CGPA'].apply(classify_cgpa),
        'Age Class': df['Age'].apply(classify_age),
    })
    .drop(columns=[
        'id', 'Work Pressure', 'Job Satisfaction',
        'Academic Pressure', 'Study Satisfaction',
        'Financial Stress', 'Work/Study Hours',
        'Age', 'CGPA', 'City', 'Profession',
    ])
)

In [3]:
# One-hot encode every categorical column into indicator columns, then
# preview the first five rows of the encoded frame.
df_encoded = pd.get_dummies(data=df)
df_encoded.head(n=5)

Unnamed: 0,Depression,Gender_Female,Gender_Male,Sleep Duration_5-6 hours,Sleep Duration_7-8 hours,Sleep Duration_Less than 5 hours,Sleep Duration_More than 8 hours,Sleep Duration_Others,Dietary Habits_Healthy,Dietary Habits_Moderate,...,Financial Stress Class_Medium,Work/Study Hours Class_High,Work/Study Hours Class_Low,Work/Study Hours Class_Medium,CGPA Class_High,CGPA Class_Low,CGPA Class_Medium,Age Class_Adult,Age Class_Teenager,Age Class_Young Adult
0,1,False,True,True,False,False,False,False,True,False,...,False,False,True,False,True,False,False,True,False,False
1,0,True,False,True,False,False,False,False,False,True,...,True,False,True,False,False,False,True,False,False,True
2,0,False,True,False,False,True,False,False,True,False,...,False,False,False,True,True,False,False,True,False,False
3,1,True,False,False,True,False,False,False,False,True,...,False,False,True,False,False,False,True,False,False,True
4,0,True,False,True,False,False,False,False,False,True,...,False,False,True,False,True,False,False,False,False,True


In [4]:
# List every column produced by the one-hot encoding, one name per line.
pprint(df_encoded.columns.tolist())

['Depression',
 'Gender_Female',
 'Gender_Male',
 'Sleep Duration_5-6 hours',
 'Sleep Duration_7-8 hours',
 'Sleep Duration_Less than 5 hours',
 'Sleep Duration_More than 8 hours',
 'Sleep Duration_Others',
 'Dietary Habits_Healthy',
 'Dietary Habits_Moderate',
 'Dietary Habits_Others',
 'Dietary Habits_Unhealthy',
 'Degree_B.Arch',
 'Degree_B.Com',
 'Degree_B.Ed',
 'Degree_B.Pharm',
 'Degree_B.Tech',
 'Degree_BA',
 'Degree_BBA',
 'Degree_BCA',
 'Degree_BE',
 'Degree_BHM',
 'Degree_BSc',
 'Degree_Class 12',
 'Degree_LLB',
 'Degree_LLM',
 'Degree_M.Com',
 'Degree_M.Ed',
 'Degree_M.Pharm',
 'Degree_M.Tech',
 'Degree_MA',
 'Degree_MBA',
 'Degree_MBBS',
 'Degree_MCA',
 'Degree_MD',
 'Degree_ME',
 'Degree_MHM',
 'Degree_MSc',
 'Degree_Others',
 'Degree_PhD',
 'Have you ever had suicidal thoughts ?_No',
 'Have you ever had suicidal thoughts ?_Yes',
 'Family History of Mental Illness_No',
 'Family History of Mental Illness_Yes',
 'Academic Pressure Class_High',
 'Academic Pressure Class_Low',

In [5]:
# Mine itemsets that occur in at least 10% of rows. `Depression` is still an
# integer 0/1 column after get_dummies (the other columns are already bool),
# and mlxtend warns about / is slower on non-boolean frames, so cast the whole
# frame to bool first. The 0/1 values map to False/True, so supports are
# unchanged.
frq_items = apriori(df_encoded.astype(bool), min_support=0.1, use_colnames=True)

# Keep rules whose lift is at least 1 and order them by confidence, breaking
# ties by lift (both descending).
rules = association_rules(frq_items, metric="lift", min_threshold=1)
rules = rules.sort_values(['confidence', 'lift'], ascending=[False, False])



In [6]:
# Display the frequent itemsets returned by apriori together with their support.
frq_items

Unnamed: 0,support,itemsets
0,0.585499,(Depression)
1,0.44278,(Gender_Female)
2,0.55722,(Gender_Male)
3,0.221605,(Sleep Duration_5-6 hours)
4,0.263288,(Sleep Duration_7-8 hours)
...,...,...
864,0.112218,"(Depression, CGPA Class_High, Age Class_Young ..."
865,0.103509,"(Depression, CGPA Class_High, Financial Stress..."
866,0.10695,"(Depression, Age Class_Young Adult, Financial ..."
867,0.122755,"(Depression, CGPA Class_High, Age Class_Young ..."


In [7]:
# Show the first ten rules in the current (confidence, then lift) ordering.
rules.iloc[:10]

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
166,(Age Class_Teenager),(Degree_Class 12),0.112792,0.217913,0.112039,0.993327,4.558358,0.08746,117.201273,0.879864
3774,"(Have you ever had suicidal thoughts ?_Yes, Ag...",(Depression),0.110175,0.585499,0.10695,0.970722,1.657941,0.042442,14.157521,0.445978
3745,"(Have you ever had suicidal thoughts ?_Yes, CG...",(Depression),0.107165,0.585499,0.103509,0.965886,1.649681,0.040764,12.150577,0.441092
2562,"(Have you ever had suicidal thoughts ?_Yes, Fi...",(Depression),0.16512,0.585499,0.159027,0.9631,1.644922,0.062349,11.232984,0.46961
2576,"(Have you ever had suicidal thoughts ?_Yes, Wo...",(Depression),0.129171,0.585499,0.122791,0.95061,1.623591,0.047162,8.392486,0.441053
2286,"(Have you ever had suicidal thoughts ?_Yes, Di...",(Depression),0.1353,0.585499,0.12849,0.949669,1.621983,0.049272,8.235486,0.443472
3803,"(Have you ever had suicidal thoughts ?_Yes, CG...",(Depression),0.130712,0.585499,0.122755,0.939128,1.60398,0.046224,6.809397,0.433171
3543,"(Have you ever had suicidal thoughts ?_Yes, Ag...",(Depression),0.108634,0.585499,0.101573,0.935005,1.596938,0.037968,6.377428,0.419358
2616,"(Have you ever had suicidal thoughts ?_Yes, Ag...",(Depression),0.201964,0.585499,0.188524,0.933452,1.594285,0.070274,6.228572,0.467096
2300,"(Have you ever had suicidal thoughts ?_Yes, Di...",(Depression),0.14193,0.585499,0.132433,0.933081,1.593651,0.049332,6.194057,0.434126


In [8]:
# Pretty-print the full antecedent itemsets of the first ten rules, since the
# table view truncates them.
pprint(rules['antecedents'].head(10).tolist())

[frozenset({'Age Class_Teenager'}),
 frozenset({'Academic Pressure Class_High',
            'Age Class_Young Adult',
            'Financial Stress Class_High',
            'Have you ever had suicidal thoughts ?_Yes'}),
 frozenset({'Academic Pressure Class_High',
            'CGPA Class_High',
            'Financial Stress Class_High',
            'Have you ever had suicidal thoughts ?_Yes'}),
 frozenset({'Academic Pressure Class_High',
            'Financial Stress Class_High',
            'Have you ever had suicidal thoughts ?_Yes'}),
 frozenset({'Academic Pressure Class_High',
            'Have you ever had suicidal thoughts ?_Yes',
            'Work/Study Hours Class_High'}),
 frozenset({'Academic Pressure Class_High',
            'Dietary Habits_Unhealthy',
            'Have you ever had suicidal thoughts ?_Yes'}),
 frozenset({'Academic Pressure Class_High',
            'Age Class_Young Adult',
            'CGPA Class_High',
            'Have you ever had suicidal thoughts ?_Yes'})

In [9]:
# Keep only the rules that mention the target item on either side.
specific_item = 'Depression'
filtered_rules = rules[rules['antecedents'].apply(lambda x: specific_item in x) |
                       rules['consequents'].apply(lambda x: specific_item in x)]

# Print a plain-language explanation for the first 10 matching rules; rows
# keep the ordering of `rules`.
for idx, (_, rule) in enumerate(filtered_rules.head(10).iterrows()):
    # Itemsets are frozensets, whose iteration order is arbitrary; sorting the
    # items keeps the printed text stable from run to run.
    antecedents = ', '.join(sorted(rule['antecedents']))
    consequents = ', '.join(sorted(rule['consequents']))
    # The rows describe students, so the message says "student" rather than
    # the "customer" wording of a market-basket template.
    print(f"{idx} - If a student has the attributes: {antecedents}, they are likely to have: {consequents} "
          f"(confidence: {rule['confidence']:.2f}, lift: {rule['lift']:.2f})")

0 - If a customer has the attributes: Have you ever had suicidal thoughts ?_Yes, Age Class_Young Adult, Financial Stress Class_High, Academic Pressure Class_High, they are likely to have: Depression (confidence: 0.97, lift: 1.66)
1 - If a customer has the attributes: Have you ever had suicidal thoughts ?_Yes, CGPA Class_High, Financial Stress Class_High, Academic Pressure Class_High, they are likely to have: Depression (confidence: 0.97, lift: 1.65)
2 - If a customer has the attributes: Have you ever had suicidal thoughts ?_Yes, Financial Stress Class_High, Academic Pressure Class_High, they are likely to have: Depression (confidence: 0.96, lift: 1.64)
3 - If a customer has the attributes: Have you ever had suicidal thoughts ?_Yes, Work/Study Hours Class_High, Academic Pressure Class_High, they are likely to have: Depression (confidence: 0.95, lift: 1.62)
4 - If a customer has the attributes: Have you ever had suicidal thoughts ?_Yes, Dietary Habits_Unhealthy, Academic Pressure Class_H