### Import Required Libraries

In [21]:
import warnings
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from mlxtend.frequent_patterns import apriori, association_rules
# Suppress every warning so the recorded cell outputs stay compact.
# NOTE(review): this blanket filter also hides deprecation notices from the
# imported libraries; consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

# Seed NumPy's legacy global random generator and pick the plotting style.
np.random.seed(42)
plt.style.use('seaborn-v0_8-whitegrid')

# \u2713 is the check mark; the escape keeps this source line pure ASCII so the
# symbol cannot be mis-decoded when the notebook is exported or converted.
print("\u2713 Libraries imported successfully")

✓ Libraries imported successfully


### Load Dataset

In [22]:
# Read the survey CSV from the working directory, then report the frame's
# shape and its column names (separated by one blank line).
df = pd.read_csv('Depression Professional Dataset.csv')
print(
    f"Dataset loaded: {df.shape}",
    f"\nColumns: {df.columns.tolist()}",
    sep="\n",
)

Dataset loaded: (2054, 11)

Columns: ['Gender', 'Age', 'Work Pressure', 'Job Satisfaction', 'Sleep Duration', 'Dietary Habits', 'Have you ever had suicidal thoughts ?', 'Work Hours', 'Financial Stress', 'Family History of Mental Illness', 'Depression']


### Data Preparation for Association Rule Mining

In [23]:
# Create a copy for ARM so the loaded frame `df` itself is never modified
df_arm = df.copy()

# Discretize numerical columns into categorical bins.
# Each spec is (source column, new column, bin edges, labels).
# pd.cut builds right-closed intervals, so edges [0, 2, 4, 6] produce
# (0, 2], (2, 4] and (4, 6].
binning_specs = [
    # The last label is '55+', so the top edge is open-ended (np.inf) instead
    # of 65; otherwise any age above 65 would silently become NaN.
    ('Age', 'Age_bin', [0, 25, 35, 45, 55, np.inf],
     ['18-25', '25-35', '35-45', '45-55', '55+']),
    ('Work Pressure', 'WorkPressure_bin', [0, 2, 4, 6], ['Low', 'Medium', 'High']),
    ('Job Satisfaction', 'JobSatisfaction_bin', [0, 2, 4, 6], ['Low', 'Medium', 'High']),
    # The lower edge of -1 keeps a value of 0 hours inside the 'Low' bin.
    ('Work Hours', 'WorkHours_bin', [-1, 4, 8, 12], ['Low', 'Medium', 'High']),
    ('Financial Stress', 'FinancialStress_bin', [0, 2, 4, 6], ['Low', 'Medium', 'High']),
]

print("Discretizing numerical attributes...")
for source_col, binned_col, edges, labels in binning_specs:
    df_arm[binned_col] = pd.cut(df_arm[source_col], bins=edges, labels=labels)

    # sort_index() lists the bins in their natural (category) order.
    bin_counts = df_arm[binned_col].value_counts().sort_index().to_dict()
    print(f"\n{source_col} binning: {bin_counts}")

    # A value that is present in the source but NaN after binning fell outside
    # every interval; it would later be encoded as "no item" for this
    # attribute, so make that visible instead of losing it silently.
    n_unbinned = int((df_arm[source_col].notna() & df_arm[binned_col].isna()).sum())
    if n_unbinned:
        print(f"  WARNING: {n_unbinned} '{source_col}' value(s) fall outside "
              f"the bin edges {edges} and were left unbinned (NaN)")

Discretizing numerical attributes...

Age binning: {'18-25': 230, '25-35': 338, '35-45': 585, '45-55': 606, '55+': 295}

Work Pressure binning: {'Medium': 816, 'Low': 814, 'High': 424}

Job Satisfaction binning: {'Low': 827, 'Medium': 796, 'High': 431}

Work Hours binning: {'Low': 804, 'High': 651, 'Medium': 599}

Financial Stress binning: {'Low': 854, 'Medium': 795, 'High': 405}


### Create Binary Encoded Dataset for Apriori

In [24]:
# Select attributes for ARM
arm_cols = ['Gender', 'Age_bin', 'WorkPressure_bin', 'JobSatisfaction_bin',
            'Sleep Duration', 'Dietary Habits', 'Have you ever had suicidal thoughts ?',
            'WorkHours_bin', 'FinancialStress_bin', 'Family History of Mental Illness', 'Depression']

# Attributes missing from df_arm are still skipped (best effort), but the skip
# is reported so rules are never mined on fewer attributes without notice.
missing_cols = [col for col in arm_cols if col not in df_arm.columns]
if missing_cols:
    print(f"WARNING: columns not found and skipped: {missing_cols}")

# One-hot encode each attribute into '<column>_<value>' indicator columns.
# dtype=bool makes the result boolean on every pandas version.
# The pieces are collected first and concatenated once, rather than growing a
# DataFrame inside the loop.
encoded_parts = [
    pd.get_dummies(df_arm[col], prefix=col, dtype=bool)
    for col in arm_cols
    if col in df_arm.columns
]
df_arm_encoded = pd.concat(encoded_parts, axis=1) if encoded_parts else pd.DataFrame()

print(f"Encoded dataset shape: {df_arm_encoded.shape}")
print(f"Number of features (items): {df_arm_encoded.shape[1]}")
print(f"\nFirst 5 rows (subset):")
print(df_arm_encoded.iloc[:5, :10])

Encoded dataset shape: (2054, 32)
Number of features (items): 32

First 5 rows (subset):
   Gender_Female  Gender_Male  Age_bin_18-25  Age_bin_25-35  Age_bin_35-45  \
0           True        False          False          False           True   
1          False         True          False          False          False   
2           True        False          False          False           True   
3           True        False          False          False           True   
4          False         True          False          False          False   

   Age_bin_45-55  Age_bin_55+  WorkPressure_bin_Low  WorkPressure_bin_Medium  \
0          False        False                  True                    False   
1          False         True                 False                     True   
2          False        False                  True                    False   
3          False        False                 False                     True   
4           True        False             

## Apply Apriori Algorithm

In [25]:
print("="*80)
print("APPLYING APRIORI ALGORITHM")
print("="*80)

# Mine every itemset whose support (share of rows containing all its items)
# is at least min_support.
# NOTE(review): the recorded output shows Depression_No at 0.9012 support; if
# Depression is binary, Depression_Yes sits near 0.0988, just below this
# threshold, so it can never appear in a frequent itemset or a rule. Lower
# min_support if rules about Depression_Yes are wanted.
min_support = 0.10  # 10% minimum support
frequent_itemsets = apriori(df_arm_encoded, min_support=min_support, use_colnames=True)

print(f"\nParameters:")
print(f"  Minimum Support: {min_support} ({min_support*100:.0f}%)")
print(f"\nResults:")
print(f"  Frequent Itemsets Found: {len(frequent_itemsets)}")
print(f"\nTop 10 Frequent Itemsets (by support):")
top_itemsets = frequent_itemsets.sort_values('support', ascending=False).head(10)
for support, items in zip(top_itemsets['support'], top_itemsets['itemsets']):
    # Itemsets are frozensets, whose iteration order can differ between kernel
    # sessions; sorting the item names keeps the printed text reproducible.
    itemset = ', '.join(sorted(items))
    print(f"  {support:.4f} ({support*100:.2f}%): {itemset}")

APPLYING APRIORI ALGORITHM

Parameters:
  Minimum Support: 0.1 (10%)

Results:
  Frequent Itemsets Found: 525

Top 10 Frequent Itemsets (by support):
  0.9012 (90.12%): Depression_No
  0.5190 (51.90%): Gender_Male
  0.5185 (51.85%): Have you ever had suicidal thoughts ?_No
  0.5093 (50.93%): Family History of Mental Illness_No
  0.5029 (50.29%): Have you ever had suicidal thoughts ?_No, Depression_No
  0.4907 (49.07%): Family History of Mental Illness_Yes
  0.4815 (48.15%): Have you ever had suicidal thoughts ?_Yes
  0.4810 (48.10%): Gender_Female
  0.4688 (46.88%): Gender_Male, Depression_No
  0.4615 (46.15%): Family History of Mental Illness_No, Depression_No


## Generate Association Rules

In [26]:
# Generate association rules from the frequent itemsets, keeping only rules
# whose confidence reaches min_confidence, ordered from highest to lowest lift.
# (The former `rules['lift'] = rules['lift']` self-assignment was a no-op and
# has been dropped.)
min_confidence = 0.30  # 30% minimum confidence
rules = association_rules(frequent_itemsets, metric='confidence', min_threshold=min_confidence)
rules = rules.sort_values('lift', ascending=False)

print("="*80)
print("ASSOCIATION RULES GENERATED")
print("="*80)

print(f"\nParameters:")
print(f"  Minimum Confidence: {min_confidence} ({min_confidence*100:.0f}%)")
print(f"\nResults:")
print(f"  Association Rules Found: {len(rules)}")
print(f"\nTop 10 Rules (by Lift):")
for idx, (_, rule) in enumerate(rules.head(10).iterrows(), 1):
    # Antecedents/consequents are frozensets; sort the item names so the
    # printed rule text does not depend on set iteration order.
    antecedent = ', '.join(sorted(rule['antecedents']))
    consequent = ', '.join(sorted(rule['consequents']))
    print(f"\n  Rule {idx}:")
    print(f"    IF {antecedent}")
    print(f"    THEN {consequent}")
    print(f"    Support: {rule['support']:.4f}, Confidence: {rule['confidence']:.4f}, Lift: {rule['lift']:.4f}")

ASSOCIATION RULES GENERATED

Parameters:
  Minimum Confidence: 0.3 (30%)

Results:
  Association Rules Found: 1478

Top 10 Rules (by Lift):

  Rule 1:
    IF Have you ever had suicidal thoughts ?_Yes, Depression_No
    THEN Age_bin_45-55
    Support: 0.1456, Confidence: 0.3655, Lift: 1.2389

  Rule 2:
    IF Age_bin_45-55
    THEN Have you ever had suicidal thoughts ?_Yes, Depression_No
    Support: 0.1456, Confidence: 0.4934, Lift: 1.2389

  Rule 3:
    IF Gender_Female, WorkPressure_bin_Low
    THEN Family History of Mental Illness_Yes, Depression_No
    Support: 0.1008, Confidence: 0.5405, Lift: 1.2294

  Rule 4:
    IF WorkPressure_bin_Low, Family History of Mental Illness_No
    THEN Gender_Male, Depression_No
    Support: 0.1076, Confidence: 0.5581, Lift: 1.1903

  Rule 5:
    IF Family History of Mental Illness_Yes, Gender_Female, Depression_No
    THEN WorkPressure_bin_Low
    Support: 0.1008, Confidence: 0.4662, Lift: 1.1764

  Rule 6:
    IF WorkPressure_bin_Low, Have you eve

## Interpretation of Top 5 Association Rules

In [27]:
print("="*80)
print("DETAILED INTERPRETATION OF KEY ASSOCIATION RULES")
print("="*80)


def _format_items(itemset):
    """Join an itemset's names with ', ' in sorted (reproducible) order."""
    return ', '.join(sorted(itemset))


def _print_rule_header(rank, rule):
    """Print the 'Rule N / IF / THEN' lines shared by both report branches."""
    print(f"\nRule {rank}:")
    print(f"  IF: {_format_items(rule['antecedents'])}")
    print(f"  THEN: {_format_items(rule['consequents'])}")


# Keep the 5 highest-lift rules whose consequent contains Depression_Yes
# (`rules` is already sorted by descending lift).
# NOTE(review): an item only appears in rules if its own support reaches the
# Apriori min_support. The recorded run shows Depression_No at 0.9012 support,
# so Depression_Yes (~0.0988 if the column is binary) falls below the 0.10
# threshold and the fallback branch below is the one that runs.
depression_rules = rules[rules['consequents'].apply(lambda x: 'Depression_Yes' in x)].head(5)

if len(depression_rules) > 0:
    print(f"\nRules predicting DEPRESSION (Top {len(depression_rules)}):")
    for idx, (_, rule) in enumerate(depression_rules.iterrows(), 1):
        _print_rule_header(idx, rule)
        print(f"  Support: {rule['support']:.4f} ({rule['support']*100:.1f}% of transactions)")
        print(f"  Confidence: {rule['confidence']:.4f} ({rule['confidence']*100:.1f}% of cases when antecedent is true)")

        # Lift below 1 means the consequent becomes LESS likely; word it that
        # way instead of printing "increased by" with a negative percentage.
        lift_change = (rule['lift'] - 1) * 100
        direction = "increased" if lift_change >= 0 else "decreased"
        print(f"  Lift: {rule['lift']:.4f} (likelihood {direction} by {abs(lift_change):.1f}%)")

        # Grade the rule by confidence alone: > 0.7 strong, > 0.5 moderate.
        if rule['confidence'] > 0.7:
            strength, adverb = "STRONG", "strongly"
        elif rule['confidence'] > 0.5:
            strength, adverb = "MODERATE", "moderately"
        else:
            strength, adverb = "WEAK", "weakly"
        # \U0001F4CA is the bar-chart emoji, written as an escape so the source
        # stays ASCII and cannot be mis-decoded.
        print(f"  \U0001F4CA Interpretation: {strength} association - conditions {adverb} predict depression")
else:
    print("\nNo depression-specific rules found. Showing top 5 general rules by lift:\n")
    for idx, (_, rule) in enumerate(rules.head(5).iterrows(), 1):
        _print_rule_header(idx, rule)
        print(f"  Support: {rule['support']:.4f}")
        print(f"  Confidence: {rule['confidence']:.4f}")
        print(f"  Lift: {rule['lift']:.4f}")

DETAILED INTERPRETATION OF KEY ASSOCIATION RULES

No depression-specific rules found. Showing top 5 general rules by lift:


Rule 1:
  IF: Have you ever had suicidal thoughts ?_Yes, Depression_No
  THEN: Age_bin_45-55
  Support: 0.1456
  Confidence: 0.3655
  Lift: 1.2389

Rule 2:
  IF: Age_bin_45-55
  THEN: Have you ever had suicidal thoughts ?_Yes, Depression_No
  Support: 0.1456
  Confidence: 0.4934
  Lift: 1.2389

Rule 3:
  IF: Gender_Female, WorkPressure_bin_Low
  THEN: Family History of Mental Illness_Yes, Depression_No
  Support: 0.1008
  Confidence: 0.5405
  Lift: 1.2294

Rule 4:
  IF: WorkPressure_bin_Low, Family History of Mental Illness_No
  THEN: Gender_Male, Depression_No
  Support: 0.1076
  Confidence: 0.5581
  Lift: 1.1903

Rule 5:
  IF: Family History of Mental Illness_Yes, Gender_Female, Depression_No
  THEN: WorkPressure_bin_Low
  Support: 0.1008
  Confidence: 0.4662
  Lift: 1.1764

No depression-specific rules found. Showing top 5 general rules by lift:


Rule 1:
  IF

### Save Association Rules

In [28]:
# Save the highest-lift rules to CSV (`rules` is already sorted by lift).
top_n = 20
output_path = Path('output_files') / 'Task4_Association_Rules.csv'
# to_csv does not create directories; make sure the target folder exists.
output_path.parent.mkdir(parents=True, exist_ok=True)

# Flatten the frozenset columns to plain text; item names are sorted so the
# saved file is identical across kernel sessions.
arm_results = rules.copy()
for side in ('antecedents', 'consequents'):
    arm_results[side] = arm_results[side].apply(lambda items: ', '.join(sorted(items)))

saved_rules = arm_results[['antecedents', 'consequents', 'support', 'confidence', 'lift']].head(top_n)
saved_rules.to_csv(output_path, index=False)

# as_posix() keeps forward slashes in the message on every platform.
print(f"\n\u2713 Saved: {output_path.as_posix()}")
# Report what was actually written: only the top rows reach the file, not
# every generated rule.
print(f"\nRules saved: {len(saved_rules)} of {len(arm_results)} generated (top {top_n} by lift)")

print("\n" + "="*80)
print("TASK 4: ASSOCIATION RULE MINING COMPLETED")
print("="*80)


✓ Saved: output_files/Task4_Association_Rules.csv

Total rules saved: 1478 (showing top 20)

TASK 4: ASSOCIATION RULE MINING COMPLETED
