# 2.3: Data-driven Recruitment

In [1]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

### Loading and Processing Data

In [2]:
# Read the recruitment CSV into a DataFrame
df = pd.read_csv(filepath_or_buffer="fau_clinic_recruitment.csv")

In [3]:
# Columns that are left out of the rule mining below
dropped_columns = [
    'gender', 'location', 'hired',
    'family_nurse', 'occupational_health_nursing', 'gerontological_nursing',
]

# Left-closed bins for 'experience': [0, 5), [5, 10), [10, 15), [15, inf)
experience_edges = [0, 5, 10, 15, float('inf')]
experience_labels = ['0-5', '5-10', '10-15', '15+']

# Drop the columns, then replace 'experience' with the label of the bin it falls in
relevant_data = df.drop(columns=dropped_columns).assign(
    experience=lambda frame: pd.cut(
        frame['experience'],
        bins=experience_edges,
        labels=experience_labels,
        right=False,
    )
)


In [4]:
# One-hot encode the categorical columns
encoded = pd.get_dummies(relevant_data, columns=['experience', 'education', 'field'])

# apriori only accepts 0/1 or True/False values and warns (slower path, support
# may be dropped) when the frame is not purely boolean. Check that every column
# really is binary, then hand over a bool-typed frame.
is_binary = ((encoded == 0) | (encoded == 1)).all()
non_binary_columns = is_binary.index[~is_binary].tolist()
if non_binary_columns:
    raise ValueError(
        f"Columns with values other than 0/1 cannot be used as items: {non_binary_columns}"
    )

relevant_data_encoded = encoded.astype(bool)

### Applying Apriori Algorithm

In [5]:
# Mine frequent itemsets: keep every itemset whose support is at least 0.02,
# and report items by column name instead of column index
frequent_itemsets = apriori(
    df=relevant_data_encoded,
    min_support=0.02,
    use_colnames=True,
)

### Generating Association Rules

In [6]:
# Derive association rules from the frequent itemsets, keeping rules whose
# confidence is at least 0.25
rules = association_rules(
    df=frequent_itemsets,
    metric="confidence",
    min_threshold=0.25,
)

In [7]:
# Keep only the rules whose consequent is exactly the single item
# 'critical_care_nursing'
target_consequent = {'critical_care_nursing'}
has_target_consequent = rules['consequents'].eq(target_consequent)
rules_critical_care_nursing = rules.loc[has_target_consequent]


In [8]:
# Order the filtered rules from highest to lowest lift
sorted_rules = rules_critical_care_nursing.sort_values('lift', ascending=False)

In [9]:
# Print the top 5 rules.
# The default column width cuts long antecedents off with "...", which makes
# the rule unreadable; lift the limit for this output only.
with pd.option_context("display.max_colwidth", None):
    print(sorted_rules.head(5))

                                            antecedents  \
981                   (professional, patience, empathy)   
922                     (patience, empathy, confidence)   
1034              (patience, education_master, empathy)   
311                             (professional, empathy)   
1684  (confidence, patience, field_family nurse prac...   

                  consequents  antecedent support  consequent support  \
981   (critical_care_nursing)            0.057333            0.097333   
922   (critical_care_nursing)            0.062000            0.097333   
1034  (critical_care_nursing)            0.087333            0.097333   
311   (critical_care_nursing)            0.079333            0.097333   
1684  (critical_care_nursing)            0.053333            0.097333   

       support  confidence      lift  leverage  conviction  zhangs_metric  
981   0.025333    0.441860  4.539662  0.019753    1.617278       0.827142  
922   0.025333    0.408602  4.197967  0.019299    1.52