## Association Rule Mining using Apriori Algorithm

To discover association patterns in the traffic accident data, we first need to binarize each categorical attribute, e.g., 
by introducing two variables representing ‘good’ or ‘bad’ weather conditions, or two variables representing ‘Yes’ or ‘No’ 
for Seat Belt. 

Write a script to generate the frequent itemsets using any threshold for the minimal support. 
Write an Apriori algorithm to generate the association rules with high confidence.

### Importing the required packages

In [147]:
import os
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

### Reading CSV file into data frame from the current working directory

In [148]:
def read_csv_file():
    """Load ``traffic_accident_data.csv`` from the current working directory.

    The working directory is printed first, then the file is parsed as a
    semicolon-separated CSV.

    Returns:
        pandas.DataFrame: The parsed contents of the file.

    Raises:
        FileNotFoundError: If ``traffic_accident_data.csv`` is not present in
            the current working directory.
    """
    directory = os.getcwd()
    print("The directory to read the csv files from is: ", directory)
    # Concatenating the absolute file path and file name
    traffic_accident_file = os.path.join(os.path.abspath(directory), 'traffic_accident_data.csv')

    # Fail with an explicit error: previously a missing file only printed a
    # message and then crashed with UnboundLocalError at the return statement.
    if not os.path.exists(traffic_accident_file):
        raise FileNotFoundError(
            'File traffic_accident_data.csv does not exist at the location: '
            + traffic_accident_file
        )

    return pd.read_csv(traffic_accident_file, sep=';')

# Load the accident records; the binarization cell below adds columns to this DataFrame.
traffic_accident = read_csv_file()

The directory to read the csv files from is:  C:\Users\munge\Data Mining and Knowledge Discovery\Association Rule Mining Apriori Algorithm


In [149]:
traffic_accident

Unnamed: 0,Weather Condition,Driver's Condition,Traffic Violation,Seat Belt,Crash Severity
0,Good,Alcohol-impaired,Exceed speed limit,No,Major
1,Bad,Sober,,Yes,Minor
2,Good,Sober,Disobey stop sign,Yes,Minor
3,Good,Sober,Exceed speed limit,Yes,Major
4,Bad,Sober,Disobey traffic signal,No,Major
5,Good,Alcohol-impaired,Disobey stop sign,Yes,Minor
6,Bad,Alcohol-impaired,,Yes,Major
7,Good,Sober,Disobey traffic signal,Yes,Major
8,Good,Alcohol-impaired,,No,Major
9,Bad,Sober,Disobey traffic signal,No,Major


### Introducing separate columns based on each column value in a row

In [150]:
# Binarize every categorical attribute: each category value becomes its own
# boolean indicator column, and the indicator-only frame is what gets passed
# to apriori further down.
# Maps each new indicator column to (source column, category value).
indicator_columns = {
    'Good_Weather_Condition': ('Weather Condition', 'Good'),
    'Bad_Weather_Condition': ('Weather Condition', 'Bad'),
    'Sober_Driver': ("Driver's Condition", 'Sober'),
    'Alcoholic_Driver': ("Driver's Condition", 'Alcohol-impaired'),
    'No_Traffic_Violation': ('Traffic Violation', 'None'),
    'Traffic_Signal_Violation': ('Traffic Violation', 'Disobey traffic signal'),
    'Speed_Limit_Violation': ('Traffic Violation', 'Exceed speed limit'),
    'Stop_Sign_Violation': ('Traffic Violation', 'Disobey stop sign'),
    'Seat_Belt_Present': ('Seat Belt', 'Yes'),
    'Seat_Belt_Absent': ('Seat Belt', 'No'),
    'Minor_Crash': ('Crash Severity', 'Minor'),
    'Major_Crash': ('Crash Severity', 'Major'),
}

# pandas' CSV reader can parse the literal text "None" as a missing value
# (it is among the default NA markers in recent versions), in which case
# `== 'None'` would never match and No_Traffic_Violation would be all False.
# Compare against a copy where missing violations are restored to 'None';
# the original 'Traffic Violation' column itself is left untouched.
# NOTE(review): this also counts genuinely blank violations as "None" — confirm
# that is acceptable for this dataset.
category_values = traffic_accident[
    ['Weather Condition', "Driver's Condition", 'Traffic Violation', 'Seat Belt', 'Crash Severity']
].fillna({'Traffic Violation': 'None'})

# The comparison already yields booleans, so no np.where(..., True, False) is needed.
for indicator, (source_column, category) in indicator_columns.items():
    traffic_accident[indicator] = category_values[source_column] == category

# Keep only the indicator columns, in the order they were declared above.
traffic_accident_data = traffic_accident[list(indicator_columns)]
traffic_accident_data

Unnamed: 0,Good_Weather_Condition,Bad_Weather_Condition,Sober_Driver,Alcoholic_Driver,No_Traffic_Violation,Traffic_Signal_Violation,Speed_Limit_Violation,Stop_Sign_Violation,Seat_Belt_Present,Seat_Belt_Absent,Minor_Crash,Major_Crash
0,True,False,False,True,False,False,True,False,False,True,False,True
1,False,True,True,False,True,False,False,False,True,False,True,False
2,True,False,True,False,False,False,False,True,True,False,True,False
3,True,False,True,False,False,False,True,False,True,False,False,True
4,False,True,True,False,False,True,False,False,False,True,False,True
5,True,False,False,True,False,False,False,True,True,False,True,False
6,False,True,False,True,True,False,False,False,True,False,False,True
7,True,False,True,False,False,True,False,False,True,False,False,True
8,True,False,False,True,True,False,False,False,False,True,False,True
9,False,True,True,False,False,True,False,False,False,True,False,True


### Frequent Itemsets

In [151]:
# Mine the itemsets that meet the 0.2 minimum-support threshold, labelled by
# column name (use_colnames=True).
frequent_itemsets = apriori(
    traffic_accident_data,
    use_colnames=True,
    min_support=0.2,
)
# Record the number of items in each itemset alongside its support.
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(len)
frequent_itemsets

Unnamed: 0,support,itemsets,length
0,0.583333,(Good_Weather_Condition),1
1,0.416667,(Bad_Weather_Condition),1
2,0.583333,(Sober_Driver),1
3,0.416667,(Alcoholic_Driver),1
4,0.25,(No_Traffic_Violation),1
5,0.25,(Traffic_Signal_Violation),1
6,0.25,(Speed_Limit_Violation),1
7,0.25,(Stop_Sign_Violation),1
8,0.666667,(Seat_Belt_Present),1
9,0.333333,(Seat_Belt_Absent),1


## Generating the association rules with high confidence

In [152]:
# Import the rule generator under an alias so the result never hides it.
# The original `association_rules = association_rules(...)` replaced the
# imported function with a DataFrame, so running this cell a second time
# failed with "TypeError: 'DataFrame' object is not callable".
from mlxtend.frequent_patterns import association_rules as generate_association_rules

# Derive rules from the frequent itemsets, keeping those with confidence >= 0.2.
rules = generate_association_rules(frequent_itemsets, metric='confidence', min_threshold=0.2)
# NOTE(review): the old name stays bound to the result in case other cells
# read the rules from it — drop this line once confirmed unused.
association_rules = rules
# Sorted the confidence value in descending order
rules.sort_values(by=['confidence'], ascending=False)

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
34,(Stop_Sign_Violation),(Minor_Crash),0.250000,0.333333,0.25,1.000,3.0,0.166667,inf
44,"(Good_Weather_Condition, Sober_Driver)",(Seat_Belt_Present),0.250000,0.666667,0.25,1.000,1.5,0.083333,inf
69,(Traffic_Signal_Violation),"(Sober_Driver, Major_Crash)",0.250000,0.333333,0.25,1.000,3.0,0.166667,inf
59,(Speed_Limit_Violation),"(Good_Weather_Condition, Major_Crash)",0.250000,0.416667,0.25,1.000,2.4,0.145833,inf
31,(Speed_Limit_Violation),(Major_Crash),0.250000,0.666667,0.25,1.000,1.5,0.083333,inf
...,...,...,...,...,...,...,...,...,...
71,(Major_Crash),"(Traffic_Signal_Violation, Sober_Driver)",0.666667,0.250000,0.25,0.375,1.5,0.083333,1.200000
30,(Major_Crash),(Speed_Limit_Violation),0.666667,0.250000,0.25,0.375,1.5,0.083333,1.200000
29,(Major_Crash),(Traffic_Signal_Violation),0.666667,0.250000,0.25,0.375,1.5,0.083333,1.200000
63,(Seat_Belt_Present),"(Good_Weather_Condition, Major_Crash)",0.666667,0.416667,0.25,0.375,0.9,-0.027778,0.933333
