<a href="https://colab.research.google.com/github/aditya26091999/AssociativeClassifier/blob/master/AssociativeClassifier_SV_PHD.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Associative Classifier (Classification based on Apriori Algorithm generated rules)

##### **Step 1 : Pre-requisite setup (Data cleaning, Transactional dataset preparation)**

1.1 Installing external dependencies and libraries

In [None]:
#Downloading external libraries
!pip install efficient-apriori

#Importing necessary dependencies and libraries
import sys
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from efficient_apriori import apriori

1.2 Fetching the dataset

In [None]:
#The dataset CSV has to be uploaded to the Google Colab Files section beforehand.
#Change the path below so that it references the appropriate CSV file.
dataset_csv_path = '/content/BuyComputerDataset.csv'
original_dataset = pd.read_csv(dataset_csv_path)

#Banner followed by the rich rendering of the loaded DataFrame
print('------------------\n#DATASET VALUES\n------------------')
display(original_dataset)

1.3 Basic EDA of Dataset

In [None]:
#Print a banner, then let DataFrame.info() write its report to standard output
print('------------------\n#DATASET INFO()\n------------------')
original_dataset.info()

#Compute the describe() table first and leave it as the last expression of the
#cell so that the notebook renders it as the cell output
print('\n------------------\n#DATASET DESCRIBE()\n------------------')
dataset_description = original_dataset.describe()
dataset_description

**1.4 Converting the dataset into transactional dataset**

In [None]:
#Set the Target Class Label
target_class_label = 'Buys_Computer'

#Create a independent copy of the original dataset to form transactional_dataset
transactional_dataset = original_dataset.copy(deep=True)

#Columns of the transaction symbol reference table
symbol_reference_columns = ['Attribute', 'Value', 'Symbol', 'IsTarget']

#Rows of the reference table are collected in a plain list and converted into a
#DataFrame once at the end: DataFrame.append was removed in pandas 2.0, and growing
#a DataFrame one row at a time copies the whole frame on every iteration.
symbol_reference_records = []

#List to track non-categorical columns
non_categorical_columns = list()

#Traverse the dataset columnwise to encode columnar data into symbols for the transactional_dataset.
#NOTE: no column is skipped here. Columns that are not object/string typed are only
#recorded in 'non_categorical_columns' and left untouched.
print('------------------\n#CONVERTING TO TRANSACTIONAL DATASET\n------------------')
for attribute in transactional_dataset.columns:
  attribute_dtype = transactional_dataset[attribute].dtype

  #Object columns are treated as categorical exactly as before; dedicated string
  #dtypes are accepted as well so that text columns are not misreported as non-categorical.
  is_categorical = (
      pd.api.types.is_object_dtype(attribute_dtype)
      or pd.api.types.is_string_dtype(attribute_dtype)
  )
  if not is_categorical:
    non_categorical_columns.append([attribute, attribute_dtype])
    continue

  unique_attribute_values = list(transactional_dataset[attribute].unique())
  print('\nAttribute : ' + attribute + ' is categorical with unique value set : ', unique_attribute_values)

  #Transactional data encoding starts for that attribute column values
  for index, unique_val in enumerate(unique_attribute_values):
    #Symbol = first two characters of the column name + '_' + position of the value
    transaction_symbol = attribute[:2] + '_' + str(index)
    symbol_reference_records.append({
        'Attribute': attribute,
        'Value': unique_val,
        'Symbol': transaction_symbol,
        #Rows that belong to the target class column are flagged right away
        'IsTarget': attribute == target_class_label,
    })
    transactional_dataset.loc[transactional_dataset[attribute] == unique_val, attribute] = transaction_symbol

#Create the transaction_symbol reference dataset in one step
#(an empty record list still yields a frame with the expected columns)
transactional_symbol_reference = pd.DataFrame(symbol_reference_records, columns=symbol_reference_columns)

print("\n------------------\n#CURRENT TRANSACTIONAL DATASET LOOKS LIKE THIS\n------------------")
display(transactional_dataset)

print("\n------------------\n#COLUMNS THAT COULDN'T BE CONVERTED\n------------------")
for entry in non_categorical_columns:
  print('\nAttribute : ' + entry[0] + ' is of type : ', entry[1])

print("\n------------------\n#WRITE CUSTOM LOGIC TO CONVERT THEM INTO CATEGORICAL\n------------------")

**1.5 Custom Logic for converting Non-Categorical Columns into Categorical**

1.5.1 For Age Attribute

In [None]:
#Custom Logic to convert Age Column into Categorical Values

#Defining the Category labels for Age Column and adding entries to 'transactional_symbol_reference' dataset
age_attribute_categories = {
    "AttributeName":"Age",
    "CategoryValues" : ["<=30","31...40",">40"]
}

#Automatic generation of transaction symbols for 'transaction_symbol_reference' dataset.
#Symbols follow the same scheme as the categorical columns:
#first two characters of the attribute name + '_' + position of the category.
attribute = age_attribute_categories['AttributeName']
age_category_values = age_attribute_categories['CategoryValues']
age_symbols = [attribute[:2] + '_' + str(index) for index in range(len(age_category_values))]

#All reference rows are added with a single pd.concat call, because
#DataFrame.append was removed in pandas 2.0.
age_reference_rows = pd.DataFrame({
    'Attribute': attribute,
    'Value': age_category_values,
    'Symbol': age_symbols,
    'IsTarget': False,
})
transactional_symbol_reference = pd.concat(
    [transactional_symbol_reference, age_reference_rows],
    ignore_index=True,
)

#Writing the rules for encoding the non-categorical values.
#The masks are computed on the numeric values BEFORE any symbol is written, in the
#same order as 'CategoryValues'. Values matching none of the rules (for example
#missing values or ages strictly between 30 and 31) keep their original value.
age_values = transactional_dataset['Age']
age_bucket_masks = [
    age_values <= 30,
    (age_values >= 31) & (age_values <= 40),
    age_values > 40,
]

#Encoding the 'transactional_dataset' - Age attribute values with transaction symbols.
#The column is converted to object dtype first so that writing strings into a
#numeric column does not rely on implicit upcasting.
transactional_dataset['Age'] = age_values.astype(object)
for bucket_mask, age_symbol in zip(age_bucket_masks, age_symbols):
  transactional_dataset.loc[bucket_mask, 'Age'] = age_symbol

print("\n------------------\n#CURRENT TRANSACTIONAL DATASET LOOKS LIKE THIS\n------------------")
display(transactional_dataset)

1.6 Cleaning the Transactional_Dataset : Removing unnecessary attributes for Association Rule Mining Calculation

In [None]:
#The RID attribute plays no part in the Association Rule calculation,
#so the dataset is rebound to a copy that no longer has that column
columns_to_remove = ['RID']
transactional_dataset = transactional_dataset.drop(columns=columns_to_remove)

**1.7 Pre-requisites for AssociativeClassifier Completed**

**1.8 Showing the completed assets**

In [None]:
#Transactional dataset as it stands after the unwanted column was dropped
print('------------------\n#AFTER DROPPING UNWANTED COLUMNS : RID\n------------------')
print("\n------------------\n#TRANSACTIONAL DATASET LOOKS LIKE THIS\n------------------")
display(transactional_dataset)

#Lookup table relating every generated symbol to its attribute and value
print("\n------------------\n#TRANSACTIONAL SYMBOL REFERENCE TABLE LOOKS LIKE THIS\n------------------")
display(transactional_symbol_reference)

#Plain Python list holding the 'Symbol' column of the reference table
print("\n------------------\n#List of Symbols\n------------------")
list_of_symbols = transactional_symbol_reference['Symbol'].tolist()
display(list_of_symbols)

##### **Step 2 : Apriori Algorithm (Generating Association Rule mining rules)**

2.1 External Library Setup

*   Efficient-Apriori library needs to work with list of tuples
*   The dataset 'transactional_dataset' is currently in Pandas Dataframe format - making it incompatible with this library






In [None]:
#Turn the 'transactional_dataset' dataframe into a list with one tuple per row

transaction_rows = transactional_dataset.to_numpy().tolist()
transactional_dataset_tuplelist = list(map(tuple, transaction_rows))

#Show every transaction on its own line
for transaction_tuple in transactional_dataset_tuplelist:
  print(transaction_tuple)

2.2 Running the Apriori Algorithm on Transactional Dataset

In [None]:
# Mine itemsets and association rules from 'transactional_dataset_tuplelist'
# Library documentation : https://pypi.org/project/efficient-apriori/

# Thresholds passed to the apriori call below
min_support = 0.1
min_confidence = 0.6
itemsets, rules = apriori(
    transactional_dataset_tuplelist,
    min_support=min_support,
    min_confidence=min_confidence,
)

total_association_rules_count = len(rules)

# Statistics banner: each entry is printed on its own, in this order
apriori_statistics_lines = [
    '------------------\n#APRIORI ALGORITHM STATISTICS\n------------------\n',
    '------------------\n#MIN SUPPORT SELECTED    : {}'.format(min_support),
    '#MIN CONFIDENCE SELECTED : {}'.format(min_confidence),
    '#TOTAL COUNT OF RULES          : {}\n------------------'.format(total_association_rules_count),
    '\n------------------\n#GENERATED RULES\n------------------',
]
for statistics_line in apriori_statistics_lines:
  print(statistics_line)

# One line per generated rule, numbered from 0
for rule_number, mined_rule in enumerate(rules):
  print('Rule #{} - {}'.format(rule_number, mined_rule))

2.3 Selecting Valid Rules for Associative Classifier (Checking RHS of Rules)

In [None]:
# Keep only the rules usable by the Associative Classifier:
# the right hand side must hold exactly one item and that item must be a target label symbol
valid_target_labels = transactional_symbol_reference.query('IsTarget == True')['Symbol'].tolist()
valid_rules = [
    rule for rule in rules
    if len(rule.rhs) == 1 and any(label in rule.rhs for label in valid_target_labels)
]
total_valid_rules_count = len(valid_rules)

# Counts before and after the selection
print('\n------------------\n#FOR ASSOCIATIVE CLASSIFIER')
print('#TOTAL RULES GIVEN BY APRIORI ALGORITHM : {}'.format(total_association_rules_count))
print('#VALID RULES FOR ASSOCIATIVE CLASSIFIER : {}\n------------------\n'.format(total_valid_rules_count))

# Listing of the retained rules, numbered from 0
print('\n------------------\n#VALID RULES FOR ASSOCIATIVE CLASSIFIER\n------------------')
for rule_number, valid_rule in enumerate(valid_rules):
  print('Rule #{} - {}'.format(rule_number, valid_rule))


##### **Step 3 : Testing the Associative Classifier**

3.1 Function to format results of Associative Classifier

In [None]:
#Function to show output of the Associative Classifier
def showOutputOfAssociativeClassfier(predicted_target_class_label, test_symbols=None, symbol_reference=None):
  """Print the test record and the predicted class in human readable form.

  Every symbol is looked up in the symbol reference table and printed as
  '(Attribute, Value)'. The input features come first, each followed by ', ',
  then '==========> ' and the predicted class.

  Parameters
  ----------
  predicted_target_class_label : symbol of the predicted target class.
  test_symbols : iterable of input feature symbols. When omitted, the
      notebook-level variable 'test_data' is used, as before.
  symbol_reference : DataFrame with 'Symbol', 'Attribute' and 'Value' columns.
      When omitted, the notebook-level 'transactional_symbol_reference' is used,
      as before.

  Returns
  -------
  None. The result is written to standard output.
  """
  #Fall back to the notebook globals only when the caller did not pass the data
  #explicitly; the globals are resolved at call time, not at definition time.
  if test_symbols is None:
    test_symbols = test_data
  if symbol_reference is None:
    symbol_reference = transactional_symbol_reference

  def describe_symbol(symbol):
    #Text of the 'Attribute' and 'Value' cells of the row(s) matching the symbol
    matching_rows = symbol_reference.loc[symbol_reference['Symbol'] == symbol]
    attribute_text = matching_rows['Attribute'].to_string(index=False)
    value_text = matching_rows['Value'].to_string(index=False)
    return attribute_text, value_text

  for feature_symbol in test_symbols:
    feature_attribute, feature_value = describe_symbol(feature_symbol)
    print('({}, {})'.format(feature_attribute, feature_value), end=', ')

  predicted_attribute, predicted_value = describe_symbol(predicted_target_class_label)
  print('==========> ({}, {})'.format(predicted_attribute, predicted_value))

3.2 Test Data Setup

In [None]:
#Unseen record that the valid rules will be applied to.
#'Ag_0' is the "<=30" Age category (first entry of the Age category list).
#The other symbols are meant to stand for Income : Medium, Student : Yes and
#Credit Rating : Fair - their numbering depends on the order in which values appear
#in the dataset, so verify them against the transactional symbol reference table.
test_data = ('Ag_0','In_1','St_1','Cr_0')

#Invalid symbols for this test_data: every known symbol that is neither part of
#the record nor one of the target class labels
list_of_invalid_symbols = []
for symbol in list_of_symbols:
  if symbol in test_data or symbol in valid_target_labels:
    continue
  list_of_invalid_symbols.append(symbol)

print('------------------\n#LIST OF INVALID SYMBOLS\n------------------\n')
print(list_of_invalid_symbols)

3.3 Running the Associative Classifier on test data

In [None]:
#Querying the 'valid_rules' set for applicable rules on the test_data.
#A rule is applicable when its left hand side shares at least one symbol with the
#test record, its confidence reaches 'min_confidence', and its left hand side holds
#none of the invalid symbols.
#Logical and/not are used on purpose: bitwise '~' applied to a bool yields -1 / -2
#(both truthy) and is deprecated since Python 3.12.
applicable_rules = [
    rule for rule in valid_rules
    if any(symbol in rule.lhs for symbol in test_data)
    and rule.confidence >= min_confidence
    and not any(symbol in rule.lhs for symbol in list_of_invalid_symbols)
]

total_applicable_rules_count = len(applicable_rules)

print('\n------------------\n#FOR ASSOCIATIVE CLASSIFIER')
print('#TOTAL RULES GIVEN BY APRIORI ALGORITHM      : {}'.format(total_association_rules_count))
print('#VALID RULES FOR ASSOCIATIVE CLASSIFIER      : {}'.format(total_valid_rules_count))
print('#APPLICABLE RULES FOR ASSOCIATIVE CLASSIFIER : {}\n------------------\n'.format(total_applicable_rules_count))


print('\n------------------\n#VALID RULES FOR ASSOCIATIVE CLASSIFIER RANKED BY CONFIDENCE (DESCENDING)\n------------------')
sorted_applicable_rules = sorted(applicable_rules, key=lambda rule: rule.confidence, reverse=True)
for index,rule in enumerate(sorted_applicable_rules):
  print('Rule #{} - {}'.format(index,rule))

if not sorted_applicable_rules:
  #Without any applicable rule neither prediction method has a candidate;
  #report it instead of failing on an empty list.
  print('\n------------------\n#No applicable rule found for the test data : no class can be predicted\n------------------')
else:
  #Highest Confidence Method: take the class of the top ranked rule
  print('\n------------------\n#Class Prediction : Highest Confidence Method\n------------------')
  showOutputOfAssociativeClassfier(sorted_applicable_rules[0].rhs[0])

  #Voting Method: every applicable rule votes for the class on its right hand side;
  #on a tie, max() keeps the class that appears first in the list
  print('\n------------------\n#Class Prediction : Voting Method\n------------------')
  predicted_target_class_list = [rule.rhs[0] for rule in applicable_rules]
  voting_method_predicted_class_label = max(predicted_target_class_list,key=predicted_target_class_list.count)
  showOutputOfAssociativeClassfier(voting_method_predicted_class_label)