In [None]:
from flask import Flask, render_template, request, redirect, url_for
import time
import csv
import os
import pandas as pd
from collections import defaultdict, Counter
from os import listdir
from os.path import isfile, join
from werkzeug.utils import secure_filename
from itertools import combinations
import itertools
import numpy as np
import time

# Flask application object; the @app.route decorators in this file register
# their view functions on it.
app = Flask(__name__)

@app.route('/')
def home():
    """Render the 'apriori.html' template for the site root."""
    landing_page = render_template('apriori.html')
    return landing_page

###### machine learning code || Apriori
#load dataset
def load_data_set(file_path):
    """Read a CSV file of transactions into a list of item lists.

    Every CSV row is treated as one transaction. Cell values are stripped of
    surrounding whitespace, cells that are empty after stripping are dropped,
    and rows left with no values are skipped entirely.

    Args:
        file_path: Path of the CSV file to read.

    Returns:
        A list of transactions, each a non-empty list of item strings.
    """
    data_set = []
    # The csv module expects files opened with newline='' so that it handles
    # line endings itself; otherwise newlines embedded in quoted fields are
    # rewritten by the text layer before the reader sees them.
    with open(file_path, 'r', newline='') as csvfile:
        for row in csv.reader(csvfile):
            stripped = (value.strip() for value in row)
            cleaned_row = [value for value in stripped if value]
            if cleaned_row:
                data_set.append(cleaned_row)
    return data_set

#Frequent item set creation
def create_C1(data_set):
    """Build the candidate 1-itemsets.

    Args:
        data_set: Iterable of transactions, each an iterable of items.

    Returns:
        A set holding one single-element frozenset per distinct item.
    """
    return {
        frozenset([entry])
        for transaction in data_set
        for entry in transaction
    }

#check apriori
def is_apriori(Ck_item, Lksub1):
    """Check the Apriori property for one candidate itemset.

    Args:
        Ck_item: Candidate itemset (a frozenset).
        Lksub1: Collection of itemsets one item smaller than the candidate.

    Returns:
        True when every subset obtained by removing a single item from
        ``Ck_item`` is contained in ``Lksub1``; False otherwise.
    """
    return all(
        (Ck_item - frozenset([member])) in Lksub1
        for member in Ck_item
    )

#Frequent k-itemset creation
def create_Ck(Lksub1, k):
    """Generate candidate k-itemsets by joining frequent (k-1)-itemsets.

    Two (k-1)-itemsets are joined when their first ``k - 2`` items (in sorted
    order) are equal; the union is kept only if it passes ``is_apriori``.

    Each unordered pair is examined exactly once and every itemset is sorted
    once up front. Joining an itemset with itself only reproduces that same
    (k-1)-itemset, which cannot pass the pruning check when ``Lksub1`` holds
    itemsets of a single size, so skipping self-pairs leaves the result
    unchanged for such input.

    Args:
        Lksub1: Set of frequent (k-1)-itemsets (frozensets).
        k: Size of the candidate itemsets to generate.

    Returns:
        A set of candidate k-itemsets (frozensets).
    """
    Ck = set()
    # Pair every itemset with its join key (sorted prefix of length k-2).
    keyed = [(sorted(item_set)[0:k - 2], item_set) for item_set in Lksub1]
    for (prefix_a, set_a), (prefix_b, set_b) in combinations(keyed, 2):
        if prefix_a != prefix_b:
            continue
        Ck_item = set_a | set_b
        # pruning
        if is_apriori(Ck_item, Lksub1):
            Ck.add(Ck_item)
    return Ck

def generate_Lk_by_Ck(data_set, Ck, min_support, support_data):
    """Select the candidates whose support reaches ``min_support``.

    Args:
        data_set: Sequence of transactions.
        Ck: Collection of candidate itemsets (frozensets).
        min_support: Minimum support as a fraction of the transaction count.
        support_data: Dict updated in place with ``itemset -> support`` for
            every itemset that is kept.

    Returns:
        The set of candidates whose support is at least ``min_support``.
    """
    occurrences = {}
    for transaction in data_set:
        for candidate in Ck:
            if candidate.issubset(transaction):
                occurrences[candidate] = occurrences.get(candidate, 0) + 1

    total = float(len(data_set))
    frequent = set()
    for candidate, hits in occurrences.items():
        support = hits / total
        if support >= min_support:
            frequent.add(candidate)
            support_data[candidate] = support
    return frequent

def generate_L(data_set, k, min_support):
    """Compute the frequent itemsets of sizes 1 through ``k``.

    Args:
        data_set: Sequence of transactions.
        k: Largest itemset size to generate.
        min_support: Minimum support as a fraction of the transaction count.

    Returns:
        A tuple ``(L, support_data)``: ``L`` is a list whose entry at index
        ``i`` is the set of frequent (i+1)-itemsets (possibly empty), and
        ``support_data`` maps each frequent itemset to its support.
    """
    support_data = {}
    current_level = generate_Lk_by_Ck(
        data_set, create_C1(data_set), min_support, support_data
    )
    levels = [current_level]
    size = 2
    while size <= k:
        candidates = create_Ck(current_level, size)
        current_level = generate_Lk_by_Ck(
            data_set, candidates, min_support, support_data
        )
        levels.append(current_level)
        size += 1
    return levels, support_data

def generate_big_rules(L, support_data, min_conf):
    """Derive association rules from the frequent itemsets.

    Itemsets are visited level by level. Each itemset is compared with every
    itemset visited before it; when an earlier itemset is a subset, the rule
    ``(freq_set - subset) => subset`` is formed with confidence
    ``support(freq_set) / support(freq_set - subset)``.

    Args:
        L: List of sets of frequent itemsets, one set per itemset size.
        support_data: Mapping from itemset to support.
        min_conf: Minimum confidence a rule needs to be kept.

    Returns:
        A list of ``(antecedent, consequent, confidence)`` tuples without
        duplicates.
    """
    rules = []
    visited = []
    for level in L:
        for freq_set in level:
            for earlier in visited:
                if not earlier.issubset(freq_set):
                    continue
                antecedent = freq_set - earlier
                confidence = support_data[freq_set] / support_data[antecedent]
                rule = (antecedent, earlier, confidence)
                if confidence >= min_conf and rule not in rules:
                    rules.append(rule)
            visited.append(freq_set)
    return rules


###### machine learning code || Fp-Growth
def find_uniItems(transactions):
    """Return the distinct items of all transactions in first-seen order.

    Args:
        transactions: Iterable of transactions, each an iterable of hashable
            items.

    Returns:
        A list of the distinct items, ordered by first appearance.
    """
    # A dict keeps insertion order and gives constant-time duplicate checks,
    # replacing the linear list scan that was performed for every item.
    return list(dict.fromkeys(
        item for transaction in transactions for item in transaction
    ))

def find_frequency(lists):
    """Count how often each item occurs across all sub-lists.

    Args:
        lists: Iterable of item lists.

    Returns:
        A plain dict mapping each item to its total number of occurrences,
        with keys in first-seen order.
    """
    totals = Counter()
    for sub_list in lists:
        totals.update(sub_list)
    return dict(totals)

def remove_infrequent_and_sort(frequent_item_sets, min_support):
    """Drop infrequent items and order the rest by descending count.

    Entries whose count is below ``min_support`` and the entry with the
    empty-string key are removed. The input dict is not modified.

    Args:
        frequent_item_sets: Mapping from item to occurrence count.
        min_support: Minimum count an item needs to be kept.

    Returns:
        A new dict with the surviving items, ordered from highest to lowest
        count (the reverse of ``np.argsort`` on the counts).
    """
    kept = {
        name: count
        for name, count in frequent_item_sets.items()
        if not (count < min_support) and name != ''
    }
    names = list(kept)
    counts = list(kept.values())
    descending = np.flip(np.argsort(counts))
    return {names[position]: counts[position] for position in descending}

def build_ordered_itemset(transactions, frequent_item_sets):
    """Filter and reorder each transaction according to the frequent items.

    Args:
        transactions: Iterable of transactions (iterables of items).
        frequent_item_sets: Dict whose key order defines the item ordering.

    Returns:
        A list with one list per input transaction, containing the keys of
        ``frequent_item_sets`` that occur in that transaction, in key order.
        Transactions without any such item yield an empty list.
    """
    ranking = list(frequent_item_sets.keys())
    ordered = []
    for transaction in transactions:
        # Keep only the items that are known keys ...
        present = [entry for entry in transaction if entry in ranking]
        # ... then emit them in the order given by the keys.
        ordered.append([key for key in ranking if key in present])
    return ordered


class Node:
    """A tree node holding an item, a count, and links to related nodes."""

    def __init__(self, item, count, parent):
        """Create a node with no children and no links.

        Args:
            item: The item stored in this node.
            count: Initial count of the node.
            parent: Parent node, or None.
        """
        self.item, self.count, self.parent = item, count, parent
        self.children = {}      # child item -> child Node
        self.next_node = None
        self.link = None        # next node in a per-item chain, if any

    def add_child(self, child):
        """Register ``child`` under its item unless that item already has a child."""
        self.children.setdefault(child.item, child)

    def increment_count(self, count):
        """Add ``count`` to this node's count."""
        self.count += count

    def get_nodes_with_item(self, item):
        """Return this node and all descendants whose item equals ``item``.

        The node itself comes first (when it matches), followed by matches
        from each child's subtree in child order.
        """
        matches = [self] if self.item == item else []
        for descendant in self.children.values():
            matches += descendant.get_nodes_with_item(item)
        return matches

class FPTree:
    """Prefix tree of transactions with a per-item chain of nodes."""

    def __init__(self):
        """Create an empty tree with a "*" root and an empty header table."""
        self.root = Node("*", 0, None)
        self.header_table = {}  # item -> first node created for that item

    def _link_into_header(self, item, node):
        """Append ``node`` to the end of the link chain kept for ``item``."""
        if item not in self.header_table:
            self.header_table[item] = node
            return
        tail = self.header_table[item]
        while tail.link is not None:
            tail = tail.link
        tail.link = node

    def add_transaction(self, transaction):
        """Insert one transaction, incrementing counts along its path.

        Nodes are created for items that are not yet children of the current
        node; every new node is appended to its item's link chain.
        """
        cursor = self.root
        for item in transaction:
            child = cursor.children.get(item)
            if child is None:
                child = Node(item, 0, cursor)
                cursor.children[item] = child
                self._link_into_header(item, child)
            child.increment_count(1)
            cursor = child

    def get_frequent_items(self, min_support):
        """Return ``item -> total count`` for items reaching ``min_support``.

        The total for an item is the sum of the counts of all nodes on its
        link chain.
        """
        frequent = {}
        for item, head in self.header_table.items():
            total = 0
            cursor = head
            while cursor is not None:
                total += cursor.count
                cursor = cursor.link
            if total >= min_support:
                frequent[item] = total
        return frequent

    def get_nodes_with_item(self, item):
        """Return the header-table entry for ``item``, or ``[]`` if absent.

        Note that for a known item this is the first node of the chain, not a
        list of nodes.
        """
        if item in self.header_table:
            return self.header_table[item]
        return []

def find_pattern_base(fptree, node, item):
    """Collect the conditional pattern base for ``item``.

    Starting at ``node``, every node on the ``link`` chain is visited. For
    each one, the items on the path from that node up to (but excluding) the
    root are gathered, leaving out ``item`` itself.

    The node's item is read from the ``item`` attribute, which is the
    attribute ``Node`` defines (the previous ``name`` attribute does not
    exist on ``Node``). The root needs no special-casing because the climb
    stops at the node whose ``parent`` is None.

    Args:
        fptree: Unused; kept so the signature stays unchanged.
        node: First node of the link chain for ``item``, or None.
        item: The item whose prefix paths are collected.

    Returns:
        A dict mapping ``frozenset(prefix items)`` to the summed counts of
        the chain nodes having that prefix. Nodes with an empty prefix are
        not recorded.
    """
    pattern_base = {}
    while node is not None:
        prefix_path = []
        ancestor = node
        while ancestor.parent is not None:
            if ancestor.item != item:
                prefix_path.append(ancestor.item)
            ancestor = ancestor.parent
        if prefix_path:
            key = frozenset(prefix_path)
            # Accumulate instead of overwriting so that two chain nodes with
            # the same prefix items both contribute their counts.
            pattern_base[key] = pattern_base.get(key, 0) + node.count
        node = node.link
    return pattern_base

def create_subtree(fptree, min_support):
    """Prune ``fptree``'s header table and build a new FPTree from pattern bases.

    NOTE(review): as written, this function cannot finish for a header table
    that keeps at least one item; see the inline notes. The intended data
    shapes need to be confirmed before it can be repaired.

    Args:
        fptree: Object with ``header_table`` and ``root`` attributes; its
            header table is modified in place.
        min_support: Minimum total count an item needs to stay in the table.

    Returns:
        The newly built ``FPTree`` (``conditional_tree``).
    """
    # Snapshot of the keys so entries can be deleted while looping.
    items = list(fptree.header_table.keys())
    for item in items:
        # Sum the counts of every node on this item's link chain.
        support = 0
        node = fptree.header_table[item]
        while node is not None:
            support += node.count
            node = node.link
        if support < min_support:
            del fptree.header_table[item]
        else:
            # NOTE(review): this replaces the chain's first node with the
            # numeric support value.
            fptree.header_table[item] = support
    for item in fptree.header_table:
        nodes = []
        # NOTE(review): the value read here is the number stored by the loop
        # above, so `node.link` below raises AttributeError -- TODO confirm
        # what the header table is supposed to hold at this point.
        node = fptree.header_table[item]
        while node is not None:
            nodes.append(node)
            node = node.link
        fptree.header_table[item] = nodes
    conditional_tree = FPTree()
    # Iterates the pre-pruning snapshot, i.e. including items deleted above.
    for item in items:
        # NOTE(review): find_pattern_base is defined in this file with three
        # parameters (fptree, node, item) but receives two arguments here.
        pattern_base = find_pattern_base(fptree.root, item)
        for transaction, count in pattern_base.items():
            transaction_list = list(transaction)
            # Insert the prefix once per counted occurrence.
            for i in range(count):
                conditional_tree.add_transaction(transaction_list)
    frequent_items = conditional_tree.get_frequent_items(min_support)
    for item in frequent_items:
        # Overwrites the header entry (a node) with the item's support value.
        conditional_tree.header_table[item] = frequent_items[item]
    return conditional_tree

def generate_frequent_patterns(fptree, min_support, prefix=None):
    """Yield ``(pattern, support)`` pairs, extending ``prefix`` item by item.

    Args:
        fptree: Mapping from item to a dict with the keys ``'support'`` and
            ``'node_link'``.
        min_support: Threshold handed on to ``create_subtree``.
        prefix: Items already fixed for the patterns being generated;
            defaults to an empty prefix. The argument is never mutated.

    Yields:
        Tuples ``(items, support)`` where ``items`` is a new list made of
        ``prefix`` plus one item of ``fptree``; items are visited in order of
        increasing support.
    """
    # None instead of a shared mutable default list.
    prefix = [] if prefix is None else prefix
    items = [v[0] for v in sorted(fptree.items(), key=lambda kv: kv[1]['support'])]
    for item in items:
        new_prefix = [*prefix, item]
        support = fptree[item]["support"]
        yield (new_prefix, support)
        # find_pattern_base takes (fptree, node, item); the tree argument was
        # previously missing from this call.
        conditional_pattern_base = find_pattern_base(fptree, fptree[item]["node_link"], item)
        # NOTE(review): create_subtree reads `.header_table` from its first
        # argument and returns an FPTree, while this code passes it a dict
        # and then calls len() on the result -- TODO reconcile the two.
        conditional_tree = create_subtree(conditional_pattern_base, min_support)
        if len(conditional_tree) > 0:
            yield from generate_frequent_patterns(conditional_tree, min_support, new_prefix)

class FP_Growth:
    """Builds an FPTree from transactions and lists itemsets found in it."""

    def __init__(self, transactions, min_support):
        """Store the transactions and the minimum support count."""
        self.transactions = transactions
        self.min_support = min_support

    def build_fptree(self):
        """Create ``self.fptree`` and insert every stored transaction."""
        tree = FPTree()
        self.fptree = tree
        for transaction in self.transactions:
            tree.add_transaction(transaction)

    def generate_frequent_itemsets(self, node, suffix):
        """Walk the subtree below ``node`` and list the itemsets on its paths.

        For each child of ``node`` the child's item is added to a copy of
        ``suffix``; the resulting set is reported together with the count of
        ``node`` (the parent), and the walk continues below the child.

        Returns:
            A list of ``(itemset, count)`` tuples in depth-first order.
        """
        collected = []
        parent_count = node.count
        for item, child in node.children.items():
            extended = suffix.copy()
            extended.add(item)
            collected.append((extended, parent_count))
            collected += self.generate_frequent_itemsets(child, extended)
        return collected

    def mine_frequent_itemsets(self):
        """Build the tree and return the list of ``(itemset, count)`` pairs.

        The list starts with one single-item frozenset per frequent item,
        followed by the itemsets found below each header-table node.
        """
        self.build_fptree()
        singles = self.fptree.get_frequent_items(self.min_support)
        found = [(frozenset([item]), count) for item, count in singles.items()]
        for head in self.fptree.header_table.values():
            found.extend(self.generate_frequent_itemsets(head, set()))
        return found




def generate_combinations(items, length):
    """Return every ``length``-sized combination of ``items`` as a list of tuples."""
    return [*combinations(items, length)]

def generate_support(data, combination):
    """Count the transactions in ``data`` that contain every item of ``combination``."""
    return sum(
        set(combination) <= set(transaction)
        for transaction in data
    )

def eclat_2(data, min_support):
    """Find all itemsets whose support count is at least ``min_support``.

    Candidates are enumerated by size from the sorted distinct items. The
    search stops after the first size that yields no frequent itemset: every
    superset of an infrequent itemset is infrequent as well, so larger sizes
    cannot contribute anything. Transactions are converted to sets once
    rather than once per candidate.

    Args:
        data: Iterable of transactions (iterables of hashable, sortable items).
        min_support: Minimum number of transactions that must contain an
            itemset.

    Returns:
        A dict mapping each frequent itemset (a tuple of items in sorted
        order) to its support count, ordered by size and then by the
        combination order.
    """
    transaction_sets = [set(transaction) for transaction in data]
    items = sorted(set().union(*transaction_sets))

    results = {}
    for length in range(1, len(items) + 1):
        found_any = False
        for candidate in combinations(items, length):
            wanted = set(candidate)
            support = sum(1 for present in transaction_sets if wanted <= present)
            if support >= min_support:
                results[candidate] = support
                found_any = True
        if not found_any:
            # No frequent itemset of this size, hence none of any larger size.
            break

    return results

@app.route('/apriori_algo',methods=["GET","POST"])
def apriori_algo():
    """Run Apriori on an uploaded CSV file and render the results.

    On POST the uploaded file (form field ``file``) is saved below
    ``uploads/``, frequent itemsets up to size 3 and the association rules
    are computed from the ``min_support`` and ``min_conf`` form values, both
    result tables are written to CSV files in the working directory, and
    ``apriori.html`` is rendered with the tables and the execution time.

    Any other request renders ``apriori.html`` without results; previously
    such a request failed because the result variables were never assigned.
    """
    if request.method != 'POST':
        return render_template('apriori.html')

    starting_time = time.time()
    file = request.files['file']
    min_support = float(request.form.get('min_support'))
    min_conf = float(request.form.get('min_conf'))
    web_apriori_output = [['frequent itemsets', 'support']]
    web_apriori_strong_association_output = [['Item 1', 'Item 2', 'Confidence']]

    # Save the file to ./uploads (created on demand so the save cannot fail
    # just because the directory is missing).
    upload_dir = os.path.join(os.path.dirname(__file__), 'uploads')
    os.makedirs(upload_dir, exist_ok=True)
    file_path = os.path.join(upload_dir, secure_filename(file.filename))
    file.save(file_path)

    data_set = load_data_set(file_path)

    L, support_data = generate_L(data_set, k=3, min_support = min_support)
    for i, Lk in enumerate(L):
        if not Lk:
            # Reported only for levels that are really empty; the message
            # used to hang off the inner for-loop and printed after every
            # non-empty level instead.
            print(f"No frequent {i+1}-itemsets found.")
            continue
        print("=" * 50)
        print(f"frequent {len(next(iter(Lk)))}-itemsets (frequent itemsets) \t \t support (support)")
        print("=" * 50)
        for freq_set in Lk:
            # round() rather than int(): support * n can land just below the
            # true count (e.g. 28.999...), which truncation turns into 28.
            support_count = round(support_data[freq_set] * len(data_set))
            print(freq_set, support_count)
            web_apriori_output.append([freq_set, support_count])

    big_rules_list = generate_big_rules(L, support_data, min_conf=min_conf)
    if big_rules_list:
        print ( "Strong Association\n" )
        for antecedent, consequent, confidence in big_rules_list:
            print ( antecedent, "=>" , consequent, "conf(confidence): " , confidence)
            web_apriori_strong_association_output.append([antecedent, consequent, confidence])
    else:
        print ( "No Strong Association are found!" )

    # The tables are round-tripped through CSV files, so the itemsets end up
    # as their string representation in the rendered HTML.
    with open("web_apriori_strong_association_output.csv", "w+", newline='') as my_csv:
        csvWriter = csv.writer(my_csv,delimiter=',')
        csvWriter.writerows(web_apriori_strong_association_output)
    df = pd.read_csv("web_apriori_strong_association_output.csv")

    with open("web_apriori_output.csv", "w+", newline='') as my_csv:
        csvWriter = csv.writer(my_csv,delimiter=',')
        csvWriter.writerows(web_apriori_output)
    apriori_algo_csv = pd.read_csv("web_apriori_output.csv")

    exe_time = round(time.time() - starting_time, 4)

    return render_template('apriori.html',tables=[df.to_html(classes='data')], tables1=[apriori_algo_csv.to_html(classes='data')], execution_time = exe_time)

@app.route('/fpgrowth_algo',methods=["GET","POST"])
def fpgrowth_algo():
    """Run the FP-Growth miner on an uploaded CSV file and render the results.

    On POST the uploaded file (form field ``file``) is saved below
    ``uploads/``, the relative ``min_support`` form value is converted to an
    absolute count, the transactions are filtered and ordered by item
    frequency, and the itemsets returned by ``FP_Growth`` that reach the
    threshold are written to ``web_fpgrowth_output.csv`` and rendered in
    ``fpgrowth.html`` together with the execution time.

    Any other request renders ``fpgrowth.html`` without results; previously
    such a request failed because the result variables were never assigned.
    """
    if request.method != 'POST':
        return render_template('fpgrowth.html')

    starting_time = time.time()
    file = request.files['file']
    ref = float(request.form.get('min_support'))
    web_fpgrowth_output = [['frequent itemsets', 'support']]

    # Save the file to ./uploads (created on demand so the save cannot fail
    # just because the directory is missing).
    upload_dir = os.path.join(os.path.dirname(__file__), 'uploads')
    os.makedirs(upload_dir, exist_ok=True)
    file_path = os.path.join(upload_dir, secure_filename(file.filename))
    file.save(file_path)

    dataset = load_data_set(file_path)
    # Relative support -> absolute number of transactions.
    min_support_1 = ref * len(dataset)
    frequent_item_sets = find_frequency(dataset)
    frequent_item_sets = remove_infrequent_and_sort(frequent_item_sets, min_support_1)
    transactions = build_ordered_itemset(dataset, frequent_item_sets)

    fp = FP_Growth(transactions, min_support_1)
    frequent_itemsets = fp.mine_frequent_itemsets()

    print("Frequent Itemsets | Support")
    for itemset, support in frequent_itemsets:
        # ">=" matches the threshold test used when filtering items and in
        # the Apriori path; the strict ">" dropped itemsets sitting exactly
        # on the minimum support.
        if support >= min_support_1:
            print(itemset, support)
            web_fpgrowth_output.append([itemset, support])

    # The table is round-tripped through a CSV file, so the itemsets end up
    # as their string representation in the rendered HTML.
    with open("web_fpgrowth_output.csv", "w+", newline='') as my_csv:
        csvWriter = csv.writer(my_csv,delimiter=',')
        csvWriter.writerows(web_fpgrowth_output)
    df = pd.read_csv("web_fpgrowth_output.csv")

    exe_time = time.time() - starting_time
    print(exe_time)
    exe_time = round(exe_time, 4)

    return render_template('fpgrowth.html',tables=[df.to_html(classes='data')], execution_time = exe_time)


@app.route('/apriori')
def movie():
    """Render the 'apriori.html' template for the /apriori URL."""
    apriori_page = render_template('apriori.html')
    return apriori_page

@app.route('/fpgrowth')
def fpgrowth():
    """Render the 'fpgrowth.html' template for the /fpgrowth URL."""
    fpgrowth_page = render_template('fpgrowth.html')
    return fpgrowth_page

# Start the Flask development server when this file is run directly.
# NOTE(review): debug=True turns on Flask's debug mode -- confirm this entry
# point is only used for local development.
if __name__ == "__main__":
    app.run(debug=True)


 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
INFO:werkzeug:Press CTRL+C to quit
INFO:werkzeug: * Restarting with stat
