In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import datetime as dt

# ----------------------------
# INSTALL
# !pip install mlxtend

# ----------------------------
# TRANSACTION ENCODER
from mlxtend.preprocessing import TransactionEncoder

# ----------------------------
# APRIORI FUNCTION
from mlxtend.frequent_patterns import apriori, association_rules

# ----------------------------
# ITERTOOLS
import itertools

# ----------------------------
# CONFIGURATION
import warnings
warnings.filterwarnings("ignore")
warnings.simplefilter(action='ignore', category=FutureWarning)

pd.set_option('display.max_columns', None)
pd.options.display.float_format = '{:.2f}'.format
df = pd.read_excel("/content/drive/MyDrive/CyberCypher3.0_Project/grocery_database.xlsx",names=['products'],header=None)
df
data = list(df["products"].apply(lambda x:x.split(',')))

from mlxtend.preprocessing import TransactionEncoder
te = TransactionEncoder()
te_data = te.fit(data).transform(data)
df = pd.DataFrame(te_data,columns=te.columns_).astype(int)

# Find Frequency of Items
df.sum()
# finding Product Frequency / Total Sales
first = pd.DataFrame(df.sum() / df.shape[0], columns = ["Support"]).sort_values("Support", ascending = False)
first
# Elimination by Support Value
first[first.Support >= 0.13]
#Second Iteration: Find support values for pair product combinations.
second = list(itertools.combinations(first.index, 2))
second = [list(i) for i in second]
# Sample of combinations
second[:10]
# Finding support values
value = []
for i in range(0, len(second)):
    temp = df.T.loc[second[i]].sum()
    temp = len(temp[temp == df.T.loc[second[i]].shape[0]]) / df.shape[0]
    value.append(temp)
# Create a data frame
secondIteration = pd.DataFrame(value, columns = ["Support"])
secondIteration["index"] = [tuple(i) for i in second]
secondIteration['length'] = secondIteration['index'].apply(lambda x:len(x))
secondIteration = secondIteration.set_index("index").sort_values("Support", ascending = False)
# Elimination by Support Value
secondIteration = secondIteration[secondIteration.Support > 0.1]
secondIteration
def ar_iterations(data, num_iter = 1, support_value = 0.1, iterationIndex = None):

    # Next Iterations
    def ar_calculation(iterationIndex = iterationIndex):
        # Calculation of support value
        value = []
        for i in range(0, len(iterationIndex)):
            result = data.T.loc[iterationIndex[i]].sum()
            result = len(result[result == data.T.loc[iterationIndex[i]].shape[0]]) / data.shape[0]
            value.append(result)
        # Bind results
        result = pd.DataFrame(value, columns = ["Support"])
        result["index"] = [tuple(i) for i in iterationIndex]
        result['length'] = result['index'].apply(lambda x:len(x))
        result = result.set_index("index").sort_values("Support", ascending = False)
        # Elimination by Support Value
        result = result[result.Support > support_value]
        return result

    # First Iteration
    first = pd.DataFrame(df.T.sum(axis = 1) / df.shape[0], columns = ["Support"]).sort_values("Support", ascending = False)
    first = first[first.Support > support_value]
    first["length"] = 1

    if num_iter == 1:
        res = first.copy()

    # Second Iteration
    elif num_iter == 2:

        second = list(itertools.combinations(first.index, 2))
        second = [list(i) for i in second]
        res = ar_calculation(second)

    # All Iterations > 2
    else:
        nth = list(itertools.combinations(set(list(itertools.chain(*iterationIndex))), num_iter))
        nth = [list(i) for i in nth]
        res = ar_calculation(nth)

    return res
iteration1 = ar_iterations(df, num_iter=1, support_value=0.1)
iteration1
iteration2 = ar_iterations(df, num_iter=2, support_value=0.1)
iteration2
iteration3 = ar_iterations(df, num_iter=3, support_value=0.01,
              iterationIndex=iteration2.index)
iteration3
iteration4 = ar_iterations(df, num_iter=4, support_value=0.01,
              iterationIndex=iteration3.index)
iteration4
# Apriori
freq_items = apriori(df, min_support = 0.1, use_colnames = True, verbose = 1)
freq_items.sort_values("support", ascending = False)
freq_items.sort_values("support", ascending = False).head(5)
freq_items.sort_values("support", ascending = False).tail(5)
# Association Rules & Info
df_ar = association_rules(freq_items, metric = "confidence", min_threshold = 0.5)
df_ar
df_ar[(df_ar.support > 0.15) & (df_ar.confidence > 0.5)].sort_values("confidence", ascending = False)



In [2]:
import streamlit as st
import pandas as pd
from io import StringIO
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules
import itertools

# Load the grocery dataset
data = """JAM,MAGGI,BREAD,MILK
MAGGI,TEA,BISCUIT
BREAD,TEA,BOURNVITA
MAGGI,TEA,CORNFLAKES
MAGGI,BREAD,TEA,BISCUIT
JAM,MAGGI,BREAD,TEA
BREAD,MILK
COFFEE,COCK,BISCUIT,CORNFLAKES
COFFEE,COCK,BISCUIT,CORNFLAKES
COFFEE,SUGER,BOURNVITA
BREAD,COFFEE,COCK
BREAD,SUGER,BISCUIT
COFFEE,SUGER,CORNFLAKES
BREAD,SUGER,BOURNVITA
BREAD,COFFEE,SUGER
BREAD,COFFEE,SUGER
TEA,MILK,COFFEE,CORNFLAKES
"""

df = pd.read_csv(StringIO(data), header=None)

# Convert all values to strings
df = df.applymap(str)

# Preprocess the data
te = TransactionEncoder()
te_data = te.fit(df.values).transform(df.values)
df_encoded = pd.DataFrame(te_data, columns=te.columns_)

# Apriori function
def run_apriori(data, min_support=0.1, min_confidence=0.5):
    frequent_items = apriori(data, min_support=min_support, use_colnames=True)
    rules = association_rules(frequent_items, metric="confidence", min_threshold=min_confidence)
    return frequent_items, rules

# Streamlit app
st.title("Apriori Association Rules App")

# User input for item selection
selected_item = st.selectbox("Select an item to check for association rules", df.columns)

# Get frequent items and association rules
frequent_items, rules = run_apriori(df_encoded)

# Display frequent items
st.subheader("Frequent Items")
st.write(frequent_items)

# Display association rules
st.subheader("Association Rules")
st.write(rules)

# Display association rules for the selected item
st.subheader(f"Association Rules for {selected_item}")
filtered_rules = rules[rules["consequents"].apply(lambda x: selected_item in x)]
st.write(filtered_rules)


2024-01-14 03:07:48.215 
  command:

    streamlit run c:\Users\aryan\AppData\Local\Programs\Python\Python311\Lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
2024-01-14 03:07:48.347 Serialization of dataframe to Arrow table was unsuccessful due to: ("Could not convert frozenset({'BISCUIT'}) with type frozenset: did not recognize Python value type when inferring an Arrow data type", 'Conversion failed for column itemsets with type object'). Applying automatic fixes for column types to make the dataframe Arrow-compatible.
2024-01-14 03:07:48.400 Serialization of dataframe to Arrow table was unsuccessful due to: ("Could not convert frozenset({'COCK'}) with type frozenset: did not recognize Python value type when inferring an Arrow data type", 'Conversion failed for column antecedents with type object'). Applying automatic fixes for column types to make the dataframe Arrow-compatible.


In [3]:
streamlit run

SyntaxError: invalid syntax (1135476472.py, line 1)