In [None]:
# Mount Google Drive into the runtime's filesystem at /content/drive.
# This cell only works inside Google Colab, because it imports `google.colab`.
# NOTE(review): none of the cells visible here read from /content/drive (the app
# receives its CSV through Streamlit's file uploader) — confirm the mount is
# still needed.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Register the ngrok authtoken without ever storing it in the notebook.
#
# SECURITY: an earlier revision of this cell contained a literal authtoken.
# That token must be treated as leaked and revoked in the ngrok dashboard.
#
# The token is taken from the NGROK_AUTHTOKEN environment variable when set,
# otherwise it is requested interactively (getpass does not echo the input and
# the value is never written to the cell source or its output).
#
# NOTE(review): the `ngrok` command is presumably provided by the pyngrok
# install cell below — on a fresh runtime run that cell first.
import os
import subprocess
from getpass import getpass

ngrok_authtoken = os.environ.get("NGROK_AUTHTOKEN") or getpass("Enter your ngrok authtoken: ")

# Pass the token as an argv element (no shell) so it is neither interpolated
# into a shell command line nor subject to shell quoting issues.
_ngrok_config_result = subprocess.run(
    ["ngrok", "config", "add-authtoken", ngrok_authtoken],
    check=True,
    capture_output=True,
    text=True,
)
print(_ngrok_config_result.stdout.strip())

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


In [None]:
!pip install streamlit pyngrok

Collecting pyngrok
  Downloading pyngrok-7.2.0-py3-none-any.whl.metadata (7.4 kB)
Downloading pyngrok-7.2.0-py3-none-any.whl (22 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.0


In [None]:
%%writefile app.py
import pandas as pd
import streamlit as st
from itertools import combinations
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx

class MarketBasketAnalysis:
    """Apriori frequent-itemset mining and association-rule generation.

    Attributes:
        data: DataFrame holding one purchased item per row; ``load_data`` reads
            its ``Member_number`` and ``itemDescription`` columns.
        min_support: Minimum fraction of transactions an itemset must appear in
            to be considered frequent.
        min_confidence: Minimum confidence a rule must reach to be kept.
        transactions: List of item lists, one per ``Member_number``.
        frequent_itemsets: Maps ``frozenset`` of items -> number of
            transactions containing every item of the set.
        rules: List of ``(antecedent, consequent, support, confidence, lift)``
            tuples, where antecedent/consequent are ``frozenset`` objects.
    """

    def __init__(self, data, min_support=0.05, min_confidence=0.5):
        self.data = data
        self.min_support = min_support
        self.min_confidence = min_confidence
        self.transactions = []
        self.frequent_itemsets = {}
        self.rules = []

    def load_data(self):
        """Create transactions from the loaded dataset.

        Groups ``itemDescription`` values by ``Member_number`` (one transaction
        per member) and writes the transaction and unique-item totals to the
        Streamlit page.
        """
        df = self.data
        st.write("Data Loaded:")

        # One list of purchased items per member; each list is one transaction.
        self.transactions = df.groupby('Member_number')['itemDescription'].apply(list).tolist()

        total_transactions = len(self.transactions)
        unique_items = df['itemDescription'].nunique()

        st.write(f"Total Transactions: {total_transactions}")
        st.write(f"Total Unique Items: {unique_items}")

    def apriori(self):
        """Run the Apriori algorithm to find frequent itemsets and generate rules.

        Populates ``self.frequent_itemsets`` and then ``self.rules`` (via
        ``generate_rules``). Results from any previous call are discarded, so
        the method can be re-run safely.
        """
        # Start from a clean slate so repeated calls do not accumulate results.
        self.frequent_itemsets = {}
        self.rules = []

        num_transactions = len(self.transactions)
        if num_transactions == 0:
            return

        # Support is "number of transactions containing the itemset", so an
        # item bought several times in one transaction must count once.
        # The sets are built once and reused at every level.
        transaction_sets = [frozenset(transaction) for transaction in self.transactions]

        item_count = {}
        for transaction in self.transactions:
            # dict.fromkeys de-duplicates while keeping first-seen order, which
            # keeps the resulting itemset order deterministic.
            for item in dict.fromkeys(transaction):
                item_count[item] = item_count.get(item, 0) + 1

        # Level 1: frequent single items.
        current_level = {
            frozenset([item]): count
            for item, count in item_count.items()
            if count / num_transactions >= self.min_support
        }
        self.frequent_itemsets.update(current_level)

        k = 2
        while current_level:
            # Join step: every frequent k-itemset is the union of two frequent
            # (k-1)-itemsets, so only the previous level needs to be paired.
            candidate_itemsets = {}
            for left, right in combinations(list(current_level), 2):
                candidate = left | right
                if len(candidate) == k:
                    candidate_itemsets[candidate] = 0

            # Count step: number of transactions containing each candidate.
            for transaction_set in transaction_sets:
                if len(transaction_set) < k:
                    continue  # too small to contain any k-itemset
                for candidate in candidate_itemsets:
                    if candidate <= transaction_set:
                        candidate_itemsets[candidate] += 1

            # Prune step: keep only candidates meeting min_support.
            current_level = {
                candidate: count
                for candidate, count in candidate_itemsets.items()
                if count / num_transactions >= self.min_support
            }
            self.frequent_itemsets.update(current_level)
            k += 1

        self.generate_rules()

    def generate_rules(self):
        """Generate association rules from frequent itemsets.

        For every frequent itemset with at least two items, each non-empty
        proper subset is tried as antecedent with the remainder as consequent.
        A rule is kept when its confidence is at least ``min_confidence``.
        ``self.rules`` is rebuilt from scratch on every call.
        """
        self.rules = []
        num_transactions = len(self.transactions)
        if num_transactions == 0:
            return

        for itemset, itemset_count in self.frequent_itemsets.items():
            if len(itemset) < 2:
                continue

            support = itemset_count / num_transactions
            # Sort the members so rules are emitted in a reproducible order.
            members = sorted(itemset, key=str)

            for size in range(1, len(members)):
                for antecedent_items in combinations(members, size):
                    antecedent = frozenset(antecedent_items)
                    consequent = itemset - antecedent

                    antecedent_count = self.frequent_itemsets.get(antecedent)
                    consequent_count = self.frequent_itemsets.get(consequent)
                    # Subsets of a frequent itemset are always frequent when
                    # the table comes from apriori(); if a count is missing
                    # (table filled by hand), skip the rule instead of
                    # inventing a count of 1 and reporting bogus metrics.
                    if not antecedent_count or not consequent_count:
                        continue

                    confidence = itemset_count / antecedent_count
                    consequent_support = consequent_count / num_transactions
                    lift = confidence / consequent_support

                    if confidence >= self.min_confidence:
                        self.rules.append((antecedent, consequent, support, confidence, lift))

class MarketBasketVisualization:
    """Matplotlib/seaborn/networkx plots of a MarketBasketAnalysis, shown in Streamlit.

    Every ``plot_*`` method reads ``frequent_itemsets`` or ``rules`` from the
    wrapped analysis object, renders one figure with ``st.pyplot`` and closes
    it afterwards. When there is nothing to plot, an informational message is
    shown instead of raising.
    """

    def __init__(self, mba):
        # mba: analysis object exposing frequent_itemsets, rules,
        # min_support and min_confidence.
        self.mba = mba

    @staticmethod
    def _format_itemset(itemset):
        """Return a comma-separated label for an itemset, in sorted order."""
        # str() keeps this working when items are not strings (e.g. numeric codes).
        return ', '.join(sorted(str(item) for item in itemset))

    @staticmethod
    def _render(fig):
        """Show a figure in Streamlit, then close it to release its memory."""
        st.pyplot(fig)
        # Streamlit reruns the script on every interaction; figures that are
        # not closed accumulate in pyplot's global registry.
        plt.close(fig)

    def plot_frequent_itemsets(self, top_n=10):
        """Plot the top N frequent itemsets as a bar chart of their counts."""
        sorted_itemsets = sorted(self.mba.frequent_itemsets.items(), key=lambda x: x[1], reverse=True)[:top_n]
        if not sorted_itemsets:
            st.info("No frequent itemsets to plot.")
            return

        labels = [str(set(itemset)) for itemset, _ in sorted_itemsets]
        counts = [count for _, count in sorted_itemsets]
        positions = list(range(len(labels)))

        fig, ax = plt.subplots(figsize=(10, 6))
        ax.bar(positions, counts, color='skyblue')
        ax.set_xlabel('Itemsets')
        ax.set_ylabel('Frequency')
        ax.set_title(f'Top {top_n} Frequent Itemsets')
        # Fix the tick positions before labelling them; set_xticklabels on its
        # own relies on whatever locator happens to be active.
        ax.set_xticks(positions)
        ax.set_xticklabels(labels, rotation=45, ha='right')
        fig.tight_layout()
        self._render(fig)

    def plot_support_vs_confidence(self):
        """Plot support vs confidence for the generated rules.

        Dashed lines mark the analysis thresholds (red: min_confidence,
        green: min_support).
        """
        if not self.mba.rules:
            st.info("No association rules to plot.")
            return

        supports = [rule[2] for rule in self.mba.rules]
        confidences = [rule[3] for rule in self.mba.rules]

        fig, ax = plt.subplots(figsize=(10, 6))
        ax.scatter(supports, confidences, alpha=0.7)
        ax.set_title('Support vs Confidence of Rules')
        ax.set_xlabel('Support')
        ax.set_ylabel('Confidence')
        ax.axhline(y=self.mba.min_confidence, color='r', linestyle='--')
        ax.axvline(x=self.mba.min_support, color='g', linestyle='--')
        ax.grid()
        self._render(fig)

    def plot_lift_distribution(self):
        """Plot the distribution of lift values as a 20-bin histogram."""
        if not self.mba.rules:
            st.info("No association rules to plot.")
            return

        lifts = [rule[4] for rule in self.mba.rules]

        fig, ax = plt.subplots(figsize=(10, 6))
        ax.hist(lifts, bins=20, color='lightcoral', alpha=0.7)
        ax.set_xlabel('Lift')
        ax.set_ylabel('Frequency')
        ax.set_title('Distribution of Lift Values')
        ax.grid()
        fig.tight_layout()
        self._render(fig)

    def plot_heatmap_of_rules(self):
        """Plot a heatmap of rule confidence (rows: antecedent, columns: consequent)."""
        if not self.mba.rules:
            # seaborn cannot draw a heatmap from an empty pivot table.
            st.info("No association rules to plot.")
            return

        rule_df = pd.DataFrame(self.mba.rules, columns=['Antecedent', 'Consequent', 'Support', 'Confidence', 'Lift'])

        # Format antecedents and consequents for display
        rule_df['Antecedent'] = rule_df['Antecedent'].apply(self._format_itemset)
        rule_df['Consequent'] = rule_df['Consequent'].apply(self._format_itemset)

        # Create a pivot table for the heatmap
        pivot_table = rule_df.pivot(index="Antecedent", columns="Consequent", values="Confidence")

        fig, ax = plt.subplots(figsize=(12, 8))
        sns.heatmap(pivot_table, annot=True, fmt=".2f", cmap='coolwarm', cbar_kws={'label': 'Confidence'}, ax=ax)
        ax.set_title('Heatmap of Association Rules')
        ax.set_xlabel('Consequent')
        ax.set_ylabel('Antecedent')
        self._render(fig)

    def plot_network_graph(self):
        """Plot a network graph of association rules.

        Each rule contributes one edge per (antecedent item, consequent item)
        pair, pointing from the antecedent item to the consequent item and
        carrying the rule's support as its ``weight`` attribute.
        """
        if not self.mba.rules:
            st.info("No association rules to plot.")
            return

        # Rules are directional (antecedent -> consequent). A directed graph is
        # required for the arrow styling below to take effect; on an undirected
        # graph networkx draws plain lines and the arrow options are not applied.
        G = nx.DiGraph()

        # Add edges to the graph
        for antecedent, consequent, support, confidence, lift in self.mba.rules:
            for a in antecedent:
                for c in consequent:
                    G.add_edge(str(a), str(c), weight=support)

        # Seed the layout so the picture is stable across Streamlit reruns.
        pos = nx.spring_layout(G, k=0.5, seed=42)  # You can adjust k for spacing

        # Create the figure and axis
        fig, ax = plt.subplots(figsize=(12, 12))

        # Draw nodes and edges; node_size is repeated for the edges so the
        # arrowheads stop at the node boundary instead of under the marker.
        nx.draw_networkx_nodes(G, pos, node_size=700, node_color='lightblue', ax=ax)
        nx.draw_networkx_edges(G, pos, arrowstyle='-|>', arrowsize=20, edge_color='gray',
                               alpha=0.5, node_size=700, ax=ax)
        nx.draw_networkx_labels(G, pos, font_size=12, ax=ax)

        # Set title and hide axes
        ax.set_title('Network Graph of Association Rules')
        ax.axis('off')

        # Display the plot in Streamlit
        self._render(fig)

def main():
    """Streamlit entry point: upload a CSV, mine it, and show tables and plots.

    The uploaded CSV must contain the ``Member_number`` and ``itemDescription``
    columns. The page shows a preview of the data, the frequent itemsets, the
    association rules, and one visualization chosen from a select box.
    """
    st.title("Market Basket Analysis")

    # File uploader for the dataset
    uploaded_file = st.file_uploader("Upload Groceries Dataset", type=["csv"])
    if uploaded_file is None:
        return

    df = pd.read_csv(uploaded_file)
    st.dataframe(df.head())  # Display the first few rows of the dataset

    # Fail with a readable message instead of a KeyError traceback when the
    # upload is not in the expected shape.
    missing_columns = {'Member_number', 'itemDescription'} - set(df.columns)
    if missing_columns:
        st.error(
            "The uploaded CSV is missing required column(s): "
            + ', '.join(sorted(missing_columns))
        )
        return

    def format_itemset(itemset):
        """Comma-separated, sorted label; str() tolerates non-string items."""
        return ', '.join(sorted(str(item) for item in itemset))

    # Create an instance of MarketBasketAnalysis
    mba = MarketBasketAnalysis(data=df)
    mba.load_data()  # Load the data and create transactions
    mba.apriori()  # Run the Apriori algorithm

    # Display Frequent Itemsets
    st.subheader("Frequent Itemsets:")
    itemsets_df = pd.DataFrame(list(mba.frequent_itemsets.items()), columns=['Itemset', 'Count'])
    itemsets_df['Itemset'] = itemsets_df['Itemset'].apply(format_itemset)
    st.dataframe(itemsets_df, height=400, use_container_width=True)

    # Display Association Rules
    st.subheader("Association Rules:")
    rules_df = pd.DataFrame(mba.rules, columns=['Antecedent', 'Consequent', 'Support', 'Confidence', 'Lift'])

    # Format antecedents and consequents for display
    rules_df['Antecedent'] = rules_df['Antecedent'].apply(format_itemset)
    rules_df['Consequent'] = rules_df['Consequent'].apply(format_itemset)
    st.dataframe(rules_df)

    # Create an instance of MarketBasketVisualization
    mbv = MarketBasketVisualization(mba)

    # Visualizations: option label -> zero-argument plotting callable.
    st.subheader("Visualizations")
    plotters = {
        "Top Frequent Itemsets": lambda: mbv.plot_frequent_itemsets(top_n=10),
        "Support vs Confidence": mbv.plot_support_vs_confidence,
        "Lift Distribution": mbv.plot_lift_distribution,
        "Heatmap of Association Rules": mbv.plot_heatmap_of_rules,
        "Network Graph of Association Rules": mbv.plot_network_graph,
    }
    visualization_option = st.selectbox("Select Visualization", list(plotters))

    # The itemset chart needs itemsets; every other chart is built from rules.
    if visualization_option == "Top Frequent Itemsets":
        has_data = bool(mba.frequent_itemsets)
    else:
        has_data = bool(mba.rules)

    if has_data:
        plotters[visualization_option]()
    else:
        st.info("Nothing to plot: no itemsets or rules met the current support/confidence thresholds.")

if __name__ == "__main__":
    main()

Overwriting app.py


In [None]:
import os
import subprocess
import threading
import time
from pyngrok import ngrok

# Single source of truth for the port: Streamlit listens on it and ngrok
# forwards to it.
STREAMLIT_PORT = 8501

# Handle to the Streamlit server process, set by run_streamlit(); kept at
# module level so the shutdown code below can stop it.
streamlit_process = None


# Function to run Streamlit
def run_streamlit():
    """Start the Streamlit server for app.py and block until it exits."""
    global streamlit_process
    streamlit_process = subprocess.Popen(
        [
            "streamlit", "run", "app.py",
            "--server.port", str(STREAMLIT_PORT),
            "--server.headless", "true",
        ]
    )
    streamlit_process.wait()


# Start Streamlit in a background thread (daemon, so it can never keep the
# kernel alive on its own).
thread = threading.Thread(target=run_streamlit, daemon=True)
thread.start()

# Allow time for Streamlit to start
time.sleep(5)

# Bound before the try block so the cleanup code can tell whether a tunnel
# was actually opened.
public_url = None

# Expose the Streamlit app on a public URL using ngrok
try:
    public_url = ngrok.connect(STREAMLIT_PORT)
    print(f"Streamlit app is running on: {public_url}")

    # Keep the script running to maintain the ngrok tunnel
    while True:
        time.sleep(1)

except KeyboardInterrupt:
    # Interrupting the cell is the normal way to stop the app.
    pass
except Exception as e:
    print(f"Error starting ngrok: {e}")
finally:
    print("Shutting down...")
    if public_url is not None:
        # ngrok.connect() returns a tunnel object; disconnect() looks the
        # tunnel up by its public URL string.
        ngrok.disconnect(public_url.public_url)
    ngrok.kill()  # Stop the ngrok agent process itself

    # Stop the Streamlit server so the worker thread's wait() returns;
    # without this the join below would block forever.
    if streamlit_process is not None and streamlit_process.poll() is None:
        streamlit_process.terminate()
    thread.join(timeout=10)  # Wait (bounded) for the Streamlit thread to finish


Streamlit app is running on: NgrokTunnel: "https://0736-34-48-2-149.ngrok-free.app" -> "http://localhost:8501"


