## Import Libraries

In [1]:
import pandas as pd  # For data manipulation
import re  # For regular expressions
from mlxtend.frequent_patterns import association_rules, apriori  # For association rule mining
from dash import Dash, dcc, html, Input, Output, callback  # For creating the web application
import numpy  # For array manipulation
import pickle  # For serializing and deserializing Python objects
import warnings
warnings.filterwarnings('ignore')

  _dash_comm = Comm(target_name="dash")


## Load Dataset

In [2]:
# Path to the dataset
dataset_path = "dummy dataset.csv"

# Read the dataset using pandas
df = pd.read_csv(dataset_path)

# Preprocess text data: lowercase the 'Text' column and collapse every run of
# non-word characters into a single space.
# - Missing values are replaced by '' so a blank cell does not raise on .lower().
# - The pattern is a raw string so `\W` reaches the regex engine instead of being
#   treated as an (invalid) string escape sequence.
df['lower_text'] = (
    df['Text']
    .fillna('')
    .astype(str)
    .str.lower()
    .str.replace(r'\W+', ' ', regex=True)
)

# Expose the row number as an 'index' column; later cells use it as the transaction id.
df = df.reset_index()

## Data Processing for Association Rule Mining

In [3]:
# Transform text data into transactions: one (item, index) row per token.
# The rows are collected in plain lists and the frame is built once, because
# concatenating a growing DataFrame inside the loop re-copies it on every pass.
token_items = []
token_row_ids = []
for row_id, text in zip(df['index'], df['lower_text']):
    # Split text into words (empty tokens are filtered out below)
    words = text.split(' ')
    token_items.extend(words)
    token_row_ids.extend([row_id] * len(words))
full_data = pd.DataFrame({'item': token_items, 'index': token_row_ids})

# Group by transaction and item, count occurrences.
# Splitting on a single space can yield '' tokens; those are dropped here.
transactions_str = (
    full_data[full_data['item'] != '']
    .groupby(['index', 'item'])['item']
    .count()
    .reset_index(name='Count')
)

# Pivot table to create a matrix representation of transactions:
# one row per transaction, one column per word, cell = occurrence count (0 when absent).
my_basket = transactions_str.pivot_table(index='index', columns='item', values='Count', aggfunc='sum').fillna(0)

# Function to encode data as 0 or 1
def encode(x):
    """Binarise an occurrence count.

    Parameters
    ----------
    x : number
        Occurrence count of an item in a transaction.

    Returns
    -------
    int
        1 when ``x >= 1``, otherwise 0. Values below 1 (including fractions
        and NaN, for which the comparison is False) map to 0, so the function
        always returns an integer instead of falling through to ``None``.
    """
    return 1 if x >= 1 else 0

# Apply encoding function to every cell of the dataset, one column at a time.
# Series.map is used rather than DataFrame.applymap, which pandas 2.1 deprecated;
# the column-wise form behaves the same on older and newer pandas versions.
my_basket_sets = my_basket.apply(lambda column: column.map(encode))

## Association Rule Mining

In [4]:
# Applying Apriori algorithm to find frequent itemsets
# (itemsets must appear in at least 1% of the transactions)
frequent_items = apriori(my_basket_sets, min_support=0.01, use_colnames=True)

# Generate association rules from frequent itemsets, keeping rules with lift >= 1,
# ordered from the most to the least confident.
rules = association_rules(frequent_items, metric="lift", min_threshold=1)
rules = rules.sort_values('confidence', ascending=False)

# Flatten each rule into a single list of words (antecedents followed by consequents).
# A list comprehension is used instead of a row-wise apply: with zero rules the
# row-wise apply hands back a DataFrame, which cannot be assigned to one column.
rules['antecedents_consequents'] = [
    list(antecedent) + list(consequent)
    for antecedent, consequent in zip(rules['antecedents'], rules['consequents'])
]

# Save the generated rules to a file
with open('association_rules_model.pkl', 'wb') as file:
    pickle.dump(rules, file)

## Creating Dash Web Application

In [5]:
external_stylesheets = ["https://codepen.io/chriddyp/pen/bWLwgP.css"]

# Load the saved model.
# NOTE: pickle.load can execute arbitrary code embedded in the file. It is used
# here on 'association_rules_model.pkl', the file the rule-mining cell writes;
# do not point it at a pickle obtained from an untrusted source.
with open('association_rules_model.pkl', 'rb') as file:
    saved_rules = pickle.load(file)

# Single-page layout: a free-text input and a div that the callback fills
# with the predicted words.
app = Dash(__name__, external_stylesheets=external_stylesheets)
app.layout = html.Div(
    [
        html.H1("Next Word Prediction using Apriori Algorithm"),
        html.I("Enter your input here :"),
        html.Br(),
        dcc.Input(id="input1", type="text", placeholder="Sentence", style={'marginRight':'10px','width':'90%'}),
        html.Br(),html.Br(),
        html.Label("Predicted next words :"),
        html.Div(id="output"),
    ]
)

@callback(
    Output("output", "children"),
    Input("input1", "value"),
)
def update_output(input1):
    """Suggest next words for the text typed into the ``input1`` box.

    The typed text is normalised the same way as the training text (lowercased,
    runs of non-word characters collapsed) and split into words. Rules from
    ``saved_rules`` whose antecedents contain every typed word are ranked by
    confidence; the words of the 20 best rules that the user has not typed yet
    are returned.

    Parameters
    ----------
    input1 : str or None
        Current value of the input box (``None`` before the user types).

    Returns
    -------
    str
        Comma-separated suggestions, most confident rule first and without
        duplicates; an empty string when there is no input or no matching rule.
    """
    if not input1:
        return ''

    # Whitespace-only or punctuation-only input leaves no words to match on.
    # str.split() without an argument also drops the empty tokens that a
    # trailing space would otherwise create (which never match any rule).
    new_antecedents = re.sub(r'\W+', ' ', input1.lower()).split()
    if not new_antecedents:
        return ''
    typed_words = set(new_antecedents)

    # Keep the rules whose antecedents include every typed word.
    # astype(bool) keeps the mask boolean even when there are no rules at all.
    matches_input = saved_rules['antecedents'].apply(
        lambda items: typed_words.issubset(items)
    ).astype(bool)
    top_rules = saved_rules[matches_input].sort_values('confidence', ascending=False).head(20)

    # Walk the rules in confidence order so the best suggestions come first;
    # `seen` removes duplicates without disturbing that order.
    recommendations = []
    seen = set()
    for rule_words in top_rules['antecedents_consequents']:
        for word in rule_words:
            if word not in typed_words and word not in seen:
                seen.add(word)
                recommendations.append(str(word))

    return ', '.join(recommendations)


# Start the Dash server only when this code runs as the main module
# (not when it is imported); debug mode is switched off.
if __name__ == "__main__":
    app.run(debug=False)