In [2]:
# Content-Based-Filtering

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from sklearn.metrics import accuracy_score
import pickle

# Gather data
events = pd.read_csv('caterings1.csv')
#url = (r"")


# Define features
features = [ 'Caterer Name','Address','Price Range','Email Address']

# Preprocess data
events = events.drop_duplicates(subset=['Email Address']) # Remove duplicates
events = events[features].fillna('') # Fill any missing values with empty string

# Vectorize features
vectorizer = TfidfVectorizer()
vectors = vectorizer.fit_transform(events.apply(lambda x: ' '.join(x.astype(str)), axis=1))
feature_names = vectorizer.get_feature_names_out()


# Build the model / Training the model
def content_based_filtering(user_events):
    # Get user's event history
    user_vector = vectorizer.transform([' '.join(user_events)])
    
    # Compute similarity scores between user's history and all events
    similarity_scores = cosine_similarity(user_vector, vectors).flatten()
    
    # Get top n most similar events
    top_indices = similarity_scores.argsort()[::-1]#[:n_recommendations]
    
    recommend=events.iloc[top_indices]
    
    # Return event recommendations
    return recommend

# Train and test the model
user_events = ["New Delhi","300"]
recommendations = content_based_filtering(user_events)
print(recommendations)   

               Caterer Name                         Address   
76          Indian Caterers                   Dwarka, Delhi  \
79           Grand Caterers           Rajouri Garden, Delhi   
68   Kitchen Kraft Catering                    Saket, Delhi   
143          Royal Caterers                          Ambala   
71         Flavors Catering                   Rohini, Delhi   
..                      ...                             ...   
128           Silver Spoons                         Gurgaon   
129   Chef's Table Caterers                         Gurgaon   
130          Zayka Caterers                          Ambala   
131        The Green Bakers                          Ambala   
0            Laxmi Caterers  Hiran Magri, Sector 4, Udaipur   

                      Price Range                   Email Address  
76    Rs. 300 - Rs. 800 per plate        indiancaterers@gmail.com  
79    Rs. 100 - Rs. 300 per plate    grandcaterersindia@gmail.com  
68    Rs. 300 - Rs. 800 per plate   inf

In [4]:
events.head()

Unnamed: 0,Caterer Name,Address,Contact Number,Email Address,Specialties,Price Range
0,Laxmi Caterers,"Hiran Magri, Sector 4, Udaipur",0294-2460202,info@laxmicaterers.in,"Vegetarian, North Indian, Chinese",Rs. 500 - Rs. 1000 per plate
1,Panna Lal Catering,"Raja Park, Jaipur",098291 20009,pannalalcaterers@gmail.com,"Rajasthani, North Indian, Continental",Rs. 300 - Rs. 800 per plate
2,Jodhpur Caterers,"9th C Road, Sardarpura, Jodhpur",0291-2627181,info@jodhpurcaterers.com,"Rajasthani, North Indian, Chinese",Rs. 600 - Rs. 1500 per plate
3,Shri Marwar Caterers,"2nd Floor, Om Shanti Complex, Bhupalpura, Udaipur",0294-2423634,info@shrimarwarcaterers.com,"Rajasthani, North Indian, Continental",Rs. 200 - Rs. 500 per plate
4,Sharma Caterers,"Ambabari, Jaipur",098290 57993,sharmacaterers.jaipur@gmail.com,"Rajasthani, North Indian, Chinese",Rs. 100 - Rs. 300 per plate


In [12]:
#Naive bayes for checking the accuracy

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


# define the feature and target columns
X = events["Specialties"]
y = events["Price Range"]

# split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# define a pipeline to vectorize text and train a Multinomial Naive Bayes classifier
pipeline = Pipeline([
    ("vectorizer", TfidfVectorizer()),
    ("classifier", MultinomialNB())
])

# fit the pipeline on the training data
pipeline.fit(X_train, y_train)

# make predictions on the test set
y_pred = pipeline.predict(X_test)

# calculate accuracy of the model on the test set
a = accuracy_score(y_test, y_pred)

# print the accuracy score
print("Accuracy on test set: {:.2f}".format(a*100))



Accuracy on test set: 12.90


In [18]:
#Random Forest for accuracy

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


# define the feature and target columns
X = events["Email Address"]
y = events["Price Range"]

# split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# define a pipeline to vectorize text and train a Random Forest Classifier
pipeline = Pipeline([
    ("vectorizer", TfidfVectorizer()),
    ("classifier", RandomForestClassifier())
])

# fit the pipeline on the training data
pipeline.fit(X_train, y_train)

# make predictions on the test set
y_pred = pipeline.predict(X_test)

# calculate accuracy of the model on the test set
accuracy = accuracy_score(y_test, y_pred)

# print the accuracy score
print("Accuracy on test set: {:.2f}".format(accuracy))

Accuracy on test set: 0.13


In [20]:
#SVM for accuracy

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


# define the feature and target columns
X = events["Email Address"]
y = events["Price Range"]

# split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# define a pipeline to vectorize text and train a SVM Classifier
pipeline = Pipeline([
    ("vectorizer", TfidfVectorizer()),
    ("classifier", SVC())
])

# fit the pipeline on the training data
pipeline.fit(X_train, y_train)

# make predictions on the test set
y_pred = pipeline.predict(X_test)

# calculate accuracy of the model on the test set
accuracy = accuracy_score(y_test, y_pred)

# print the accuracy score
print("Accuracy on test set: {:.2f}".format(accuracy))


Accuracy on test set: 0.10


In [5]:
import gradio as gr

def content_based_filtering_interface(caterer_name, address, price_range, email_address, specialties, n_recommendations=3):
    user_events = [caterer_name, address, price_range, email_address, specialties]
    recommendations = content_based_filtering(user_events, n_recommendations)
    return recommendations.to_string(index=False)

iface = gr.Interface(
    content_based_filtering_interface,
    inputs=[
        gr.inputs.Textbox(label='Caterer Name'),
        gr.inputs.Textbox(label='Address'),
        gr.inputs.Textbox(label='Price Range'),
        gr.inputs.Textbox(label='Email Address'),
        gr.inputs.Textbox(label='Specialties'),
        gr.inputs.Number(default=3, label='Number of Recommendations')
    ],
    outputs=gr.outputs.Textbox(label='Recommendations')
)

iface.launch(share=True)




Running on local URL:  http://127.0.0.1:7863
Running on public URL: https://dbf77e4abee1c09f74.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades (NEW!), check out Spaces: https://huggingface.co/spaces




Traceback (most recent call last):
  File "/Users/vinitkumar/opt/anaconda3/lib/python3.9/site-packages/gradio/routes.py", line 401, in run_predict
    output = await app.get_blocks().process_api(
  File "/Users/vinitkumar/opt/anaconda3/lib/python3.9/site-packages/gradio/blocks.py", line 1302, in process_api
    result = await self.call_function(
  File "/Users/vinitkumar/opt/anaconda3/lib/python3.9/site-packages/gradio/blocks.py", line 1025, in call_function
    prediction = await anyio.to_thread.run_sync(
  File "/Users/vinitkumar/opt/anaconda3/lib/python3.9/site-packages/anyio/to_thread.py", line 31, in run_sync
    return await get_asynclib().run_sync_in_worker_thread(
  File "/Users/vinitkumar/opt/anaconda3/lib/python3.9/site-packages/anyio/_backends/_asyncio.py", line 937, in run_sync_in_worker_thread
    return await future
  File "/Users/vinitkumar/opt/anaconda3/lib/python3.9/site-packages/anyio/_backends/_asyncio.py", line 867, in run
    result = context.run(func, *args)
  Fil

In [6]:
def content_based_filtering_interface(address, n_recommendations=3):
    # Get the row index of the given address
    index = events[events['Address'] == address].index[0]
    
    # Get the features for the given row index
    caterer_name = events.iloc[index]['Caterer Name']
    price_range = events.iloc[index]['Price Range']
    email_address = events.iloc[index]['Email Address']
    specialties = events.iloc[index]['Specialties']
    
    # Get recommendations based on the given features
    user_events = [caterer_name, address, price_range, email_address, specialties]
    recommendations = content_based_filtering(user_events, n_recommendations)
    
    # Return recommendations without the address column
    return recommendations.drop('Address', axis=1)

iface = gr.Interface(
    content_based_filtering_interface,
    inputs=gr.inputs.Textbox(label='Address'),
    outputs=gr.outputs.Table(headers=['Caterer Name', 'Price Range', 'Email Address', 'Specialties'])
)

iface.launch(share=True)




AttributeError: module 'gradio.outputs' has no attribute 'Table'

In [3]:
def content_based_filtering_interface(address, n_recommendations=3):
    user_events = [address]
    recommendations = content_based_filtering(user_events, n_recommendations)
    return "\n".join(recommendations.to_string(index=False).split("\n")[1:])

iface = gr.Interface(
    content_based_filtering_interface,
    inputs=[
        gr.inputs.Textbox(label='Address'),
        gr.inputs.Number(label='Number of Recommendations')
    ],
    outputs=gr.outputs.Label(label='Recommendations')
)

iface.launch(share=True)

iface = gr.Interface(
    content_based_filtering_interface,
    inputs=Textbox(label='Address'),
    outputs=Label()
)



Running on local URL:  http://127.0.0.1:7861
Running on public URL: https://91f62f494cd3c6fd1e.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades (NEW!), check out Spaces: https://huggingface.co/spaces


NameError: name 'Textbox' is not defined

Traceback (most recent call last):
  File "/Users/vinitkumar/opt/anaconda3/lib/python3.9/site-packages/gradio/routes.py", line 401, in run_predict
    output = await app.get_blocks().process_api(
  File "/Users/vinitkumar/opt/anaconda3/lib/python3.9/site-packages/gradio/blocks.py", line 1302, in process_api
    result = await self.call_function(
  File "/Users/vinitkumar/opt/anaconda3/lib/python3.9/site-packages/gradio/blocks.py", line 1025, in call_function
    prediction = await anyio.to_thread.run_sync(
  File "/Users/vinitkumar/opt/anaconda3/lib/python3.9/site-packages/anyio/to_thread.py", line 31, in run_sync
    return await get_asynclib().run_sync_in_worker_thread(
  File "/Users/vinitkumar/opt/anaconda3/lib/python3.9/site-packages/anyio/_backends/_asyncio.py", line 937, in run_sync_in_worker_thread
    return await future
  File "/Users/vinitkumar/opt/anaconda3/lib/python3.9/site-packages/anyio/_backends/_asyncio.py", line 867, in run
    result = context.run(func, *args)
  Fil