In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel

# Load the dataset
df = pd.read_csv("dataset.csv")

# Define the columns to use for content-based recommendations
content_cols = ['Category', 'Size', 'Color', 'Season']

# Combine the selected columns into a single feature
df['content'] = df[content_cols].apply(lambda x: ' '.join(x), axis=1)

# Initialize the TfidfVectorizer
tfidf = TfidfVectorizer(stop_words='english')

# Fit and transform the content feature
tfidf_matrix = tfidf.fit_transform(df['content'])

# Compute the cosine similarity matrix
cosine_sim = linear_kernel(tfidf_matrix, tfidf_matrix)

# Import joblib
import joblib


# Save the model to a file
joblib.dump(cosine_sim, 'clickrecommendation.pkl')

# Load the model from the file
loaded_model = joblib.load('clickrecommendation.pkl')

# Function to get recommendations based on item name


In [4]:
def get_recommendations(item_name, cosine_sim=cosine_sim):
    # Get the index of the item that matches the name
    idx = df[df['Item Purchased'] == item_name].index[0]

    # Get the pairwise similarity scores of all items with that item
    sim_scores = list(enumerate(cosine_sim[idx]))

    # Sort the items based on the similarity scores
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)

    # Get the top 5 most similar items
    sim_scores = sim_scores[1:6]

    # Get the item indices
    item_indices = [i[0] for i in sim_scores]

    # Return the top 5 most similar items
    return df.iloc[item_indices]['Item Purchased'].unique().tolist()

# Example usage:
item_name = 'Blouse'
recommendations = get_recommendations(item_name)
print("Recommendations for", item_name, ":")
print(recommendations)

Recommendations for Blouse :
['Sweater', 'Jeans', 'Dress', 'Shorts']
