In [18]:
import zipfile

import pandas as pd
from sklearn.metrics.pairwise import cosine_similarity

# Step 1: Load and Clean Data
file_path = "/content/OnlineRetail (1).xlsx"

# The path has an .xlsx extension, but pd.read_csv cannot parse a real Excel
# workbook. Excel workbooks are ZIP archives, so sniff the content: use
# read_excel for genuine workbooks and keep read_csv for plain-text files that
# merely carry the .xlsx name.
if zipfile.is_zipfile(file_path):
    data = pd.read_excel(file_path)
else:
    data = pd.read_csv(file_path)

# Clean invalid entries:
#  - rows without a customer or a product description cannot be placed in the
#    customer x product matrix;
#  - invoice numbers starting with 'C' are excluded
#    (presumably cancellations -- confirm against the dataset documentation);
#  - non-positive quantities and unit prices are excluded.
data = data.dropna(subset=['CustomerID', 'Description'])
data = data[~data['InvoiceNo'].astype(str).str.startswith('C')]
data = data[data['Quantity'] > 0]
data = data[data['UnitPrice'] > 0]

# Keep the raw (un-normalized) descriptions so sample names can later be
# shown exactly as they appear in the input file.
original_descriptions = data['Description'].unique()

# Work on an explicit copy so the column assignment below does not write into
# a view of the previously filtered frame.
data = data.copy()

# Normalize BEFORE filtering: otherwise padded values such as "POSTAGE " or a
# whitespace-only description would not match the exclusion list.
data['Description'] = data['Description'].str.strip().str.upper()

# Descriptions that are bookkeeping entries rather than products.
NON_PRODUCT_DESCRIPTIONS = ['DISCOUNT', 'MANUAL', '', 'SAMPLES', 'POSTAGE']
data = data[~data['Description'].isin(NON_PRODUCT_DESCRIPTIONS)]

# Step 2: Build Customer-Item Interaction Matrix
# Rows are customers, columns are product descriptions, and each cell is the
# total quantity bought (0 when the customer never bought the product).
interaction_matrix = data.pivot_table(
    index='CustomerID',
    columns='Description',
    values='Quantity',
    aggfunc='sum',
    fill_value=0
)

# Step 3: Transpose to get Item-Customer Matrix
# One row per product, so that row-wise similarity compares products.
product_matrix = interaction_matrix.T

# Step 4: Compute Cosine Similarity Between Products
similarity_scores = cosine_similarity(product_matrix)
# Label both axes with product names so similarities can be looked up by name.
similarity_df = pd.DataFrame(
    similarity_scores,
    index=product_matrix.index,
    columns=product_matrix.index,
)

# Step 5: Define Recommendation Function
def recommend_similar_items(item_name, top_n=5):
    """Return the ``top_n`` products most similar to ``item_name``.

    The name is stripped and upper-cased before it is looked up in the
    module-level ``similarity_df`` (a square product-by-product similarity
    table labelled by product name on both axes).

    Args:
        item_name: Product name to look up; case and surrounding whitespace
            are ignored.
        top_n: Maximum number of recommendations to return.

    Returns:
        A pandas Series mapping product name to similarity score, sorted
        from most to least similar and never containing the queried product
        itself. If the product is unknown, a human-readable error string is
        returned instead.
    """
    item_name = item_name.strip().upper()
    if item_name not in similarity_df.index:
        return f"'{item_name}' not found in product list ."

    # Remove the queried product by label rather than assuming it sorts first:
    # another product can tie with it at similarity 1.0, in which case slicing
    # off position 0 could drop a real match and return the query itself.
    candidates = similarity_df[item_name].drop(item_name)

    # A stable sort keeps tied scores in their original order, so the output
    # is deterministic.
    ranked = candidates.sort_values(ascending=False, kind="stable")
    return ranked.head(top_n)

# Step 6: Execution
print("\nWelcome to the Product Recommendation System!\n")

# Provide some sample product names: take the first 10 raw descriptions and
# keep only those whose normalized form is still present after cleaning.
# The cleaned names are collected into a set once, instead of recomputing and
# linearly scanning data['Description'].unique() for every candidate.
cleaned_descriptions = set(data['Description'].unique())
sample_products = [
    product
    for product in original_descriptions[:10]
    if product.strip().upper() in cleaned_descriptions
]
print("Here are some sample products you can try:")
for product in sample_products:
    print(f"- {product}")

# Prompt repeatedly until the user types 'stop' (case-insensitive).
while True:
    item_name = input("Enter the product name for recommendation from above (or type 'stop' to quit): ")
    if item_name.strip().lower() == 'stop':
        break  # Exit the loop if the user enters 'stop'

    print("\n" + "*" * 50)
    print(f"Recommendations for: {item_name.upper()}")
    print("*" * 50 + "\n")

    recommendations = recommend_similar_items(item_name)
    if isinstance(recommendations, str):
        # A string result is the "not found" message, not a ranking.
        print(recommendations)
    else:
        # Number the results from 1 and show each score to 4 decimal places.
        for i, (product, score) in enumerate(recommendations.items(), 1):
            print(f"{i}. {product} (Similarity Score: {score:.4f})")

    print("-" * 50 + "\n")

print("\nThank you for using the recommendation system!")



Welcome to the Product Recommendation System!

Here are some sample products you can try:
- WHITE HANGING HEART T-LIGHT HOLDER
- WHITE METAL LANTERN
- CREAM CUPID HEARTS COAT HANGER
- KNITTED UNION FLAG HOT WATER BOTTLE
- RED WOOLLY HOTTIE WHITE HEART.
- SET 7 BABUSHKA NESTING BOXES
- GLASS STAR FROSTED T-LIGHT HOLDER
- HAND WARMER UNION JACK
- HAND WARMER RED POLKA DOT
- ASSORTED COLOUR BIRD ORNAMENT
Enter the product name for recommendation from above (or type 'stop' to quit): RED WOOLLY HOTTIE WHITE HEART.

**************************************************
Recommendations for: RED WOOLLY HOTTIE WHITE HEART.
**************************************************

1. PINK MARSHMALLOW SCARF KNITTING KIT (Similarity Score: 0.9725)
2. CREAM SLICE FLANNEL PINK SPOT (Similarity Score: 0.9650)
3. 6 CHOCOLATE LOVE HEART T-LIGHTS (Similarity Score: 0.9567)
4. LADS ONLY TISSUE BOX (Similarity Score: 0.9516)
5. ROSE SCENT CANDLE IN JEWELLED BOX (Similarity Score: 0.9231)
-------------------------