In [48]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings('ignore')

In [49]:
def merge_files(filenames, output_filename):
    with open(output_filename, "w", encoding="utf-8") as outfile:
        for filename in filenames:
            with open(filename, "r", encoding="utf-8") as infile:
                outfile.write(infile.read())

filenames = ["f.csv","f1.csv","f2.csv","f3.csv","f4.csv","f5.csv","f6.csv","f7.csv","f8.csv","f9.csv",
             "f10.csv","f11.csv","f12.csv","f3.csv","f14.csv"]
output_filename = "merged_file.csv"
merge_files(filenames, output_filename)


In [50]:
df=pd.read_csv('merged_file.csv')
df.head(5)

Unnamed: 0,Product_Name,Selling Price,MRP,Items Bought Last Month,Ratings,Numeric_Ratings,Total Ratings,Brand,Offer%
0,1.5 Ton 3 Star AI Flexicool Inverter Split AC ...,33990.0,67790.0,600,"4.0 out of 5 stars 2,085",4.0,2085,LG,49.86
1,"1.5 Ton 3 Star Inverter Split AC (Copper, PM 2...",36990.0,58400.0,600,"4.0 out of 5 stars 1,454",4.0,1454,LG,36.66
2,1.5 Ton 3 Star Inverter Split AC (5 in 1 Conve...,32990.0,58990.0,500,"4.2 out of 5 stars 5,561",4.2,5561,Samsung,44.08
3,1.5 Ton 5 Star Wi-Fi Inverter Smart Split AC (...,42990.0,63400.0,500,"4.2 out of 5 stars 4,881",4.2,4881,Haier,32.19
4,1.5 Ton 5 Star AI Flexicool Inverter Split AC ...,40990.0,76090.0,300,"4.0 out of 5 stars 1,531",4.0,1531,Voltas,46.13


In [51]:
df.shape

(81915, 9)

In [52]:
df.isnull().sum()

Product_Name               0
Selling Price              0
MRP                        0
Items Bought Last Month    0
Ratings                    0
Numeric_Ratings            0
Total Ratings              0
Brand                      8
Offer%                     0
dtype: int64

In [53]:
df=df.dropna()

In [54]:
df.isnull().sum()

Product_Name               0
Selling Price              0
MRP                        0
Items Bought Last Month    0
Ratings                    0
Numeric_Ratings            0
Total Ratings              0
Brand                      0
Offer%                     0
dtype: int64

In [55]:
df.shape

(81907, 9)

# Label Encoding

In [56]:
# Encode brand names
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()
df['Brand_Encoded'] = encoder.fit_transform(df['Brand'])

In [57]:
df.dtypes

Product_Name               object
Selling Price              object
MRP                        object
Items Bought Last Month    object
Ratings                    object
Numeric_Ratings            object
Total Ratings              object
Brand                      object
Offer%                     object
Brand_Encoded               int32
dtype: object

# Changing DataTypes

In [58]:
# Convert columns to the required data types
df['Selling Price'] = pd.to_numeric(df['Selling Price'], errors='coerce')
df['MRP'] = pd.to_numeric(df['MRP'], errors='coerce')
df['Items Bought Last Month'] = pd.to_numeric(df['Items Bought Last Month'], errors='coerce')
df['Total Ratings'] = pd.to_numeric(df['Total Ratings'], errors='coerce')
df['Offer%'] = pd.to_numeric(df['Offer%'], errors='coerce')
df['Numeric_Ratings'] = pd.to_numeric(df['Numeric_Ratings'], errors='coerce')
df['Brand'] = df['Brand'].astype(str)

In [59]:
df.dtypes

Product_Name                object
Selling Price              float64
MRP                        float64
Items Bought Last Month    float64
Ratings                     object
Numeric_Ratings            float64
Total Ratings              float64
Brand                       object
Offer%                     float64
Brand_Encoded                int32
dtype: object

In [60]:
df.isnull().sum()

Product_Name                0
Selling Price              14
MRP                        14
Items Bought Last Month    14
Ratings                     0
Numeric_Ratings            14
Total Ratings              14
Brand                       0
Offer%                     14
Brand_Encoded               0
dtype: int64

In [61]:
df=df.dropna()

In [62]:
df.isnull().sum()

Product_Name               0
Selling Price              0
MRP                        0
Items Bought Last Month    0
Ratings                    0
Numeric_Ratings            0
Total Ratings              0
Brand                      0
Offer%                     0
Brand_Encoded              0
dtype: int64

# Top 40 Brands

In [63]:
df['Brand'].value_counts()[:40]

Brand
Skechers         4253
Adidas           4211
Under Armour     4193
Reebok           4145
New Balance      4142
Nike             4139
Puma             4077
Vans             4074
HP               2353
Samsung          2200
Lenovo           1978
Acer             1650
Huawei           1407
Sony             1403
LG               1150
iPhone           1116
MSI              1100
Dell              994
Oppo              964
Panasonic         961
Motorola          940
Vivo              920
ASUS              920
Xiaomi            894
OnePlus           860
Realme            816
DELL              760
Philips           697
Skullcandy        695
RHA               689
Apple AirPods     678
Shure             677
Beats by Dre      664
Sennheiser        658
Bose              657
Jabra             639
Toshiba           592
ViewSonic         548
BenQ              547
AOC               534
Name: count, dtype: int64

In [64]:
df['Brand']=df['Brand'].replace('Dell','DELL')

# NLP

In [65]:
from sklearn.feature_extraction.text import TfidfVectorizer
import re
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

In [66]:
# Ensure NLTK resources are downloaded
import nltk
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\bittu\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\bittu\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\bittu\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

## NLP PreProcessing

In [67]:
# Define preprocess_text function
def preprocess_text(text):
    text = text.lower()  # Convert text to lowercase
    text = re.sub(r'[^\w\s]', '', text)  # Remove special characters and punctuation
    tokens = word_tokenize(text)  # Tokenize the text
    stop_words = set(stopwords.words('english'))   # Remove stopwords
    tokens = [word for word in tokens if word not in stop_words]
    lemmatizer = WordNetLemmatizer()  # Lemmatize the tokens
    tokens = [lemmatizer.lemmatize(word) for word in tokens]
    
    processed_text = ' '.join(tokens)  # Join tokens back into a string
    return processed_text


In [68]:
# Preprocess product names
df['Processed_Product_Name'] = df['Product_Name'].apply(preprocess_text)

In [69]:
# Vectorize the preprocessed product names
tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(df['Processed_Product_Name'])

In [70]:
df

Unnamed: 0,Product_Name,Selling Price,MRP,Items Bought Last Month,Ratings,Numeric_Ratings,Total Ratings,Brand,Offer%,Brand_Encoded,Processed_Product_Name
0,1.5 Ton 3 Star AI Flexicool Inverter Split AC ...,33990.00,67790.0,600.0,"4.0 out of 5 stars 2,085",4.0,2085.0,LG,49.86,1730,15 ton 3 star ai flexicool inverter split ac c...
1,"1.5 Ton 3 Star Inverter Split AC (Copper, PM 2...",36990.00,58400.0,600.0,"4.0 out of 5 stars 1,454",4.0,1454.0,LG,36.66,1730,15 ton 3 star inverter split ac copper pm 25 f...
2,1.5 Ton 3 Star Inverter Split AC (5 in 1 Conve...,32990.00,58990.0,500.0,"4.2 out of 5 stars 5,561",4.2,5561.0,Samsung,44.08,2702,15 ton 3 star inverter split ac 5 1 convertibl...
3,1.5 Ton 5 Star Wi-Fi Inverter Smart Split AC (...,42990.00,63400.0,500.0,"4.2 out of 5 stars 4,881",4.2,4881.0,Haier,32.19,1344,15 ton 5 star wifi inverter smart split ac cop...
4,1.5 Ton 5 Star AI Flexicool Inverter Split AC ...,40990.00,76090.0,300.0,"4.0 out of 5 stars 1,531",4.0,1531.0,Voltas,46.13,3189,15 ton 5 star ai flexicool inverter split ac c...
...,...,...,...,...,...,...,...,...,...,...,...
81910,Women's Deerskin Gore Front Moccasin Black Dee...,17564.26,0.0,100.0,4.1 out of 5 stars 9,4.1,9.0,Adidas,0.00,254,woman deerskin gore front moccasin black deers...
81911,Women s Deerskin Gore Front Moccasin Black Dee...,11234.50,0.0,100.0,4.3 out of 5 stars 20,4.3,20.0,Reebok,0.00,2494,woman deerskin gore front moccasin black deers...
81912,Women's Nurse Mates ANNIE LO Moccasins WHITE 8...,16290.00,0.0,100.0,5.0 out of 5 stars 6,5.0,6.0,New Balance,0.00,2069,woman nurse mate annie lo moccasin white 85
81913,Women's Chillax Washed Laceless Slip-On Sneake...,399.00,999.0,100.0,3.9 out of 5 stars 8,3.9,8.0,New Balance,60.06,2069,woman chillax washed laceless slipon sneakerdr...


# Recommendation System

In [71]:
# Function to recommend products based on brand and ratings
def recommend_products_(brand_name, min_rating=2.0, top_n=10000):
    filtered_df = df[(df['Brand'] == brand_name) & (df['Numeric_Ratings'] >= min_rating)]
    if filtered_df.empty:
        return "No products found for the given brand and rating criteria."
    else:
        recommended_products = filtered_df.sort_values(by='Numeric_Ratings', ascending=False).head(top_n)[['Product_Name', 'Numeric_Ratings']]
        return recommended_products

In [72]:
# User interaction
print("Welcome to the Product Recommender!")
print("Please enter the brand name:")
brand_name = input("Brand: ")
print("Please enter the minimum rating (e.g., 4.0):")
min_rating = float(input("Minimum Rating: "))


Welcome to the Product Recommender!
Please enter the brand name:


Brand:  Puma


Please enter the minimum rating (e.g., 4.0):


Minimum Rating:  4.0


# Recommended Products

In [73]:
# Recommend products
recommended_products = recommend_products_(brand_name, min_rating)
if isinstance(recommended_products, str):
    print(recommended_products)
else:
    print("\nRecommended Products:")
    print(recommended_products.to_string(index=False))

# Display all recommended products
print("\nAll Recommended Products:")
print(recommended_products)


Recommended Products:
                                                                                                                                                                                          Product_Name  Numeric_Ratings
                                                                                                                                                                             Mens Leather Sandals 2022              5.0
                                                                                                                                                                 Stylish Walking Sneakers Casual Shoes              5.0
                                                                                    Imported Foam Running/Jogging Clogs, Non-Slip Thick Sole Walking Sneakers, Quick-Drying Summer Beach Sandals/Clogs              5.0
                                                                                                                 

### Final Dataset

In [74]:
df.head()

Unnamed: 0,Product_Name,Selling Price,MRP,Items Bought Last Month,Ratings,Numeric_Ratings,Total Ratings,Brand,Offer%,Brand_Encoded,Processed_Product_Name
0,1.5 Ton 3 Star AI Flexicool Inverter Split AC ...,33990.0,67790.0,600.0,"4.0 out of 5 stars 2,085",4.0,2085.0,LG,49.86,1730,15 ton 3 star ai flexicool inverter split ac c...
1,"1.5 Ton 3 Star Inverter Split AC (Copper, PM 2...",36990.0,58400.0,600.0,"4.0 out of 5 stars 1,454",4.0,1454.0,LG,36.66,1730,15 ton 3 star inverter split ac copper pm 25 f...
2,1.5 Ton 3 Star Inverter Split AC (5 in 1 Conve...,32990.0,58990.0,500.0,"4.2 out of 5 stars 5,561",4.2,5561.0,Samsung,44.08,2702,15 ton 3 star inverter split ac 5 1 convertibl...
3,1.5 Ton 5 Star Wi-Fi Inverter Smart Split AC (...,42990.0,63400.0,500.0,"4.2 out of 5 stars 4,881",4.2,4881.0,Haier,32.19,1344,15 ton 5 star wifi inverter smart split ac cop...
4,1.5 Ton 5 Star AI Flexicool Inverter Split AC ...,40990.0,76090.0,300.0,"4.0 out of 5 stars 1,531",4.0,1531.0,Voltas,46.13,3189,15 ton 5 star ai flexicool inverter split ac c...


### Saved File

In [75]:
df.to_csv('merged_file.csv',index=False)

# ChatBot

In [76]:
import pandas as pd
products=pd.read_csv('merged_file.csv')
products.head()

Unnamed: 0,Product_Name,Selling Price,MRP,Items Bought Last Month,Ratings,Numeric_Ratings,Total Ratings,Brand,Offer%,Brand_Encoded,Processed_Product_Name
0,1.5 Ton 3 Star AI Flexicool Inverter Split AC ...,33990.0,67790.0,600.0,"4.0 out of 5 stars 2,085",4.0,2085.0,LG,49.86,1730,15 ton 3 star ai flexicool inverter split ac c...
1,"1.5 Ton 3 Star Inverter Split AC (Copper, PM 2...",36990.0,58400.0,600.0,"4.0 out of 5 stars 1,454",4.0,1454.0,LG,36.66,1730,15 ton 3 star inverter split ac copper pm 25 f...
2,1.5 Ton 3 Star Inverter Split AC (5 in 1 Conve...,32990.0,58990.0,500.0,"4.2 out of 5 stars 5,561",4.2,5561.0,Samsung,44.08,2702,15 ton 3 star inverter split ac 5 1 convertibl...
3,1.5 Ton 5 Star Wi-Fi Inverter Smart Split AC (...,42990.0,63400.0,500.0,"4.2 out of 5 stars 4,881",4.2,4881.0,Haier,32.19,1344,15 ton 5 star wifi inverter smart split ac cop...
4,1.5 Ton 5 Star AI Flexicool Inverter Split AC ...,40990.0,76090.0,300.0,"4.0 out of 5 stars 1,531",4.0,1531.0,Voltas,46.13,3189,15 ton 5 star ai flexicool inverter split ac c...


In [77]:
import random
import re
import nltk
from nltk.chat.util import Chat, reflections

In [78]:
def welcome_greet():
    responses = ['Hello!', 'Hi there!', 'Hey!']
    return responses[random.randint(0, len(responses)-1)]

def exit_greet():
    responses = ['Goodbye!', 'See you later!', 'Bye!']
    return responses[random.randint(0, len(responses)-1)]

def thanks():
    responses = ['You\'re welcome!', 'No problem!', 'Anytime!']
    return responses[random.randint(0, len(responses)-1)]

def search_products(query):
    results = products[products['Product_Name'].str.contains(query, case=False)]
    return results

In [79]:
# Define patterns for matching intents
patterns = [
    (r'hi|hello|hey', ['welcome_greet']),
    (r'bye|goodbye', ['exit_greet']),
    (r'thanks|thank you', ['thanks']),
]

# Create a chatbot using NLTK
chatbot = Chat(patterns)

In [80]:
# Function to start the chat
def start_chat():
    print("Chatbot: Hello! Welcome to the chatbot")
    print("Chatbot: How can I assist you today?")
    while True:
        user_input = input("User: ")
        print("You: ", user_input)
        if user_input.lower() == "exit":
            print("Chatbot: Goodbye!")
            break
        # Search for products based on user input
        results = search_products(user_input)
        if not results.empty:
            print("Chatbot: Here are the products matching your query:")
            print(results[['Product_Name', 'Selling Price', 'MRP', 'Ratings']])
        else:
            print("Chatbot: Sorry, I couldn't find any products matching your query.")

if __name__ == "__main__":
    start_chat()


Chatbot: Hello! Welcome to the chatbot
Chatbot: How can I assist you today?


User:  Puma


You:  Puma
Chatbot: Here are the products matching your query:
                                            Product_Name  Selling Price  \
31847  unisex-adult FLYER Runner Mesh Puma Black-Cast...        2211.00   
32683  unisex-adult NRGY Comet Puma Black-Puma White ...        1799.00   
36282                  unisex-adult Puma 1948 Mid L Flat        2849.00   
36653  unisex-adult NRGY Comet Puma Black-Puma White ...        1809.00   
37037                              mens Puma Wick Sandal        1124.00   
39258  Unisex-Adult Disperse XT 2 Puma Black-Burnt Re...        2750.00   
39884  Unisex-Adult Disperse XT 2 Puma Black-Burnt Re...        2750.00   
39908  unisex-adult Flex Essential Pro Peacoat-Puma W...        1519.00   
40765                   Puma Unisex-Adult Crafty Sneaker        3224.00   
41872  mens Pacer X Graphicster Dark Shadow-Surf The ...        2199.00   
42491                                               Puma         689.00   
42506  unisex-adult Wired Closed shoe

User:  Asus


You:  Asus
Chatbot: Here are the products matching your query:
                                            Product_Name  Selling Price  \
890    (Refurbished) ASUS Chromebox 3 High Performanc...       19999.00   
912    (Refurbished) ASUS High Performance Mini PC (I...       19999.00   
951    (Refurbished) ASUS Compact Tiny Mini PC PN41 (...       11474.00   
1042   (Refurbished) ASUS Chromebox 3 High Performanc...       17999.00   
1150   (Refurbished) ASUS High Performance Mini PC (I...       16999.00   
...                                                  ...            ...   
78573  Pegasus galaxy Women s Leather Jazz Shoes Slip...         399.00   
81184  Pegasus galaxy Women s Leather Jazz Shoes Slip...        6498.00   
81208  Pegasus galaxy Women s Leather Jazz Shoes Slip...        4317.00   
81209  Pegasus galaxy Women s Leather Jazz Shoes Slip...        6733.00   
81210  Pegasus galaxy Women s Leather Jazz Shoes Slip...        4740.48   

           MRP                   Rat

User:  exit


You:  exit
Chatbot: Goodbye!
