### Importing libraries

In [1]:
import pandas as pd
import numpy as np
import re
import nltk
from nltk.stem import WordNetLemmatizer
from nltk import pos_tag
from sklearn.metrics import pairwise_distances
import pickle

In [18]:
# Fetch the NLTK 'punkt' tokenizer data (text_normalization below tokenizes with nltk.word_tokenize).
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [24]:
# Fetch the NLTK 'averaged_perceptron_tagger' data (text_normalization below tags tokens with pos_tag).
import nltk
nltk.download('averaged_perceptron_tagger')


[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


True

In [26]:
# Fetch the NLTK 'wordnet' data (the lemmatizer created below is a WordNetLemmatizer).
nltk.download('wordnet')

[nltk_data] Downloading package wordnet to /root/nltk_data...


True

### Uploading and reading data

In [7]:
# Load the workbook into a DataFrame and display it.
# NOTE(review): absolute path under /content — presumably a Colab upload location; adjust when running elsewhere.
df = pd.read_excel('/content/webscrap_hyryder.xlsx')
df

Unnamed: 0,Name,Category,Content
0,Vid6639,Team-BHP Support,2022 Toyota Urban Cruiser Hyryder Review Toyot...
1,Vid6639,Team-BHP Support,Interior Cabin Design & Quality Step inside th...
2,Vid6639,Team-BHP Support,Driving the Urban Cruiser Hyryder 1.5L Hybrid ...
3,Vid6639,Team-BHP Support,Toyota Urban Cruiser Hyryder Exterior Images F...
4,Vid6639,Team-BHP Support,Toyota Urban Cruiser Hyryder Interior Images B...
...,...,...,...
1323,drsachin,BHPian,"Quote: Originally Posted by dhinchak Hi, Can a..."
1324,shikh_oberoi,BHPian,"After 14000kms of safe driving, an e rickshaw ..."
1325,sriny_blr,BHPian,Swift to Hy(brid)Ryder; Almost all of the comp...
1326,ex-innova-guy,BHPian,Quote: Originally Posted by sriny_blr  Power ...


### Text normalization

In [3]:
# Shared WordNet lemmatizer instance; text_normalization reads it as a module-level name.
lemmatizer = WordNetLemmatizer()

In [27]:
# Text Normalization
def text_normalization(text):
    """Lower-case, strip punctuation from, and lemmatize a piece of text.

    The input is lower-cased, every character other than ``a-z``/``0-9`` is
    replaced by a space, the result is tokenized and POS-tagged, and each
    token is lemmatized with the module-level ``lemmatizer`` using a WordNet
    part of speech derived from its tag.

    Parameters
    ----------
    text : str
        Raw text to normalize.

    Returns
    -------
    str or None
        The lemmatized tokens joined by single spaces, or ``None`` when
        normalization fails (for example when ``text`` is not a string).
        The failure is reported on stdout instead of being silently dropped.
    """
    lema_sent = []
    try:
        pre_text = text.lower()
        # Punctuation (apostrophes included) becomes a space, so "doesn't"
        # is split into the two tokens "doesn" and "t".
        words = re.sub(r'[^a-z0-9]', " ", pre_text)
        tag_list = pos_tag(nltk.word_tokenize(words))
        for token, pos_token in tag_list:
            # Map the tagger's tag to the single-letter POS the lemmatizer
            # takes: verb, adjective, adverb, otherwise noun.
            if pos_token.startswith("V"):
                pos_val = "v"
            elif pos_token.startswith("J"):
                pos_val = "a"
            elif pos_token.startswith("R"):
                pos_val = "r"
            else:
                pos_val = "n"
            lema_sent.append(lemmatizer.lemmatize(token, pos_val))
        return " ".join(lema_sent)
    except Exception as e:
        # Only ordinary errors are handled here; KeyboardInterrupt and
        # SystemExit propagate so a long-running .apply() can be interrupted.
        print(f"Error in text normalization: {e}")
        return None

In [28]:
# Try the normalizer on a single post (index label 1326) before applying it to the whole column.
lemmatized_df = text_normalization(df.loc[1326, 'Content'])

In [29]:
# Show the normalized text produced from the sample post above.
print(lemmatized_df)

quote originally post by sriny blr power windows button though looks same a swfit the operation be very smooth in hyryder thank goodness the door doesn t vibrate when window roll up down unlike my swift interest i never felt this in my innova but can feel the mechanism in the hyryder whenever i operate the driver side window quote door atleast driver side doesn t auto unlock when i shut the engine not sure if there be an option to change this you can change it by go in set mode by follow step long press the right side knob of the speedometer turn the same knob to right or leave for navigate through the menu go into door lock setting and there will be an option to auto unlock when ignition be turn off even i find this feature very annoy since i have to always manually unlock all door but not anymore quote yet to understand or need more clarity there be button provide for traction control doesn t seem to make much of a difference not sure by what you mean here but when the car be station

In [30]:
# Normalize every post; rows where text_normalization fails end up as None in the new column.
df['lemmatized_text'] = df['Content'].apply(text_normalization)

In [31]:
# Preview the first rows to check the new lemmatized_text column.
df.head()

Unnamed: 0,Name,Category,Content,lemmatized_text
0,Vid6639,Team-BHP Support,2022 Toyota Urban Cruiser Hyryder Review Toyot...,2022 toyota urban cruiser hyryder review toyot...
1,Vid6639,Team-BHP Support,Interior Cabin Design & Quality Step inside th...,interior cabin design quality step inside the ...
2,Vid6639,Team-BHP Support,Driving the Urban Cruiser Hyryder 1.5L Hybrid ...,drive the urban cruiser hyryder 1 5l hybrid to...
3,Vid6639,Team-BHP Support,Toyota Urban Cruiser Hyryder Exterior Images F...,toyota urban cruiser hyryder exterior image fr...
4,Vid6639,Team-BHP Support,Toyota Urban Cruiser Hyryder Interior Images B...,toyota urban cruiser hyryder interior image bl...


In [22]:
def text_normalization(text):
    """Return ``text`` lower-cased, stripped of non-alphanumerics and lemmatized.

    Each token is lemmatized with the module-level ``lemmatizer``; the WordNet
    part of speech is chosen from the first letter of the token's POS tag
    (V -> verb, J -> adjective, R -> adverb, anything else -> noun).

    Returns the lemmas joined by single spaces. If anything raises an
    ``Exception`` along the way, the error is printed and ``None`` is returned.
    """
    pos_by_tag_initial = {"V": "v", "J": "a", "R": "r"}
    try:
        cleaned = re.sub(r'[^a-z0-9]', " ", text.lower())
        tagged_tokens = pos_tag(nltk.word_tokenize(cleaned), tagset=None)
        lemmas = [
            lemmatizer.lemmatize(word, pos_by_tag_initial.get(tag[:1], "n"))
            for word, tag in tagged_tokens
        ]
        return " ".join(lemmas)
    except Exception as e:
        print(f"Error in text normalization: {e}")
        return None

### Text vectorisation

In [32]:
# With the text normalized, create a TF-IDF vectorizer (default settings) for the vectorization step.
from sklearn.feature_extraction.text import TfidfVectorizer
tfidf = TfidfVectorizer()

In [33]:
# Fit the vectorizer on the normalized posts and convert the result to a dense array.
x_tfidf = tfidf.fit_transform(df['lemmatized_text']).toarray()

In [34]:
# Inspect the resulting TF-IDF array.
x_tfidf

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [37]:
def validation(x_tfidf, query_ask, min_similarity=0.2, fallback_index=51):
    """Pick the post in ``df['Content']`` that best matches a vectorized query.

    Parameters
    ----------
    x_tfidf : 2-D array
        Document vectors. Row ``i`` is assumed to correspond to row ``i`` of
        the module-level ``df`` (the notebook builds it from
        ``df['lemmatized_text']``).
    query_ask : 2-D array
        Vectorized query. Only its first row is scored.
    min_similarity : float, default 0.2
        The best cosine similarity must exceed this value for the matching
        post to be returned.
    fallback_index : default 51
        Index label in ``df`` of the post returned when no document is
        similar enough.
        NOTE(review): 51 was hard-coded originally; confirm that post is the
        intended default reply.

    Returns
    -------
    The ``Content`` value of the best-matching row, or of ``fallback_index``
    when the best similarity does not exceed ``min_similarity``.
    """
    # Cosine similarity = 1 - cosine distance; one row per document, one
    # column per query row.
    cos = 1 - pairwise_distances(x_tfidf, query_ask, metric='cosine')
    # Score against the first query row explicitly, so argmax is always a
    # document row number and the score is a plain scalar.
    scores = cos[:, 0]
    best_row = int(scores.argmax())
    best_score = float(scores[best_row])
    if best_score > min_similarity:
        # argmax yields a row position, so index positionally, not by label.
        return df['Content'].iloc[best_row]
    return df['Content'].loc[fallback_index]

### Response to questions

In [38]:
# One chat turn: read a question, normalize and vectorize it, then print the closest post.
user_input = input("Enter any message: ")
query_ask = tfidf.transform([text_normalization(user_input)]).toarray()
response = validation(x_tfidf, query_ask)
print("bot replays:", response)

Enter any message: How is interior cabin design?
bot replays: Quote: Originally Posted by Vid6639  Quality of interior plastics feels more Maruti than Toyota. Frankly, the S-Cross cabin feels better in quality  Interior is narrow and doesn't feel as roomy as some competitors.  NVH levels from the drivetrain are surprisingly poor for a hybrid. Sounds from the 3-cylinder engine, electric motor and brake vacuum pump filter into the cabin. The Urban Cruiser Hyryder is more a Maruti than a Toyota. . Oustanding and detailed review as always! Please help me in comparing the above quoted part with New Brezza! w.r.t. space, NVH and Fit and Finish. I am confused between G AT Mild Hybrid and Brezza ZXI AT. If the price difference is ~ 1L


In [39]:
# One chat turn: read a question, normalize and vectorize it, then print the closest post.
user_input = input("Enter any message: ")
query_ask = tfidf.transform([text_normalization(user_input)]).toarray()
response = validation(x_tfidf, query_ask)
print("bot replays:", response)

Enter any message: Price of toyota hyryder
bot replays: Quote: Originally Posted by Ralags Are you getting it for new price as of 2023 or with old price There is no price hike announced for Hyryder yet.


In [40]:
# One chat turn: read a question, normalize and vectorize it, then print the closest post.
user_input = input("Enter any message: ")
query_ask = tfidf.transform([text_normalization(user_input)]).toarray()
response = validation(x_tfidf, query_ask)
print("bot replays:", response)

Enter any message: Queries about battery
bot replays: Any idea about the battery warranty for the hybrid version? If there is any problem, will they replace the battery at zero cost, or would it be on a pro rata basis like usual car batteries?


In [42]:
# One chat turn: read a question, normalize and vectorize it, then print the closest post.
user_input = input("Enter any message: ")
query_ask = tfidf.transform([text_normalization(user_input)]).toarray()
response = validation(x_tfidf, query_ask)
print("bot replays:", response)


Enter any message: Grand vitara
bot replays: Toyota HyRyder Vs. Maruti Suzuki's Grand Vitara pricing compared: Link


In [43]:
# One chat turn: read a question, normalize and vectorize it, then print the closest post.
user_input = input("Enter any message: ")
query_ask = tfidf.transform([text_normalization(user_input)]).toarray()
response = validation(x_tfidf, query_ask)
print("bot replays:", response)

Enter any message: grand vitara
bot replays: Toyota HyRyder Vs. Maruti Suzuki's Grand Vitara pricing compared: Link


In [45]:
# One chat turn: read a question, normalize and vectorize it, then print the closest post.
user_input = input("Enter any message: ")
query_ask = tfidf.transform([text_normalization(user_input)]).toarray()
response = validation(x_tfidf, query_ask)
print("bot replays:", response)

Enter any message: All reviews on hyryder
bot replays: Quote: Originally Posted by SumitB Can someone please confirm how much mileage can I expect from the Hyryder if I follow the above regime with the vehicle? You can check these 3 posts posted by fellow BHPians for it. 1 (Toyota Urban Cruiser Hyryder Review) 2 (Toyota Urban Cruiser Hyryder Review) 3 (Toyota Urban Cruiser Hyryder Review)


In [46]:
# One chat turn: read a question, normalize and vectorize it, then print the closest post.
user_input = input("Enter any message: ")
query_ask = tfidf.transform([text_normalization(user_input)]).toarray()
response = validation(x_tfidf, query_ask)
print("bot replays:", response)

Enter any message: sunroof
bot replays: Can anybody comment if front and rear headroom is any better for non sunroof variants compared to sunroof variants?


In [47]:
# One chat turn: read a question, normalize and vectorize it, then print the closest post.
user_input = input("Enter any message: ")
query_ask = tfidf.transform([text_normalization(user_input)]).toarray()
response = validation(x_tfidf, query_ask)
print("bot replays:", response)

Enter any message: Rear
bot replays: Quote: Originally Posted by SDx For me this is a negative, I am in the market for a compact car for the busy roads of Delhi. Rear seat is indeed small, the couple who sat in the rear seat before me were a bit on the heavier side and I was doubtful that my 8 yr old niece would fit in between them. Do you happen to know a more compact automatic vehicle with better rear seat experience?


In [49]:
# One chat turn: read a question, normalize and vectorize it, then print the closest post.
user_input = input("Enter any message: ")
query_ask = tfidf.transform([text_normalization(user_input)]).toarray()
response = validation(x_tfidf, query_ask)
print("bot replays:", response)

Enter any message: horn issue
bot replays: Has anyone tried changing their horn? Do we need to remove the whole front bumper to change the horn?


In [51]:
# One chat turn: read a question, normalize and vectorize it, then print the closest post.
user_input = input("Enter any message: ")
query_ask = tfidf.transform([text_normalization(user_input)]).toarray()
response = validation(x_tfidf, query_ask)
print("bot replays:", response)

Enter any message: mileage
bot replays: For mileage refill checks, there's no need to reset it manually. You can check the mileage history, and it will display the mileage for the last five refuels.


In [55]:
# One chat turn: read a question, normalize and vectorize it, then print the closest post.
user_input = input("Enter any message: ")
query_ask = tfidf.transform([text_normalization(user_input)]).toarray()
response = validation(x_tfidf, query_ask)
print("bot replays:", response)

Enter any message: AC issues
bot replays: Quote: Originally Posted by drsachin I have done 300kms till now , so here is my initial review.. Cons 1. AC: The summers have just started and ac feels barely adequate. Contrary to popular belief i think it's not because of the mosquito net shade but it's actually due to the Pano sunroof. It almost becomes a glass house due to big sunroof with heat entrapped beneath it. Surprised about the AC being a con. Toyota cars generally come with a chiller of an AC.
