# Loading json file and convert it into CSV

In [2]:
import pandas as pd
import json
# Load the JSON file
with open('amazon.json', 'r') as file:
    data = json.load(file)

# Convert JSON to dataframe
df = pd.json_normalize(data)

# Save dataframe to CSV
df.to_csv('output.csv', index=False)


In [3]:
df

Unnamed: 0,name,review
0,Customer reviews: HP 2022 Newest Premium Lapto...,Find helpful customer reviews and review ratin...
1,Customer reviews: HP 14&#34,Find helpful customer reviews and review ratin...
2,Customer reviews: HP 15.6 Inch Touchscreen Lap...,Find helpful customer reviews and review ratin...
3,"Customer reviews: HP 15.6 Inch Laptop, Intel I...",Find helpful customer reviews and review ratin...
4,Customer reviews: HP 15 Laptop 15.6&#34,Find helpful customer reviews and review ratin...
...,...,...
230,"Customer reviews: HP 14 Laptop, Qualcomm Snapd...",Find helpful customer reviews and review ratin...
231,Customer reviews: 2021 Newest HP Laptop ...,Find helpful customer reviews and review ratin...
232,"HP 2020 15 15.6"" HD Touchscreen Premium Laptop...",Compare New & Used (39) from. $349.99$349.99 ....
233,Customer reviews: 2019 HP Pavilion 15.6-inch F...,Find helpful customer reviews and review ratin...


# Croping 'Customer reviews:' from all titles

In [4]:
df['name'] = [x.lstrip('Customer reviews:').rstrip('...') for x in df['name']]


In [5]:
df

Unnamed: 0,name,review
0,HP 2022 Newest Premium Laptop,Find helpful customer reviews and review ratin...
1,HP 14&#34,Find helpful customer reviews and review ratin...
2,HP 15.6 Inch Touchscreen Laptop,Find helpful customer reviews and review ratin...
3,"HP 15.6 Inch Laptop, Intel Iris",Find helpful customer reviews and review ratin...
4,HP 15 Laptop 15.6&#34,Find helpful customer reviews and review ratin...
...,...,...
230,"HP 14 Laptop, Qualcomm Snapdragon",Find helpful customer reviews and review ratin...
231,2021 Newest HP Laptop,Find helpful customer reviews and review ratin...
232,"HP 2020 15 15.6"" HD Touchscreen Premium Laptop...",Compare New & Used (39) from. $349.99$349.99 ....
233,2019 HP Pavilion 15.6-inch Full HD,Find helpful customer reviews and review ratin...


# Croping 'Find helpful customer reviews and review ratings for ' from the reviews

In [6]:
df['review'] = [x.lstrip('Find helpful customer reviews and review ratings for ') for x in df['review']]


In [7]:
df

Unnamed: 0,name,review
0,HP 2022 Newest Premium Laptop,HP 2022 Newest Premium ... Read honest and unb...
1,HP 14&#34,"HP 14"" FHD IPS LED 1080p Laptop Intel Core i5-..."
2,HP 15.6 Inch Touchscreen Laptop,HP 15.6 Inch ... and a camera that was equal o...
3,"HP 15.6 Inch Laptop, Intel Iris","HP 15.6 Inch Laptop, Intel Iris Xe Graphics, ...."
4,HP 15 Laptop 15.6&#34,"HP 15 Laptop 15.6"", AMD Ryzen 5 2500U, AMD Rad..."
...,...,...
230,"HP 14 Laptop, Qualcomm Snapdragon","HP 14 Laptop, Qualcomm Snapdragon 7c ... Made ..."
231,2021 Newest HP Laptop,"2021 Newest HP Laptop, 17 HD+ (1600 x 900) Dis..."
232,"HP 2020 15 15.6"" HD Touchscreen Premium Laptop...",Compare New & Used (39) from. $349.99$349.99 ....
233,2019 HP Pavilion 15.6-inch Full HD,2019 HP Pavilion 15.6-inch Full HD (1920 x 108...


# Deleting Null values and duplicates

In [8]:
df = df[df["name"] != ""]
df.dropna(inplace=True)

# Drop duplicate rows
df.drop_duplicates(inplace=True)

# Reset the index after removing rows
df.reset_index(drop=True, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return func(*args, **kwargs)


In [9]:
df

Unnamed: 0,name,review
0,HP 2022 Newest Premium Laptop,HP 2022 Newest Premium ... Read honest and unb...
1,HP 14&#34,"HP 14"" FHD IPS LED 1080p Laptop Intel Core i5-..."
2,HP 15.6 Inch Touchscreen Laptop,HP 15.6 Inch ... and a camera that was equal o...
3,"HP 15.6 Inch Laptop, Intel Iris","HP 15.6 Inch Laptop, Intel Iris Xe Graphics, ...."
4,HP 15 Laptop 15.6&#34,"HP 15 Laptop 15.6"", AMD Ryzen 5 2500U, AMD Rad..."
...,...,...
168,"HP 14 Laptop, Qualcomm Snapdragon","HP 14 Laptop, Qualcomm Snapdragon 7c ... Made ..."
169,2021 Newest HP Laptop,"2021 Newest HP Laptop, 17 HD+ (1600 x 900) Dis..."
170,"HP 2020 15 15.6"" HD Touchscreen Premium Laptop...",Compare New & Used (39) from. $349.99$349.99 ....
171,2019 HP Pavilion 15.6-inch Full HD,2019 HP Pavilion 15.6-inch Full HD (1920 x 108...


# code to remove '@' symbols

In [10]:

df['clean_review']=df['review'].apply(lambda i : ' '.join([review for review in i.split()if not review.startswith("@")]))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['clean_review']=df['review'].apply(lambda i : ' '.join([review for review in i.split()if not review.startswith("@")]))


In [11]:
df

Unnamed: 0,name,review,clean_review
0,HP 2022 Newest Premium Laptop,HP 2022 Newest Premium ... Read honest and unb...,HP 2022 Newest Premium ... Read honest and unb...
1,HP 14&#34,"HP 14"" FHD IPS LED 1080p Laptop Intel Core i5-...","HP 14"" FHD IPS LED 1080p Laptop Intel Core i5-..."
2,HP 15.6 Inch Touchscreen Laptop,HP 15.6 Inch ... and a camera that was equal o...,HP 15.6 Inch ... and a camera that was equal o...
3,"HP 15.6 Inch Laptop, Intel Iris","HP 15.6 Inch Laptop, Intel Iris Xe Graphics, ....","HP 15.6 Inch Laptop, Intel Iris Xe Graphics, ...."
4,HP 15 Laptop 15.6&#34,"HP 15 Laptop 15.6"", AMD Ryzen 5 2500U, AMD Rad...","HP 15 Laptop 15.6"", AMD Ryzen 5 2500U, AMD Rad..."
...,...,...,...
168,"HP 14 Laptop, Qualcomm Snapdragon","HP 14 Laptop, Qualcomm Snapdragon 7c ... Made ...","HP 14 Laptop, Qualcomm Snapdragon 7c ... Made ..."
169,2021 Newest HP Laptop,"2021 Newest HP Laptop, 17 HD+ (1600 x 900) Dis...","2021 Newest HP Laptop, 17 HD+ (1600 x 900) Dis..."
170,"HP 2020 15 15.6"" HD Touchscreen Premium Laptop...",Compare New & Used (39) from. $349.99$349.99 ....,Compare New & Used (39) from. $349.99$349.99 ....
171,2019 HP Pavilion 15.6-inch Full HD,2019 HP Pavilion 15.6-inch Full HD (1920 x 108...,2019 HP Pavilion 15.6-inch Full HD (1920 x 108...


In [12]:
df['review'][10]

'HP Pavilion 15 Laptop Computer, ... Needed to replace an old HP laptop that was running very slow.'

# Code to remove NUMBERS

In [13]:
#training data
df['clean_review'] =df['clean_review'].apply(lambda x : ' '.join([review for review in x.split() if not review == '\d*']))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['clean_review'] =df['clean_review'].apply(lambda x : ' '.join([review for review in x.split() if not review == '\d*']))


# Code to remove GREEK Symbols

In [14]:
#Training data
import unidecode
df['clean_review'] = df['clean_review'].apply(lambda x : ' '.join([unidecode.unidecode(word) for word in x.split()])) 


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['clean_review'] = df['clean_review'].apply(lambda x : ' '.join([unidecode.unidecode(word) for word in x.split()]))


In [15]:
df

Unnamed: 0,name,review,clean_review
0,HP 2022 Newest Premium Laptop,HP 2022 Newest Premium ... Read honest and unb...,HP 2022 Newest Premium ... Read honest and unb...
1,HP 14&#34,"HP 14"" FHD IPS LED 1080p Laptop Intel Core i5-...","HP 14"" FHD IPS LED 1080p Laptop Intel Core i5-..."
2,HP 15.6 Inch Touchscreen Laptop,HP 15.6 Inch ... and a camera that was equal o...,HP 15.6 Inch ... and a camera that was equal o...
3,"HP 15.6 Inch Laptop, Intel Iris","HP 15.6 Inch Laptop, Intel Iris Xe Graphics, ....","HP 15.6 Inch Laptop, Intel Iris Xe Graphics, ...."
4,HP 15 Laptop 15.6&#34,"HP 15 Laptop 15.6"", AMD Ryzen 5 2500U, AMD Rad...","HP 15 Laptop 15.6"", AMD Ryzen 5 2500U, AMD Rad..."
...,...,...,...
168,"HP 14 Laptop, Qualcomm Snapdragon","HP 14 Laptop, Qualcomm Snapdragon 7c ... Made ...","HP 14 Laptop, Qualcomm Snapdragon 7c ... Made ..."
169,2021 Newest HP Laptop,"2021 Newest HP Laptop, 17 HD+ (1600 x 900) Dis...","2021 Newest HP Laptop, 17 HD+ (1600 x 900) Dis..."
170,"HP 2020 15 15.6"" HD Touchscreen Premium Laptop...",Compare New & Used (39) from. $349.99$349.99 ....,Compare New & Used (39) from. $349.99$349.99 ....
171,2019 HP Pavilion 15.6-inch Full HD,2019 HP Pavilion 15.6-inch Full HD (1920 x 108...,2019 HP Pavilion 15.6-inch Full HD (1920 x 108...


# removeing of hmm words

In [16]:
df['clean_review'] = df['clean_review'].apply(lambda x : ' '.join([word for word in x.split() if not word == 'h(m)+' ]))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['clean_review'] = df['clean_review'].apply(lambda x : ' '.join([word for word in x.split() if not word == 'h(m)+' ]))


In [17]:
df

Unnamed: 0,name,review,clean_review
0,HP 2022 Newest Premium Laptop,HP 2022 Newest Premium ... Read honest and unb...,HP 2022 Newest Premium ... Read honest and unb...
1,HP 14&#34,"HP 14"" FHD IPS LED 1080p Laptop Intel Core i5-...","HP 14"" FHD IPS LED 1080p Laptop Intel Core i5-..."
2,HP 15.6 Inch Touchscreen Laptop,HP 15.6 Inch ... and a camera that was equal o...,HP 15.6 Inch ... and a camera that was equal o...
3,"HP 15.6 Inch Laptop, Intel Iris","HP 15.6 Inch Laptop, Intel Iris Xe Graphics, ....","HP 15.6 Inch Laptop, Intel Iris Xe Graphics, ...."
4,HP 15 Laptop 15.6&#34,"HP 15 Laptop 15.6"", AMD Ryzen 5 2500U, AMD Rad...","HP 15 Laptop 15.6"", AMD Ryzen 5 2500U, AMD Rad..."
...,...,...,...
168,"HP 14 Laptop, Qualcomm Snapdragon","HP 14 Laptop, Qualcomm Snapdragon 7c ... Made ...","HP 14 Laptop, Qualcomm Snapdragon 7c ... Made ..."
169,2021 Newest HP Laptop,"2021 Newest HP Laptop, 17 HD+ (1600 x 900) Dis...","2021 Newest HP Laptop, 17 HD+ (1600 x 900) Dis..."
170,"HP 2020 15 15.6"" HD Touchscreen Premium Laptop...",Compare New & Used (39) from. $349.99$349.99 ....,Compare New & Used (39) from. $349.99$349.99 ....
171,2019 HP Pavilion 15.6-inch Full HD,2019 HP Pavilion 15.6-inch Full HD (1920 x 108...,2019 HP Pavilion 15.6-inch Full HD (1920 x 108...


# Removing the Stopwords

In [18]:
import nltk
from nltk.corpus import stopwords
nltk.download('stopwords')


[nltk_data] Downloading package stopwords to C:\Users\V BHAVANI
[nltk_data]     PRASAD\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [19]:
#Training data
df['clean_review'] = df['clean_review'].apply(lambda x : ' '.join([word for word in x.split() if not word in set(stopwords.words('english'))]))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['clean_review'] = df['clean_review'].apply(lambda x : ' '.join([word for word in x.split() if not word in set(stopwords.words('english'))]))


In [20]:
df

Unnamed: 0,name,review,clean_review
0,HP 2022 Newest Premium Laptop,HP 2022 Newest Premium ... Read honest and unb...,HP 2022 Newest Premium ... Read honest unbiase...
1,HP 14&#34,"HP 14"" FHD IPS LED 1080p Laptop Intel Core i5-...","HP 14"" FHD IPS LED 1080p Laptop Intel Core i5-..."
2,HP 15.6 Inch Touchscreen Laptop,HP 15.6 Inch ... and a camera that was equal o...,HP 15.6 Inch ... camera equal better last HP l...
3,"HP 15.6 Inch Laptop, Intel Iris","HP 15.6 Inch Laptop, Intel Iris Xe Graphics, ....","HP 15.6 Inch Laptop, Intel Iris Xe Graphics, ...."
4,HP 15 Laptop 15.6&#34,"HP 15 Laptop 15.6"", AMD Ryzen 5 2500U, AMD Rad...","HP 15 Laptop 15.6"", AMD Ryzen 5 2500U, AMD Rad..."
...,...,...,...
168,"HP 14 Laptop, Qualcomm Snapdragon","HP 14 Laptop, Qualcomm Snapdragon 7c ... Made ...","HP 14 Laptop, Qualcomm Snapdragon 7c ... Made ..."
169,2021 Newest HP Laptop,"2021 Newest HP Laptop, 17 HD+ (1600 x 900) Dis...","2021 Newest HP Laptop, 17 HD+ (1600 x 900) Dis..."
170,"HP 2020 15 15.6"" HD Touchscreen Premium Laptop...",Compare New & Used (39) from. $349.99$349.99 ....,Compare New & Used (39) from. $349.99$349.99 ....
171,2019 HP Pavilion 15.6-inch Full HD,2019 HP Pavilion 15.6-inch Full HD (1920 x 108...,2019 HP Pavilion 15.6-inch Full HD (1920 x 108...


In [21]:
df['review'][10]

'HP Pavilion 15 Laptop Computer, ... Needed to replace an old HP laptop that was running very slow.'

In [22]:
df['clean_review'][10]

'HP Pavilion 15 Laptop Computer, ... Needed replace old HP laptop running slow.'

# Classifing the tweets as 'appreciation','complaint' and 'suggestion'

In [23]:
import nltk
from nltk.tokenize import word_tokenize

# Load the sentiment lexicon
nltk.download('opinion_lexicon')
positive_words = set(nltk.corpus.opinion_lexicon.positive())
negative_words = set(nltk.corpus.opinion_lexicon.negative())


[nltk_data] Downloading package opinion_lexicon to C:\Users\V BHAVANI
[nltk_data]     PRASAD\AppData\Roaming\nltk_data...
[nltk_data]   Package opinion_lexicon is already up-to-date!


In [24]:
# Function to classify a review
def classify_review(review):
    tokens = word_tokenize(review.lower())
    # Remove stopwords
    stop_words = set(stopwords.words('english'))
    tokens = [token for token in tokens if token not in stop_words]
    
    # Count the number of positive and negative words
    num_positive = len(positive_words.intersection(tokens))
    num_negative = len(negative_words.intersection(tokens))
    
    # Classify the review based on the word counts
    if num_positive > num_negative:
        return "appreciation"
    elif num_negative > num_positive:
        return "complaint"
    else:
        return "suggestion"

In [25]:

df['sentiments']=df['clean_review'].apply(lambda x: classify_review(x))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['sentiments']=df['clean_review'].apply(lambda x: classify_review(x))


In [26]:
df

Unnamed: 0,name,review,clean_review,sentiments
0,HP 2022 Newest Premium Laptop,HP 2022 Newest Premium ... Read honest and unb...,HP 2022 Newest Premium ... Read honest unbiase...,appreciation
1,HP 14&#34,"HP 14"" FHD IPS LED 1080p Laptop Intel Core i5-...","HP 14"" FHD IPS LED 1080p Laptop Intel Core i5-...",appreciation
2,HP 15.6 Inch Touchscreen Laptop,HP 15.6 Inch ... and a camera that was equal o...,HP 15.6 Inch ... camera equal better last HP l...,appreciation
3,"HP 15.6 Inch Laptop, Intel Iris","HP 15.6 Inch Laptop, Intel Iris Xe Graphics, ....","HP 15.6 Inch Laptop, Intel Iris Xe Graphics, ....",suggestion
4,HP 15 Laptop 15.6&#34,"HP 15 Laptop 15.6"", AMD Ryzen 5 2500U, AMD Rad...","HP 15 Laptop 15.6"", AMD Ryzen 5 2500U, AMD Rad...",suggestion
...,...,...,...,...
168,"HP 14 Laptop, Qualcomm Snapdragon","HP 14 Laptop, Qualcomm Snapdragon 7c ... Made ...","HP 14 Laptop, Qualcomm Snapdragon 7c ... Made ...",complaint
169,2021 Newest HP Laptop,"2021 Newest HP Laptop, 17 HD+ (1600 x 900) Dis...","2021 Newest HP Laptop, 17 HD+ (1600 x 900) Dis...",suggestion
170,"HP 2020 15 15.6"" HD Touchscreen Premium Laptop...",Compare New & Used (39) from. $349.99$349.99 ....,Compare New & Used (39) from. $349.99$349.99 ....,suggestion
171,2019 HP Pavilion 15.6-inch Full HD,2019 HP Pavilion 15.6-inch Full HD (1920 x 108...,2019 HP Pavilion 15.6-inch Full HD (1920 x 108...,complaint


In [27]:
df['review'][52]

'HP Pavilion dv6113us 15.4" ... laptop to save money and got an excellent deal in this HP laptop.'

In [28]:
df['sentiments'][52]

'appreciation'

In [34]:
data=df.drop(['review'], axis=1)


In [35]:
data

Unnamed: 0,name,clean_review,sentiments
0,HP 2022 Newest Premium Laptop,HP 2022 Newest Premium ... Read honest unbiase...,appreciation
1,HP 14&#34,"HP 14"" FHD IPS LED 1080p Laptop Intel Core i5-...",appreciation
2,HP 15.6 Inch Touchscreen Laptop,HP 15.6 Inch ... camera equal better last HP l...,appreciation
3,"HP 15.6 Inch Laptop, Intel Iris","HP 15.6 Inch Laptop, Intel Iris Xe Graphics, ....",suggestion
4,HP 15 Laptop 15.6&#34,"HP 15 Laptop 15.6"", AMD Ryzen 5 2500U, AMD Rad...",suggestion
...,...,...,...
168,"HP 14 Laptop, Qualcomm Snapdragon","HP 14 Laptop, Qualcomm Snapdragon 7c ... Made ...",complaint
169,2021 Newest HP Laptop,"2021 Newest HP Laptop, 17 HD+ (1600 x 900) Dis...",suggestion
170,"HP 2020 15 15.6"" HD Touchscreen Premium Laptop...",Compare New & Used (39) from. $349.99$349.99 ....,suggestion
171,2019 HP Pavilion 15.6-inch Full HD,2019 HP Pavilion 15.6-inch Full HD (1920 x 108...,complaint


In [None]:
#Saving to EXCEL SHEET

In [36]:
data.to_excel('AMAZON.xlsx')
