# Cleaning the Indian Bank Review dataset

Author: Arissa Noordina Bahari

Date: 7 January 2024

# Introduction

In this notebook, we clean the bank review data for the aspect-based sentiment analysis (ABSA) project, which aims to determine which bank ranks best among a pool of 10 different Indian banks.

The original dataset can be found [here](https://www.kaggle.com/datasets/dhavalrupapara/banks-customer-reviews-dataset).


In [None]:
# import essential libraries
import pandas as pd
import numpy as np


In [None]:
# read the bank reviews CSV into a DataFrame
data = pd.read_csv(filepath_or_buffer='/content/bank_reviews.csv')

# report the dataset dimensions as (rows, columns)
print(data.shape)

# preview the first 5 rows of the dataset
data.head(5)

(1000, 10)


Unnamed: 0,author,date,address,bank,rating,review_title_by_user,review,bank_image,rating_title_by_user,useful_count
0,AMRENDRA T,"Mar 21, 2020",New delhi,SBI,4.0,"""Best saving""",State Bank Of India is located nearby in our a...,https://static.bankbazaar.com/images/common/ba...,Great!,133
1,BISHWA,"Mar 20, 2020",Kolkata,SBI,5.0,"""Good service""","I have my salary account in SBI, when I applie...",https://static.bankbazaar.com/images/common/ba...,Blown Away!,89
2,SANTOSH,"Mar 20, 2020",Hooghly,Axis Bank,5.0,"""Excellent Service""",I am using Axis bank saving account for the p...,https://static.bankbazaar.com/images/common/ba...,Blown Away!,48
3,MAHADEV,"Mar 20, 2020",Pune,HDFC Bank,5.0,"""Excellent service""",I have my salary bank account in HDFC bank for...,https://static.bankbazaar.com/images/common/ba...,Blown Away!,52
4,R,"Mar 20, 2020",Bangalore,review,5.0,"""Good account""","Close to around 10 years, I am holding this Co...",https://static.bankbazaar.com/images/common/ba...,Blown Away!,22


In [None]:
# remove columns that are not used in the analysis
# columns to remove: ['author', 'date', 'bank_image', 'useful_count']
# the result is assigned back (no inplace=True) and errors='ignore' is set so that
# re-running this cell after the columns are already gone does not raise a KeyError

data = data.drop(columns=['author', 'date', 'bank_image', 'useful_count'], errors='ignore')

In [None]:
# some rows have no bank name; their 'bank' value is the placeholder 'review'
# relabel those rows as 'Unknown'
# the result is assigned back to the column: replace(..., inplace=True) on a column
# selection acts on an intermediate object and does not update `data` under pandas
# copy-on-write (and raises a FutureWarning on pandas 2.x)

data['bank'] = data['bank'].replace('review', 'Unknown')

In [None]:
# build the combined review text as "<title> <body>"
# - missing parts are filled with '' first: NaN + str is NaN, which astype(str)
#   would otherwise turn into the literal text 'nan' for the whole row
# - the two parts are joined with a space so the last word of the title is not
#   glued onto the first word of the review body
title_text = data['review_title_by_user'].fillna('').astype(str)
body_text = data['review'].fillna('').astype(str)
data['Full_Review'] = (title_text + ' ' + body_text).str.strip()
data.head()

Unnamed: 0,address,bank,rating,review_title_by_user,review,rating_title_by_user,Full_Review
0,New delhi,SBI,4.0,"""Best saving""",State Bank Of India is located nearby in our a...,Great!,"""Best saving""State Bank Of India is located ne..."
1,Kolkata,SBI,5.0,"""Good service""","I have my salary account in SBI, when I applie...",Blown Away!,"""Good service""I have my salary account in SBI,..."
2,Hooghly,Axis Bank,5.0,"""Excellent Service""",I am using Axis bank saving account for the p...,Blown Away!,"""Excellent Service""I am using Axis bank saving..."
3,Pune,HDFC Bank,5.0,"""Excellent service""",I have my salary bank account in HDFC bank for...,Blown Away!,"""Excellent service""I have my salary bank accou..."
4,Bangalore,Unknown,5.0,"""Good account""","Close to around 10 years, I am holding this Co...",Blown Away!,"""Good account""Close to around 10 years, I am h..."


In [None]:
# display the DataFrame to inspect it after the 'Full_Review' column was added
data

Unnamed: 0,address,bank,rating,review_title_by_user,review,rating_title_by_user,Full_Review
0,New delhi,SBI,4.0,"""Best saving""",State Bank Of India is located nearby in our a...,Great!,"""Best saving""State Bank Of India is located ne..."
1,Kolkata,SBI,5.0,"""Good service""","I have my salary account in SBI, when I applie...",Blown Away!,"""Good service""I have my salary account in SBI,..."
2,Hooghly,Axis Bank,5.0,"""Excellent Service""",I am using Axis bank saving account for the p...,Blown Away!,"""Excellent Service""I am using Axis bank saving..."
3,Pune,HDFC Bank,5.0,"""Excellent service""",I have my salary bank account in HDFC bank for...,Blown Away!,"""Excellent service""I have my salary bank accou..."
4,Bangalore,Unknown,5.0,"""Good account""","Close to around 10 years, I am holding this Co...",Blown Away!,"""Good account""Close to around 10 years, I am h..."
...,...,...,...,...,...,...,...
995,Ernakulam,Unknown,3.5,"""Good network of ATM""",I am holding a saving account with FEDERAL ban...,Pretty good,"""Good network of ATM""I am holding a saving acc..."
996,Ernakulam,Axis Bank,4.0,"""Mobile app is good""",There is no mandatory balance to keep in my Ax...,Great!,"""Mobile app is good""There is no mandatory bala..."
997,Bangalore,Axis Bank,4.0,"""Unhappy with the charges""","In Axis bank, every month they are charging me...",Great!,"""Unhappy with the charges""In Axis bank, every ..."
998,Hyderabad,Axis Bank,5.0,"""Good Bank""",I have a salary account with AXIS bank and I h...,Blown Away!,"""Good Bank""I have a salary account with AXIS b..."


In [None]:
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer

# fetch the VADER lexicon before constructing the analyzer
nltk.download('vader_lexicon')

# create the SentimentIntensityAnalyzer instance used for scoring
sia = SentimentIntensityAnalyzer()

[nltk_data] Downloading package vader_lexicon to /root/nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [None]:
# CHANGE THE COLUMN SCORED BELOW TO SEE ITS EFFECT ON THE SENTIMENT CATEGORY COLUMN
# score each review; every entry is the dict returned by sia.polarity_scores
review_scores = data['review'].map(sia.polarity_scores)
data['polarity'] = review_scores
# keep the 'compound' value of each score dict in its own column
data['compound'] = review_scores.map(lambda scores: scores.get('compound'))

In [None]:
# define a function to perform the sentiment categorization
def categorize(score):
    """Map a compound sentiment score to a category label.

    Returns 'Blown Away' when ``score`` is greater than 0.6,
    'To be Improved' when ``score`` is less than 0, and 'Neutral'
    for everything else (including exactly 0 and exactly 0.6).
    """
    # the two thresholds cannot both match, so the check order is irrelevant
    if score > 0.6:
        return 'Blown Away'
    if score < 0:
        return 'To be Improved'
    return 'Neutral'


In [None]:
# add the 'Sentiment_Category' column by labelling each compound score with categorize
data['Sentiment_Category'] = data['compound'].map(categorize)

# preview the first 5 rows now that the new columns are in place
data.head(5)

Unnamed: 0,address,bank,rating,review_title_by_user,review,rating_title_by_user,Full_Review,polarity,compound,Sentiment_Category
0,New delhi,SBI,4.0,"""Best saving""",State Bank Of India is located nearby in our a...,Great!,"""Best saving""State Bank Of India is located ne...","{'neg': 0.0, 'neu': 0.807, 'pos': 0.193, 'comp...",0.8832,Blown Away
1,Kolkata,SBI,5.0,"""Good service""","I have my salary account in SBI, when I applie...",Blown Away!,"""Good service""I have my salary account in SBI,...","{'neg': 0.0, 'neu': 0.829, 'pos': 0.171, 'comp...",0.835,Blown Away
2,Hooghly,Axis Bank,5.0,"""Excellent Service""",I am using Axis bank saving account for the p...,Blown Away!,"""Excellent Service""I am using Axis bank saving...","{'neg': 0.074, 'neu': 0.739, 'pos': 0.187, 'co...",0.7717,Blown Away
3,Pune,HDFC Bank,5.0,"""Excellent service""",I have my salary bank account in HDFC bank for...,Blown Away!,"""Excellent service""I have my salary bank accou...","{'neg': 0.039, 'neu': 0.796, 'pos': 0.165, 'co...",0.7964,Blown Away
4,Bangalore,Unknown,5.0,"""Good account""","Close to around 10 years, I am holding this Co...",Blown Away!,"""Good account""Close to around 10 years, I am h...","{'neg': 0.0, 'neu': 0.785, 'pos': 0.215, 'comp...",0.8445,Blown Away


In [None]:
# count how many reviews fall into each sentiment category
category_counts = data["Sentiment_Category"].value_counts()
category_counts

Blown Away        611
Neutral           264
To be Improved    125
Name: Sentiment_Category, dtype: int64

In [None]:
# write the scored reviews to CSV, leaving out the row index
data.to_csv(path_or_buf='Bank_VADER_Reviews.csv', index=False)