In [13]:
#Task 2: Cleaning. Preprocessing, Analysing#

In [14]:
#import necessary libraries
import re
import pandas as pd
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from nltk.stem import WordNetLemmatizer


In [15]:
!pip install spacy



In [16]:
!pip install wordcloud

from wordcloud import WordCloud, STOPWORDS



In [17]:
# visualization library
import matplotlib.pyplot as plt

In [21]:
# Loading the dataframe
df = pd.read_csv("BA_reviews.csv")

In [22]:
# Checking the loaded data
df.head()   

Unnamed: 0.1,Unnamed: 0,reviews
0,0,"Not Verified | The worst service ever, my bag..."
1,1,✅ Trip Verified | 4/4 flights we booked this ...
2,2,✅ Trip Verified | British Airways has a total...
3,3,"✅ Trip Verified | London Heathrow to Keflavik,..."
4,4,✅ Trip Verified | Mumbai to London Heathrow in...


In [23]:
#few basic info
print("There are {} observations and {} features in this dataset. \n".format(df.shape[0],df.shape[1]))


There are 3000 observations and 2 features in this dataset. 



In [25]:
# Define the column name where you want to remove text
column_name = 'reviews'

# Use regular expressions to remove text before the "|" character
df[column_name] = df[column_name].str.replace(r'^.*\|', '', regex=True)


In [26]:
df.head()


Unnamed: 0.1,Unnamed: 0,reviews
0,0,"The worst service ever, my baggage did not a..."
1,1,4/4 flights we booked this holiday were dela...
2,2,British Airways has a total lack of respect ...
3,3,"London Heathrow to Keflavik, Iceland in Busin..."
4,4,Mumbai to London Heathrow in Business Class o...


In [27]:
# Save the modified DataFrame back to our CSV file
output_csv = 'BA_reviews.csv'  
df.to_csv(output_csv, index=False)

In [28]:

# Define functions for text cleaning steps
def clean_text(text):
    # Lowercasing
    text = text.lower()
    
    # Tokenization
    tokens = word_tokenize(text)
    
    # Removing Punctuation and Special Characters
    text = re.sub(r'[^\w\s]', '', text)
    
    # Removing Stopwords
    stop_words = set(stopwords.words('english'))
    filtered_tokens = [word for word in tokens if word not in stop_words]
    
    # Lemmatization
    lemmatizer = WordNetLemmatizer()
    lemmatized_words = [lemmatizer.lemmatize(word) for word in filtered_tokens]
    
    # Join the cleaned tokens back into a string
    cleaned_text = ' '.join(lemmatized_words)
    
    return cleaned_text

# Apply the clean_text function to the 'text_data' column
df['reviews'] = df['reviews'].apply(clean_text)

# Save the cleaned DataFrame back to a CSV file
#output_csv_file_path = 'cleaned_file.csv'
#df.to_csv(output_csv_file_path, index=False)


In [29]:
df.head()


Unnamed: 0.1,Unnamed: 0,reviews
0,0,"worst service ever , baggage arrive time . fli..."
1,1,4/4 flight booked holiday delayed 1-2 hour . w...
2,2,british airway total lack respect customer . b...
3,3,"london heathrow keflavik , iceland business cl..."
4,4,mumbai london heathrow business class ageing b...


In [30]:
# Save the modified DataFrame back to our CSV file
output_csv = 'BA_reviews.csv'  
df.to_csv(output_csv, index=False)

In [31]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer
# text is in a column 'reviews'
text_data = df['reviews']

# Initialize the sentiment analyzer
sid = SentimentIntensityAnalyzer()

# Calculate sentiment scores for each review and create a new column for sentiment
df['sentiment_scores'] = text_data.apply(lambda x: sid.polarity_scores(x))

# Extract the sentiment polarity (positive, neutral, negative) from the scores
df['sentiment'] = df['sentiment_scores'].apply(lambda x: 'positive' if x['compound'] > 0 else ('neutral' if x['compound'] == 0 else 'negative'))

# Print the DataFrame with sentiment scores
print(df[['reviews', 'sentiment']])


                                                reviews sentiment
0     worst service ever , baggage arrive time . fli...  negative
1     4/4 flight booked holiday delayed 1-2 hour . w...  positive
2     british airway total lack respect customer . b...  positive
3     london heathrow keflavik , iceland business cl...  positive
4     mumbai london heathrow business class ageing b...  positive
...                                                 ...       ...
2995  lgw-szg business class . sure point new reduce...  positive
2996  flew bangkok london heathrow 13th march . firs...  positive
2997  lhr-jfk/iad-lhr . superb round trip ( fair pri...  positive
2998  flew 30th march 2015 lhr-yvr return school rug...  positive
2999  heathrow istanbul flight ba0676 club class . g...  positive

[3000 rows x 2 columns]


In [32]:
df.head()


Unnamed: 0.1,Unnamed: 0,reviews,sentiment_scores,sentiment
0,0,"worst service ever , baggage arrive time . fli...","{'neg': 0.463, 'neu': 0.537, 'pos': 0.0, 'comp...",negative
1,1,4/4 flight booked holiday delayed 1-2 hour . w...,"{'neg': 0.099, 'neu': 0.628, 'pos': 0.272, 'co...",positive
2,2,british airway total lack respect customer . b...,"{'neg': 0.103, 'neu': 0.763, 'pos': 0.135, 'co...",positive
3,3,"london heathrow keflavik , iceland business cl...","{'neg': 0.0, 'neu': 0.889, 'pos': 0.111, 'comp...",positive
4,4,mumbai london heathrow business class ageing b...,"{'neg': 0.0, 'neu': 0.795, 'pos': 0.205, 'comp...",positive


In [33]:

# Save the DataFrame with the new sentiment column to a new CSV file
df.to_csv('BA_reviews_senti_added.csv', index=False)


In [1]:
!pip install gensim



In [6]:
#Topic Modeling

import pandas as pd
from gensim import corpora, models

# Load CSV file into a DataFrame
df = pd.read_csv('BA_reviews_senti_added.csv')

# text is in a column 'reviews'
text_data = df['reviews']

# Tokenize the text data
from nltk.tokenize import word_tokenize
text_data = text_data.apply(lambda x: word_tokenize(x.lower()))

# Create a dictionary and a corpus
dictionary = corpora.Dictionary(text_data)
corpus = [dictionary.doc2bow(text) for text in text_data]

# Build the LDA model
lda_model = models.LdaModel(corpus, num_topics=5, id2word=dictionary, passes=15)

# Print the topics for each review
for i, row in df.iterrows():
    text = row['reviews']
    bow = dictionary.doc2bow(word_tokenize(text.lower()))
    topics = lda_model.get_document_topics(bow)
    print(f"Review {i} Topics: {topics}")

# Alternatively, you can store the topics in a new column
df['topics'] = df['reviews'].apply(lambda x: lda_model.get_document_topics(dictionary.doc2bow(word_tokenize(x.lower()))))

# Save the DataFrame with topics as a new column to a new CSV file
df.to_csv('BA_reviews_aftr sent_topics.csv', index=False)



Review 0 Topics: [(0, 0.010002725), (1, 0.01038963), (2, 0.9591056), (3, 0.010000768), (4, 0.010501273)]
Review 1 Topics: [(0, 0.010063098), (1, 0.010316771), (2, 0.3552741), (3, 0.010065682), (4, 0.61428034)]
Review 2 Topics: [(2, 0.99684244)]
Review 3 Topics: [(2, 0.0364643), (4, 0.9546149)]
Review 4 Topics: [(2, 0.09858679), (4, 0.89647967)]
Review 5 Topics: [(2, 0.9291379), (4, 0.06277979)]
Review 6 Topics: [(4, 0.99265826)]
Review 7 Topics: [(1, 0.09887774), (2, 0.3295063), (4, 0.56779593)]
Review 8 Topics: [(1, 0.26378775), (2, 0.7325117)]
Review 9 Topics: [(2, 0.9969867)]
Review 10 Topics: [(1, 0.6910153), (2, 0.30435064)]
Review 11 Topics: [(2, 0.9974191)]
Review 12 Topics: [(2, 0.58139795), (4, 0.40695196)]
Review 13 Topics: [(1, 0.05808709), (2, 0.5130877), (4, 0.42302454)]
Review 14 Topics: [(0, 0.03982071), (2, 0.9489734)]
Review 15 Topics: [(2, 0.99390805)]
Review 16 Topics: [(4, 0.9933575)]
Review 17 Topics: [(2, 0.98579925)]
Review 18 Topics: [(2, 0.98245895)]
Review 19 

Review 171 Topics: [(1, 0.25255406), (2, 0.7451713)]
Review 172 Topics: [(4, 0.98705494)]
Review 173 Topics: [(0, 0.03204499), (2, 0.95190567)]
Review 174 Topics: [(1, 0.7379511), (2, 0.22848864), (4, 0.022016967)]
Review 175 Topics: [(1, 0.061370883), (2, 0.19606717), (4, 0.7401145)]
Review 176 Topics: [(2, 0.9956266)]
Review 177 Topics: [(1, 0.9663187)]
Review 178 Topics: [(1, 0.480957), (2, 0.09265653), (4, 0.42437136)]
Review 179 Topics: [(4, 0.9844365)]
Review 180 Topics: [(2, 0.5863108), (4, 0.40435916)]
Review 181 Topics: [(1, 0.10200847), (4, 0.88318616)]
Review 182 Topics: [(4, 0.9688579)]
Review 183 Topics: [(4, 0.99154943)]
Review 184 Topics: [(1, 0.2734446), (2, 0.52445006), (4, 0.1990551)]
Review 185 Topics: [(2, 0.98823255)]
Review 186 Topics: [(2, 0.99553317)]
Review 187 Topics: [(1, 0.43217602), (2, 0.15505601), (4, 0.40495667)]
Review 188 Topics: [(1, 0.41209877), (4, 0.57830447)]
Review 189 Topics: [(1, 0.1306971), (2, 0.85999113)]
Review 190 Topics: [(2, 0.34663564),

Review 357 Topics: [(1, 0.16083205), (4, 0.8292161)]
Review 358 Topics: [(4, 0.9648207)]
Review 359 Topics: [(2, 0.6710711), (4, 0.31867874)]
Review 360 Topics: [(4, 0.9946466)]
Review 361 Topics: [(2, 0.9916257)]
Review 362 Topics: [(1, 0.5624888), (2, 0.4314478)]
Review 363 Topics: [(1, 0.066615924), (4, 0.92559654)]
Review 364 Topics: [(4, 0.99313563)]
Review 365 Topics: [(1, 0.55458546), (4, 0.44348103)]
Review 366 Topics: [(1, 0.20697731), (4, 0.7847121)]
Review 367 Topics: [(1, 0.54935664), (2, 0.44306865)]
Review 368 Topics: [(2, 0.99627644)]
Review 369 Topics: [(1, 0.15145168), (2, 0.09648679), (4, 0.7484931)]
Review 370 Topics: [(1, 0.18866643), (2, 0.8087317)]
Review 371 Topics: [(2, 0.9816137)]
Review 372 Topics: [(1, 0.19047451), (2, 0.14255069), (4, 0.6646414)]
Review 373 Topics: [(4, 0.9729384)]
Review 374 Topics: [(2, 0.23765503), (4, 0.7586595)]
Review 375 Topics: [(2, 0.2709781), (4, 0.72153556)]
Review 376 Topics: [(1, 0.34969604), (2, 0.6150228), (4, 0.03365552)]
Rev

Review 515 Topics: [(2, 0.994207)]
Review 516 Topics: [(2, 0.9922178)]
Review 517 Topics: [(1, 0.02616182), (2, 0.24162579), (4, 0.7216513)]
Review 518 Topics: [(0, 0.033710696), (2, 0.9477381)]
Review 519 Topics: [(2, 0.99243236)]
Review 520 Topics: [(0, 0.04178359), (2, 0.9520294)]
Review 521 Topics: [(2, 0.9893171)]
Review 522 Topics: [(2, 0.99060893)]
Review 523 Topics: [(2, 0.97923934)]
Review 524 Topics: [(2, 0.48779637), (4, 0.5000458)]
Review 525 Topics: [(2, 0.98769474)]
Review 526 Topics: [(4, 0.99078685)]
Review 527 Topics: [(2, 0.0882066), (4, 0.9048118)]
Review 528 Topics: [(2, 0.99609476)]
Review 529 Topics: [(2, 0.8505597), (4, 0.13899937)]
Review 530 Topics: [(2, 0.9553044), (4, 0.038101677)]
Review 531 Topics: [(1, 0.66029686), (2, 0.10680047), (4, 0.22920892)]
Review 532 Topics: [(1, 0.121995635), (2, 0.8735237)]
Review 533 Topics: [(1, 0.108802274), (2, 0.8055418), (3, 0.08157782)]
Review 534 Topics: [(2, 0.9909431)]
Review 535 Topics: [(1, 0.14646436), (2, 0.6180838

Review 694 Topics: [(4, 0.987374)]
Review 695 Topics: [(1, 0.14949223), (2, 0.31398642), (4, 0.5265117)]
Review 696 Topics: [(2, 0.21479227), (4, 0.7814297)]
Review 697 Topics: [(4, 0.9873215)]
Review 698 Topics: [(2, 0.9903716)]
Review 699 Topics: [(1, 0.41020852), (2, 0.060689226), (4, 0.5259591)]
Review 700 Topics: [(1, 0.22583136), (2, 0.6781223), (4, 0.09177745)]
Review 701 Topics: [(1, 0.34411305), (4, 0.6454379)]
Review 702 Topics: [(1, 0.6596818), (2, 0.20669082), (4, 0.13148254)]
Review 703 Topics: [(1, 0.027900718), (2, 0.96763766)]
Review 704 Topics: [(2, 0.3292693), (4, 0.652795)]
Review 705 Topics: [(2, 0.41890353), (4, 0.57353693)]
Review 706 Topics: [(2, 0.96761984)]
Review 707 Topics: [(1, 0.98900634)]
Review 708 Topics: [(2, 0.99702907)]
Review 709 Topics: [(1, 0.05925981), (2, 0.4241624), (4, 0.5145033)]
Review 710 Topics: [(2, 0.9157898), (3, 0.06571777)]
Review 711 Topics: [(1, 0.3304634), (2, 0.10184964), (4, 0.56424487)]
Review 712 Topics: [(4, 0.98733675)]
Review

Review 881 Topics: [(2, 0.99370915)]
Review 882 Topics: [(2, 0.9689856)]
Review 883 Topics: [(1, 0.9195798), (2, 0.06777925)]
Review 884 Topics: [(1, 0.9317517), (2, 0.05893497)]
Review 885 Topics: [(2, 0.9846906)]
Review 886 Topics: [(1, 0.039954465), (4, 0.95196605)]
Review 887 Topics: [(1, 0.033388913), (2, 0.5009078), (4, 0.4635664)]
Review 888 Topics: [(1, 0.22705391), (4, 0.7688929)]
Review 889 Topics: [(1, 0.0114298), (2, 0.19397806), (4, 0.7933759)]
Review 890 Topics: [(1, 0.22142182), (2, 0.12081113), (4, 0.6546063)]
Review 891 Topics: [(2, 0.47924632), (4, 0.51558757)]
Review 892 Topics: [(2, 0.39133707), (4, 0.5954768)]
Review 893 Topics: [(2, 0.27236408), (4, 0.7210402)]
Review 894 Topics: [(2, 0.24229625), (4, 0.75238836)]
Review 895 Topics: [(2, 0.12366739), (4, 0.8712691)]
Review 896 Topics: [(4, 0.99235094)]
Review 897 Topics: [(1, 0.8521695), (4, 0.13373226)]
Review 898 Topics: [(2, 0.9850365)]
Review 899 Topics: [(1, 0.5346767), (2, 0.11016104), (4, 0.3513126)]
Review

Review 1083 Topics: [(1, 0.63079464), (2, 0.23505053), (4, 0.12866877)]
Review 1084 Topics: [(2, 0.2922599), (4, 0.6958632)]
Review 1085 Topics: [(1, 0.18873039), (2, 0.15696573), (4, 0.6481082)]
Review 1086 Topics: [(4, 0.9849905)]
Review 1087 Topics: [(4, 0.98550075)]
Review 1088 Topics: [(1, 0.79040885), (4, 0.19770661)]
Review 1089 Topics: [(1, 0.6842585), (4, 0.30358613)]
Review 1090 Topics: [(0, 0.14380895), (1, 0.32345402), (2, 0.5297706)]
Review 1091 Topics: [(1, 0.79566723), (2, 0.19825679)]
Review 1092 Topics: [(1, 0.49596384), (2, 0.47854432)]
Review 1093 Topics: [(2, 0.99275726)]
Review 1094 Topics: [(2, 0.5079881), (4, 0.48238757)]
Review 1095 Topics: [(0, 0.082247175), (2, 0.4983222), (4, 0.4156691)]
Review 1096 Topics: [(1, 0.11370647), (2, 0.12076116), (4, 0.7597275)]
Review 1097 Topics: [(2, 0.4183631), (4, 0.57567716)]
Review 1098 Topics: [(1, 0.13309985), (2, 0.27629608), (4, 0.58755857)]
Review 1099 Topics: [(0, 0.010974367), (2, 0.9833713)]
Review 1100 Topics: [(2,

Review 1284 Topics: [(2, 0.41711736), (4, 0.57597256)]
Review 1285 Topics: [(1, 0.5469161), (4, 0.44143093)]
Review 1286 Topics: [(2, 0.5880083), (4, 0.403066)]
Review 1287 Topics: [(4, 0.98873055)]
Review 1288 Topics: [(2, 0.99670905)]
Review 1289 Topics: [(1, 0.2873778), (2, 0.7090663)]
Review 1290 Topics: [(1, 0.64087015), (2, 0.35435432)]
Review 1291 Topics: [(1, 0.32543212), (4, 0.66915005)]
Review 1292 Topics: [(2, 0.9923309)]
Review 1293 Topics: [(2, 0.11811384), (4, 0.87562394)]
Review 1294 Topics: [(1, 0.059437178), (2, 0.39263248), (4, 0.5420352)]
Review 1295 Topics: [(1, 0.39759198), (2, 0.22661826), (4, 0.37172744)]
Review 1296 Topics: [(1, 0.726294), (4, 0.26919812)]
Review 1297 Topics: [(4, 0.9877066)]
Review 1298 Topics: [(1, 0.36490282), (2, 0.07372247), (4, 0.555178)]
Review 1299 Topics: [(1, 0.98191786)]
Review 1300 Topics: [(4, 0.9801715)]
Review 1301 Topics: [(2, 0.080457464), (4, 0.9092728)]
Review 1302 Topics: [(1, 0.3809371), (2, 0.6116641)]
Review 1303 Topics: [

Review 1470 Topics: [(4, 0.9973705)]
Review 1471 Topics: [(1, 0.79653716), (4, 0.20011543)]
Review 1472 Topics: [(2, 0.8978392), (4, 0.09749691)]
Review 1473 Topics: [(0, 0.049220003), (2, 0.9368866)]
Review 1474 Topics: [(1, 0.20833652), (2, 0.62498), (4, 0.16255793)]
Review 1475 Topics: [(2, 0.83069915), (4, 0.16010612)]
Review 1476 Topics: [(1, 0.49972895), (2, 0.12901817), (4, 0.3673871)]
Review 1477 Topics: [(1, 0.49587792), (4, 0.48932415)]
Review 1478 Topics: [(1, 0.629103), (2, 0.15083778), (4, 0.21380126)]
Review 1479 Topics: [(1, 0.578277), (4, 0.4196741)]
Review 1480 Topics: [(1, 0.23790134), (4, 0.75672686)]
Review 1481 Topics: [(1, 0.5262113), (2, 0.21770547), (4, 0.25323275)]
Review 1482 Topics: [(2, 0.21739425), (4, 0.7776105)]
Review 1483 Topics: [(1, 0.6936835), (2, 0.17152277), (4, 0.12906994)]
Review 1484 Topics: [(2, 0.21570875), (4, 0.77254707)]
Review 1485 Topics: [(1, 0.7550491), (4, 0.24129325)]
Review 1486 Topics: [(1, 0.8384144), (2, 0.08201817), (4, 0.0764153

Review 1645 Topics: [(1, 0.29579946), (2, 0.3119416), (4, 0.3870334)]
Review 1646 Topics: [(1, 0.07036094), (2, 0.08152629), (4, 0.8461163)]
Review 1647 Topics: [(1, 0.6176139), (2, 0.12353836), (4, 0.2555904)]
Review 1648 Topics: [(2, 0.27749503), (4, 0.7150201)]
Review 1649 Topics: [(2, 0.16599429), (4, 0.8317582)]
Review 1650 Topics: [(1, 0.15128039), (2, 0.10807377), (4, 0.73705775)]
Review 1651 Topics: [(4, 0.9892804)]
Review 1652 Topics: [(1, 0.40930006), (2, 0.18261161), (4, 0.4054068)]
Review 1653 Topics: [(1, 0.84400916), (2, 0.13116096), (4, 0.022825278)]
Review 1654 Topics: [(0, 0.044026535), (2, 0.15586112), (4, 0.7961617)]
Review 1655 Topics: [(4, 0.9932313)]
Review 1656 Topics: [(2, 0.43410683), (4, 0.5632938)]
Review 1657 Topics: [(4, 0.9925357)]
Review 1658 Topics: [(1, 0.369956), (2, 0.2248879), (4, 0.39818308)]
Review 1659 Topics: [(2, 0.5275416), (4, 0.46815062)]
Review 1660 Topics: [(1, 0.32934865), (4, 0.66615677)]
Review 1661 Topics: [(2, 0.2476631), (4, 0.7441361

Review 1820 Topics: [(1, 0.83566016), (4, 0.1567581)]
Review 1821 Topics: [(2, 0.69521546), (4, 0.29103562)]
Review 1822 Topics: [(1, 0.3668894), (2, 0.018007783), (4, 0.61259425)]
Review 1823 Topics: [(1, 0.30422837), (2, 0.43088648), (4, 0.26379982)]
Review 1824 Topics: [(1, 0.07708794), (2, 0.6668933), (4, 0.2533047)]
Review 1825 Topics: [(1, 0.4618542), (4, 0.5288126)]
Review 1826 Topics: [(2, 0.93648845), (4, 0.0607295)]
Review 1827 Topics: [(1, 0.36859414), (2, 0.121256515), (4, 0.5070995)]
Review 1828 Topics: [(1, 0.62760925), (2, 0.29014963), (4, 0.08032237)]
Review 1829 Topics: [(1, 0.050107393), (2, 0.94582313)]
Review 1830 Topics: [(1, 0.35165837), (2, 0.5295685), (4, 0.11661268)]
Review 1831 Topics: [(1, 0.4335884), (2, 0.5194697), (4, 0.044051975)]
Review 1832 Topics: [(4, 0.9875584)]
Review 1833 Topics: [(4, 0.9894637)]
Review 1834 Topics: [(1, 0.2756999), (2, 0.66529405), (4, 0.057749566)]
Review 1835 Topics: [(2, 0.85617244), (4, 0.14176157)]
Review 1836 Topics: [(1, 0.

Review 1994 Topics: [(1, 0.30364144), (4, 0.6912135)]
Review 1995 Topics: [(2, 0.9913677)]
Review 1996 Topics: [(1, 0.20218086), (2, 0.1676302), (4, 0.6260126)]
Review 1997 Topics: [(1, 0.4149085), (4, 0.5807887)]
Review 1998 Topics: [(1, 0.21936122), (4, 0.7724204)]
Review 1999 Topics: [(2, 0.14341055), (4, 0.8451324)]
Review 2000 Topics: [(1, 0.5295628), (2, 0.122116886), (4, 0.3428369)]
Review 2001 Topics: [(1, 0.48078418), (4, 0.5149981)]
Review 2002 Topics: [(1, 0.99492806)]
Review 2003 Topics: [(2, 0.80731225), (4, 0.1902162)]
Review 2004 Topics: [(1, 0.990758)]
Review 2005 Topics: [(4, 0.99305266)]
Review 2006 Topics: [(1, 0.9863064)]
Review 2007 Topics: [(2, 0.15327069), (4, 0.84240735)]
Review 2008 Topics: [(1, 0.45975104), (2, 0.5359475)]
Review 2009 Topics: [(1, 0.043316025), (2, 0.9516773)]
Review 2010 Topics: [(1, 0.6323069), (4, 0.36417767)]
Review 2011 Topics: [(1, 0.767013), (4, 0.22846465)]
Review 2012 Topics: [(2, 0.2781234), (4, 0.716204)]
Review 2013 Topics: [(1, 0.

Review 2179 Topics: [(1, 0.624975), (2, 0.37270355)]
Review 2180 Topics: [(1, 0.058796), (4, 0.9369147)]
Review 2181 Topics: [(1, 0.38706943), (4, 0.6037805)]
Review 2182 Topics: [(1, 0.9873358)]
Review 2183 Topics: [(1, 0.2382394), (2, 0.47619334), (4, 0.27877203)]
Review 2184 Topics: [(2, 0.99163103)]
Review 2185 Topics: [(4, 0.9905529)]
Review 2186 Topics: [(1, 0.28041115), (2, 0.36486122), (4, 0.35000175)]
Review 2187 Topics: [(4, 0.98716015)]
Review 2188 Topics: [(1, 0.97706515), (4, 0.018007463)]
Review 2189 Topics: [(4, 0.99053514)]
Review 2190 Topics: [(4, 0.99759793)]
Review 2191 Topics: [(1, 0.21752732), (4, 0.77845275)]
Review 2192 Topics: [(1, 0.22698024), (2, 0.29360205), (4, 0.47691435)]
Review 2193 Topics: [(4, 0.99514484)]
Review 2194 Topics: [(4, 0.9852549)]
Review 2195 Topics: [(1, 0.23443528), (2, 0.34243742), (4, 0.41442627)]
Review 2196 Topics: [(1, 0.63004124), (2, 0.36657986)]
Review 2197 Topics: [(4, 0.98961455)]
Review 2198 Topics: [(1, 0.64690137), (4, 0.34185

Review 2377 Topics: [(2, 0.9927205)]
Review 2378 Topics: [(1, 0.43393838), (2, 0.5581143)]
Review 2379 Topics: [(1, 0.23023388), (4, 0.7622998)]
Review 2380 Topics: [(2, 0.58473873), (4, 0.40846118)]
Review 2381 Topics: [(1, 0.06756659), (2, 0.9295606)]
Review 2382 Topics: [(4, 0.99144405)]
Review 2383 Topics: [(1, 0.11233134), (2, 0.56563485), (4, 0.31936294)]
Review 2384 Topics: [(1, 0.16150546), (4, 0.8278951)]
Review 2385 Topics: [(1, 0.023374928), (2, 0.14895809), (4, 0.8245926)]
Review 2386 Topics: [(1, 0.022526914), (4, 0.9675328)]
Review 2387 Topics: [(1, 0.097494334), (2, 0.895196)]
Review 2388 Topics: [(1, 0.9905852)]
Review 2389 Topics: [(1, 0.98988765)]
Review 2390 Topics: [(2, 0.76364625), (4, 0.22889651)]
Review 2391 Topics: [(2, 0.09010744), (4, 0.9062216)]
Review 2392 Topics: [(1, 0.41426232), (4, 0.57916445)]
Review 2393 Topics: [(2, 0.83899397), (4, 0.15250055)]
Review 2394 Topics: [(1, 0.7689453), (4, 0.2184281)]
Review 2395 Topics: [(1, 0.22871204), (2, 0.018904533)

Review 2578 Topics: [(1, 0.7070959), (4, 0.29008836)]
Review 2579 Topics: [(4, 0.99010795)]
Review 2580 Topics: [(4, 0.99160546)]
Review 2581 Topics: [(1, 0.4125468), (4, 0.57193017)]
Review 2582 Topics: [(1, 0.24853273), (4, 0.74268126)]
Review 2583 Topics: [(4, 0.9843741)]
Review 2584 Topics: [(1, 0.33537295), (4, 0.6527506)]
Review 2585 Topics: [(4, 0.98204654)]
Review 2586 Topics: [(1, 0.2359594), (2, 0.1860179), (4, 0.5745025)]
Review 2587 Topics: [(0, 0.12489153), (1, 0.28736266), (2, 0.32270464), (4, 0.2626296)]
Review 2588 Topics: [(1, 0.98526144)]
Review 2589 Topics: [(1, 0.33988762), (2, 0.17330305), (4, 0.48245442)]
Review 2590 Topics: [(1, 0.70863193), (4, 0.28800237)]
Review 2591 Topics: [(2, 0.9938762)]
Review 2592 Topics: [(1, 0.391735), (4, 0.5983703)]
Review 2593 Topics: [(2, 0.37236565), (4, 0.6185774)]
Review 2594 Topics: [(4, 0.9858013)]
Review 2595 Topics: [(1, 0.37866524), (2, 0.054948453), (4, 0.5629124)]
Review 2596 Topics: [(2, 0.4327508), (4, 0.5463335)]
Revie

Review 2790 Topics: [(1, 0.28822586), (2, 0.22032727), (4, 0.48804468)]
Review 2791 Topics: [(2, 0.9934731)]
Review 2792 Topics: [(1, 0.21639024), (2, 0.11781456), (4, 0.6607162)]
Review 2793 Topics: [(4, 0.9933304)]
Review 2794 Topics: [(1, 0.15148517), (4, 0.84091777)]
Review 2795 Topics: [(4, 0.9903636)]
Review 2796 Topics: [(4, 0.98469985)]
Review 2797 Topics: [(4, 0.9869222)]
Review 2798 Topics: [(1, 0.11383051), (4, 0.8798622)]
Review 2799 Topics: [(4, 0.9890575)]
Review 2800 Topics: [(4, 0.9946533)]
Review 2801 Topics: [(1, 0.560299), (4, 0.43548882)]
Review 2802 Topics: [(4, 0.992104)]
Review 2803 Topics: [(4, 0.98794127)]
Review 2804 Topics: [(1, 0.35338038), (4, 0.63874334)]
Review 2805 Topics: [(4, 0.99155074)]
Review 2806 Topics: [(1, 0.25850964), (4, 0.72915053)]
Review 2807 Topics: [(1, 0.27027464), (4, 0.72432387)]
Review 2808 Topics: [(1, 0.62306404), (2, 0.3680112)]
Review 2809 Topics: [(4, 0.9781467)]
Review 2810 Topics: [(2, 0.22995542), (4, 0.76347005)]
Review 2811 

In [23]:
!pip install wordcloud==1.8.0

Collecting wordcloud==1.8.0
  Downloading wordcloud-1.8.0.tar.gz (217 kB)
     ------------------------------------ 217.1/217.1 kB 944.7 kB/s eta 0:00:00
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: wordcloud
  Building wheel for wordcloud (setup.py): started
  Building wheel for wordcloud (setup.py): finished with status 'error'
  Running setup.py clean for wordcloud
Failed to build wordcloud


  error: subprocess-exited-with-error
  
  python setup.py bdist_wheel did not run successfully.
  exit code: 1
  
  [20 lines of output]
  running bdist_wheel
  running build
  running build_py
  creating build
  creating build\lib.win-amd64-3.9
  creating build\lib.win-amd64-3.9\wordcloud
  copying wordcloud\color_from_image.py -> build\lib.win-amd64-3.9\wordcloud
  copying wordcloud\tokenization.py -> build\lib.win-amd64-3.9\wordcloud
  copying wordcloud\wordcloud.py -> build\lib.win-amd64-3.9\wordcloud
  copying wordcloud\wordcloud_cli.py -> build\lib.win-amd64-3.9\wordcloud
  copying wordcloud\_version.py -> build\lib.win-amd64-3.9\wordcloud
  copying wordcloud\__init__.py -> build\lib.win-amd64-3.9\wordcloud
  copying wordcloud\__main__.py -> build\lib.win-amd64-3.9\wordcloud
  copying wordcloud\stopwords -> build\lib.win-amd64-3.9\wordcloud
  copying wordcloud\DroidSansMono.ttf -> build\lib.win-amd64-3.9\wordcloud
  UPDATING build\lib.win-amd64-3.9\wordcloud/_version.py
  set bu

In [22]:
#create and visualize word clouds for sentiment score distributions in your data for each sentiment category (positive, negative, and neutral)

from wordcloud import WordCloud
import matplotlib.pyplot as plt

# Load CSV file into a DataFrame
df = pd.read_csv('BA_reviews_senti_added.csv')

#  having a DataFrame called 'df' with a 'sentiment_scores' column
# containing sentiment scores for each row

# Filter rows based on sentiment scores
positive_texts = df[df['sentiment'] == 'positive']['sentiment_scores']
negative_texts = df[df['sentiment'] == 'negative']['sentiment_scores']
neutral_texts = df[df['sentiment'] == 'neutral']['sentiment_scores']

# Convert the sentiment scores to strings
positive_texts = ' '.join(positive_texts.astype(str))
negative_texts = ' '.join(negative_texts.astype(str))
neutral_texts = ' '.join(neutral_texts.astype(str))

# Create word clouds for each sentiment category
wordcloud_positive = WordCloud(width=800, height=400, background_color='white', font_path="arial.ttf").generate(positive_texts)
wordcloud_negative = WordCloud(width=800, height=400, background_color='aqua', font_path="arial.ttf").generate(negative_texts)
wordcloud_neutral = WordCloud(width=800, height=400, background_color='pink', font_path="arial.ttf").generate(neutral_texts)

# Display the word clouds for each sentiment category
plt.figure(figsize=(12, 4))
plt.subplot(131)
plt.imshow(wordcloud_positive, interpolation='bilinear')
plt.title("Word Cloud for Positive Sentiment")
plt.axis("off")

plt.subplot(132)
plt.imshow(wordcloud_negative, interpolation='bilinear')
plt.title("Word Cloud for Negative Sentiment")
plt.axis("off")

plt.subplot(133)
plt.imshow(wordcloud_neutral, interpolation='bilinear')
plt.title("Word Cloud for Neutral Sentiment")
plt.axis("off")

plt.tight_layout()
plt.show()



ValueError: Only supported for TrueType fonts