In [2]:
from utils import * 

import numpy as np
import pandas as pd
from pprint import pprint
import os
import matplotlib.pyplot as plt
from collections import defaultdict

# Gensim
from gensim.test.utils import datapath
from gensim.test.utils import common_texts, get_tmpfile

# Plotting tools
import pyLDAvis
import pyLDAvis.gensim  # don't skip this
import matplotlib.pyplot as plt
%matplotlib inline

# Raise pandas' display limits so wide frames and long review text render in full.
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
# `pd.option_context(...)` only applies inside a `with` block; called bare it builds
# a context manager and throws it away, leaving max_colwidth at its default.
# `set_option` makes the setting stick for the whole session.
pd.set_option('display.max_colwidth', 500)

# Optional logging setup (intended for gensim's log output): configure the root
# logger so that only records at ERROR level or above are emitted.
import logging
logging.basicConfig(
    level=logging.ERROR,
    format='%(asctime)s : %(levelname)s : %(message)s',
)

# Silence DeprecationWarning messages for the rest of the session.
import warnings
warnings.filterwarnings(action="ignore", category=DeprecationWarning)

# Project settings, indexed later as config['csv_input_local'][...].
# `get_config` is not defined in this notebook -- presumably supplied by
# `from utils import *`; verify.
config = get_config('config.yaml')

In [4]:
# Read the reviews CSV named in the config. The file's first column is used as the
# index on load and then dropped, so rows end up numbered 0..n-1.
df = (
    pd.read_csv(config['csv_input_local']['bolt_apple_google_p1'], index_col=0)
    .reset_index(drop=True)
)

In [5]:
# Location of the pickled per-document topic table (relative to the working directory).
input_path = 'lda_mallet_model/mallet_lda_df.pkl'
# input_path = 'lda_mallet_model/_mallet_lda_df.pkl'
# NOTE(review): unpickling can execute arbitrary code -- only load pickle files that
# were produced by this project.
df_topic_sents_keywords_m = pd.read_pickle(input_path)
# Bare last expression: display the loaded frame as the cell output.
df_topic_sents_keywords_m

Unnamed: 0,Dominant_Topic,Perc_Contribution,Keywords,0
0,0.0,0.0859,"car, driver, passenger, city, fact, street, ex...","[thing, prenook, trip, catch, flight, pm, toda..."
1,1.0,0.0924,"taxi, rate, day, application, app, fee, scoote...","[need, section, airport, spot, number, app]"
2,13.0,0.1102,"support, email, journey, complaint, reason, we...","[friend, use, would_allow, journey, support, c..."
3,13.0,0.0776,"support, email, journey, complaint, reason, we...","[driver, support, computer, response, case, re..."
4,17.0,0.1186,"driver, rider, promo, rating, case, quality, a...","[rating, moment, driver, rating, trip, driver,..."
...,...,...,...,...
40360,15.0,0.0711,"time, route, drivers_alway, drive, traffic, co...","[time, nonsense, driver, drive, support, usless]"
40361,10.0,0.0686,"love, service, friendly_driver, nice_one, good...",[love]
40362,0.0,0.0500,"car, driver, passenger, city, fact, street, ex...",[]
40363,17.0,0.0618,"driver, rider, promo, rating, case, quality, a...",[super_convenient]


In [6]:
# Combine the per-document topic table with the reviews table. The result shown
# below carries the topic columns alongside `rating` and `review`.
df_dominant_topic_m = find_dominant_topic_in_each_doc(df_topic_sents_keywords_m, df)
print("Finding the dominant topic in each document")
# Render the first 40 rows, with the `review` column styled to a 400px width.
(
    df_dominant_topic_m
    .head(40)
    .style
    .set_properties(subset=['review'], width='400px')
)

Finding the dominant topic in each document


Unnamed: 0,Dominant_Topic,Perc_Contribution,Keywords,Text,rating,review
0,0.0,0.0859,"car, driver, passenger, city, fact, street, excuse, kind, turn, condition","['thing', 'prenook', 'trip', 'catch', 'flight', 'pm', 'today', 'booking', 'went', 'book', 'find', 'driver', 'book', 'service', 'get_rid', 'app', 'today']",1,The first thing I noticed is that you can't prenook your trip which isn't great. As I had to catch a flight at 4 pm today I decided to give it a go anyway using the instant booking. There were 2 drivers available near me and I went to book one of them. Completed the booking only to find that the driver disappeared and he now was shown as engaged. Went to book the other one and same happened again. Unreliable service. I will get rid of the app today
1,1.0,0.0924,"taxi, rate, day, application, app, fee, scooter, star, work, move","['need', 'section', 'airport', 'spot', 'number', 'app']",4,"Your GPS setting around Cape Town International Airport needs to be looked at. It always take my location to the drop off and go section of the airport when I'm actually about 1km away from that spot. Sometimes the drivers mobile numbers does not match their profile. But other than that, the app is quite user friendly."
2,13.0,0.1102,"support, email, journey, complaint, reason, week, refund, customer_service, reply, team","['friend', 'use', 'would_allow', 'journey', 'support', 'customer', 'promotion', 'contact', 'team', 'reply']",1,I was invited by my friend and was given a £10 free promo to use on my first trip. I tried to use it and it would not allow me so I booked my journey anyway. I called customer support and they simply said sorry nothing we can do. Not very customer friendly and really am disappointed because I downloaded it because of the free promotion. I did contact the team and they are of no help just like your reply
3,13.0,0.0776,"support, email, journey, complaint, reason, week, refund, customer_service, reply, team","['driver', 'support', 'computer', 'response', 'case', 'response', 'support', 'discount', 'account', 'thought', 'client']",3,"Your drivers are great BUT your support is no good. Why can't it be like Uber where you can actually speak to someone. Instead of getting computer generated responses or like in my case, no response at all. The three stars are for the support. Also I have received a single discount to this account since I installed it three months ago. I'm very disappointed in Bolt, I thought they were there for their clients."
4,17.0,0.1186,"driver, rider, promo, rating, case, quality, attitude, moment, cancelled_trip, profile","['rating', 'moment', 'driver', 'rating', 'trip', 'driver', 'apartment', 'ride', 'drive', 'destination']",3,"Loving the app, but can only give it a medium rating at the moment as the drivers you give a bad rating on still gets your trip. I had a bad experience with a driver outside my apartment, but he always gets my rides as he's parked there when with my one star and bad review (refused to drive to the destination, rude and aggressive)."
5,16.0,0.0966,"app, phone, number, code, call, message, contact, datum, download, detail","['price', 'try', 'sister', 'invitation', 'code', 'code', 'reply']",2,The prices are good and the service is a great when I try it with my sister but when I register the invitation code became inactive. They ask me to change the payment method but the code stay inactive. I contacted them via the app but no reply.
6,17.0,0.0845,"driver, rider, promo, rating, case, quality, attitude, moment, cancelled_trip, profile","['promotion', 'price', 'driver', 'rider', 'price', 'change', 'driver', 'destination', 'rider', 'thank', 'fare', 'taxi', 'pay']",3,"I like the promotion price. But some of the drivers frown as if we, the riders, caused the price to change. Some drivers call to ask of the destination of the riders before coming or not. I personally hate that! Thanks. What is the essence of picking Bolt if the fare with taxis are now the same? Where I used to pay GH¢5.00, I now pay GH¢9.00 or GH¢10.00. WHY?"
7,11.0,0.076,"driver, trip, fare, direction, feedback, system, cancel_ride, man, rubbish, mall","['app', 'price_range', 'trip', 'getting_destination', 'amount', 'rate', 'app', 'traffic', 'delay', 'abeg']",2,"Of late the app would provide a price range for the trip but on getting to destination the amount i end up paying is a far cry from the rate the app provided. Imagine knowing you won't be paying more than 2000 naira and you end up paying 3000 naira, with no traffic and no delay whatsoever. This isn't right at all. Please fix it abeg."
8,13.0,0.0958,"support, email, journey, complaint, reason, week, refund, customer_service, reply, team","['price', 'support', 'division', 'query', 'trip', 'team', 'evidence', 'service']",1,Always been charged a higher price than the one agreed to. Unable to get through to the support division to query such matters and drivers very rude. I had 2 trips where I was charged double and yet there is no team to help us on such queries and have all the evidence in regards to that. I'm soo unhappy with the service
9,15.0,0.0803,"time, route, drivers_alway, drive, traffic, cost, estimate, road, travel, home","['star', 'way', 'ride', 'point', 'driver', 'take_longer', 'route']",1,Jus read the reviews and I must say if there's something lower than 1 star I wuld have given you. It's supposed to be a faster way to ride but what is the point when the driver will take the longer route. I'm out!


In [None]:
# df_dominant_topic_m.to_csv('bolt_df_dominant_topic_m.csv')

In [27]:
print("Find the most representative document for each topic")
# One row per topic in the output below: the review with that topic's highest
# Perc_Contribution -- presumably how the helper selects it; confirm in utils.
sent_topics_sorteddf_m = find_most_representative_doc_for_each_doc(df_topic_sents_keywords_m, df)
# Render the table with the `review` column styled to a 400px width.
(
    sent_topics_sorteddf_m
    .style
    .set_properties(subset=['review'], width='400px')
)

Find the most representative document for each topic


Unnamed: 0,Dominant_Topic,Perc_Contribution,Topic_Keywords,star_rating,review
0,0.0,0.1726,"car, driver, passenger, city, fact, street, excuse, kind, turn, condition",1,Mehedi the driver was very unfriendly...Was driving the car recklessy over bumps..talking on the phone while driving.Body language was not acceptable picking his nose...when asked about the fare he started arguing saying he doesn't know and it is not his company.Telling me rudely he has another job to go to.The car was smelling full of cigarettes.The passenger seat was not in the upright position could hardly get my legs in..such a stressful journey!!!
1,1.0,0.1705,"taxi, rate, day, application, app, fee, scooter, star, work, move",3,"Use this 2 times a day, very useful for me. Hower you need to fix 1 thing: release ( or better don't use at all) a WakeLock on ""Driving/Share"" and ""Rate"" activities cause it is so amazingly annoying. Here is the usual user work flow: order taxi -> swith screen off, use other apps etc -> get alert that taxi arrived -> click OK -> screen off, put it in the pocket and go to taxi (at this stage user DON'T care about app anymore) -> sit in the taxi -> driver starts driving -> and app silently lights up the screen in the pocket AND keep it awake eating obviously a battery -> you switch it off again -> at the end of driving app AGAIN wakes up the screen with rate activity oh c'mon... very annoying. And you need to add 1 optional textfiled below address for details to the driver. Otherwice all good"
2,2.0,0.1703,"ride, easy_use, business, night, scam, town, voucher, book, offer, market",1,"I’ve had many problems with the customer support l, once the driver started screaming at me and demanding for me to leave the car! Although I ordered a bolt premium! The driver asked me to leave the vehicle and wouldn’t cancel the ride (if you have started the ride you as the customer can’t cancel it) I called the support they apologised and lied to me saying they will put 50£ worth of rides to my account. The next day my account was disabled, bolt demanded i provide evidence or they will dismiss this case. I didn’t get the money back for the unfinished horrible ride and no “50£” worth of credit. Another angering event was when the driver didn’t wait for me and just left starting the trip (as i said before this app does not provide a chance for the customer to cancel the ride once the driver has started it) it was also a bolt premium. I called the support multiple times, they have told me that they too can’t cancel the ride. The money has been deducted from my account AGAIN! DONT DOWNLOAD THIS APP!"
3,3.0,0.2079,"customer, discount, charge, end, month, distance, morning, pay, care, arrival",1,I am a loyal and daily customer but lately the service I have received from your driver's is so not on. My kids also travel with me and most driver's do not apply to the road rules. And the 4 persent increase is not a fair increase..... I make use of this service every morning and most afternoons and the price I had to pay this morning was overpriced. Normally I would pay about R35 to R40 but this morning it was R55. Imagine I have to pay daily R110 just to travel from home to work and that is not even to drop off the kids at school or to pick them up from aftercare..... I would pay more than R3500 a month to just get from point a to point b. From now on I will rather make other arrangements
4,4.0,0.1597,"time, minute, order, cab, pick, min, pickup, start, estimation, every_time",1,"Just stood outside for 15 minutes waiting for my driver. The time kept changing from 2 minutes to 4 minutes, to 3 minutes, back to 5 minutes. After 10 minutes I rang the driver who didn’t seem to even realise he’d taken the trip. He said he’s on his way and then canceled a few minutes later. Booked another who’s time also kept going up and down however this one actually did show up. Although due to the cancellation of the first trip I lost my discount 😡"
5,5.0,0.1487,"driver, client, show, life, cancellation, rude, office, block, cancel_trip, communication",5,I've left items that were very expensive and important to me I went to thr office and they managed to get hold of the driver bcz the number that was in the app of the driver was not going through the office managed to get the driver's boss and spoke with him and he pass to us an alternative number of the driver and the Bolt office contact again the driver from that number we finally get the hold of the driver and the Bolt driver came to the house and give back my staff all in order thank u
6,6.0,0.1675,"location, destination, option, map, place, address, point, pick_location, screen, everytime",3,"Can't set default home address. Can't add favourites. Why? These should be the rock bottom minimum! Trying to use current location always sets wrong address. Address picked on the map never gets stored. Not useless, but that's all. It is very far from an app that can be considered good. EDIT: My bad - thank you, i can set Home now. I still can't set frequent addresses (besides work) and still can't check my address history to reuse an address. But at least, with home address set, it's feels a bit more OK now."
7,7.0,0.2703,"issue, app, response, today, thing, update, experience, safety, yesterday, review",2,"My experience with Taxify started pleasantly enough with great experience from their drivers. I deliberately moved to Taxify because of my experience with another ride hailing service. However, of late, the service seems to be declining, from my experience. The biggest thing for me is customer service. Getting prompt responses rarely occurs. In fact, the last message I sent them, I never got a response. Additionally, I had a terrible experience yesterday and I've been trying to report it since yesterday afternoon. But, there's a glitch in that each time I click ""report tab"" under driver misconduct, it doesn't load. Then, to add to that, when I try to report via the safety tab, again, it doesn't load. The rest of the app is working fine. Last time I reported a glitch with the app, I was told that I needed to check thar the app was updated and u had my GPS turned all, all of which was already in place. After I shared that, there's no response. There seem to be a number of glitches and this can be a safety issue. Honestly, there should be functioning tool in place that makes it easy to report a ride either as it's occurring as, or as soon after. Twice, I had issues with a driver and I struggled to report it. And again yes, both my app has been updated and GPS turned on (when I'm using it that is)."
8,8.0,0.1491,"price, guy, reason, transportation, pricing, platform, purpose, alternative, range, demand",1,Evil poor and negative platform please guys be careful about this guys if you put your card on the platform they will give you a price but at the end of the trip they can deduct almost double the price
9,9.0,0.1605,"driver, work, experience, friend, area, vehicle, person, job, family, drop",5,Perfect app to get around long distance or short ... However if there was one thing I'd change is the the driver screening ... I understand they are creating job opportunities and I respect that. But work is work and a few of the drivers from personal experience have taken there job lightly resulting in the bolt brand getting negative publicity via word of mouth . Aside from that this app is exception with minor misconduct on the part of a very few unprofessional individuals.


In [22]:
print("Topic distribution across documents")
# Store the per-topic summary under its own name. Assigning it to
# `df_dominant_topic_m` would replace the per-document frame built earlier, and the
# "Average Rating per Topic" cells further down read that frame's `rating` column.
# This summary only has Dominant_Topic, Keywords, Num_Documents and Perc_Documents,
# so a top-to-bottom run would fail there with a KeyError.
df_topic_distribution_m = topic_distribution_across_docs(df_topic_sents_keywords_m)
# Render the summary with the `Keywords` column styled to a 400px width.
df_topic_distribution_m.style.set_properties(subset=['Keywords'], **{'width': '400px'})

Topic distribution across documents


Unnamed: 0,Dominant_Topic,Keywords,Num_Documents,Perc_Documents
0,0.0,"car, driver, passenger, city, fact, street, excuse, kind, turn, condition",14582,0.3613
1,1.0,"taxi, rate, day, application, app, fee, scooter, star, work, move",2622,0.065
2,2.0,"ride, easy_use, business, night, scam, town, voucher, book, offer, market",1785,0.0442
3,3.0,"customer, discount, charge, end, month, distance, morning, pay, care, arrival",1651,0.0409
4,4.0,"time, minute, order, cab, pick, min, pickup, start, estimation, every_time",1518,0.0376
5,5.0,"driver, client, show, life, cancellation, rude, office, block, cancel_trip, communication",1458,0.0361
6,6.0,"location, destination, option, map, place, address, point, pick_location, screen, everytime",1449,0.0359
7,7.0,"issue, app, response, today, thing, update, experience, safety, yesterday, review",1447,0.0358
8,8.0,"price, guy, reason, transportation, pricing, platform, purpose, alternative, range, demand",1445,0.0358
9,9.0,"driver, work, experience, friend, area, vehicle, person, job, family, drop",1366,0.0338


In [24]:
# df_dominant_topic_m['Num_Documents'].sum()

In [25]:
# Write the per-topic representative reviews to `test.csv` in the working directory.
# `index` is left at its default, so the row index is written as the first column.
sent_topics_sorteddf_m.to_csv('test.csv')

## Average Rating per Topic

In [35]:
# Display the frame that the per-topic rating averages below are computed from;
# those cells need it to carry both `Dominant_Topic` and `rating` columns.
df_dominant_topic_m

Unnamed: 0,Dominant_Topic,Perc_Contribution,Keywords,Text,rating,review
0,0.0,0.0859,"car, driver, passenger, city, fact, street, ex...","[thing, prenook, trip, catch, flight, pm, toda...",1,The first thing I noticed is that you can't pr...
1,1.0,0.0924,"taxi, rate, day, application, app, fee, scoote...","[need, section, airport, spot, number, app]",4,Your GPS setting around Cape Town Internationa...
2,13.0,0.1102,"support, email, journey, complaint, reason, we...","[friend, use, would_allow, journey, support, c...",1,I was invited by my friend and was given a £10...
3,13.0,0.0776,"support, email, journey, complaint, reason, we...","[driver, support, computer, response, case, re...",3,Your drivers are great BUT your support is no ...
4,17.0,0.1186,"driver, rider, promo, rating, case, quality, a...","[rating, moment, driver, rating, trip, driver,...",3,"Loving the app, but can only give it a medium ..."
...,...,...,...,...,...,...
40360,15.0,0.0711,"time, route, drivers_alway, drive, traffic, co...","[time, nonsense, driver, drive, support, usless]",1,"Its the 5th time Im saying, But When Will ther..."
40361,10.0,0.0686,"love, service, friendly_driver, nice_one, good...",[love],5,Just love it!
40362,0.0,0.0500,"car, driver, passenger, city, fact, street, ex...",[],5,Great app!
40363,17.0,0.0618,"driver, rider, promo, rating, case, quality, a...",[super_convenient],5,Super convenient and fast!


In [26]:
# Distinct topic ids as a sorted NumPy array (np.unique returns its result sorted).
# Background: https://www.geeksforgeeks.org/python-get-unique-values-list/
list_unique_topics = np.unique(df_dominant_topic_m['Dominant_Topic'].values)

In [28]:
# Print the mean `rating`, rounded to two decimals, of the rows assigned to each topic.
for topic in list_unique_topics:
    avg_rating = round(
        df_dominant_topic_m.loc[
            df_dominant_topic_m['Dominant_Topic'] == topic, 'rating'
        ].mean(),
        2,
    )
    # str() is applied explicitly so the printed text matches plain concatenation.
    print('Average Rating for Topic {} = {}'.format(str(topic), str(avg_rating)))

Average Rating for Topic 0.0 = 4.4
Average Rating for Topic 1.0 = 3.48
Average Rating for Topic 2.0 = 3.7
Average Rating for Topic 3.0 = 3.04
Average Rating for Topic 4.0 = 3.26
Average Rating for Topic 5.0 = 3.7
Average Rating for Topic 6.0 = 3.05
Average Rating for Topic 7.0 = 2.97
Average Rating for Topic 8.0 = 3.64
Average Rating for Topic 9.0 = 3.63
Average Rating for Topic 10.0 = 4.61
Average Rating for Topic 11.0 = 2.96
Average Rating for Topic 12.0 = 2.42
Average Rating for Topic 13.0 = 2.3
Average Rating for Topic 14.0 = 3.1
Average Rating for Topic 15.0 = 3.5
Average Rating for Topic 16.0 = 2.37
Average Rating for Topic 17.0 = 3.43
Average Rating for Topic 18.0 = 3.78
Average Rating for Topic 19.0 = 3.39


## Average Sentiment per Topic

In [8]:
# Read the topic + sentiment CSV named in the config. The file's first column is
# used as the index on load and then dropped, so rows end up numbered 0..n-1.
df_dominant_topic_m_sentiment = (
    pd.read_csv(
        config['csv_input_local']['bolt_apple_google_p1_topic_sentiment'],
        index_col=0,
    )
    .reset_index(drop=True)
)
# Bare last expression: display the loaded frame as the cell output.
df_dominant_topic_m_sentiment

In [7]:
# Print the mean `sentiment_score` of the rows assigned to each dominant topic.
# Topics are visited in order of first appearance (`Series.unique()` does not sort).
for topic in df_dominant_topic_m_sentiment['Dominant_Topic'].unique():
    is_topic = df_dominant_topic_m_sentiment['Dominant_Topic'] == topic
    # Named for what it holds: storing this in `avg_rating` would overwrite the value
    # left by the rating loop with a sentiment score.
    avg_sentiment = df_dominant_topic_m_sentiment.loc[is_topic, 'sentiment_score'].mean()
    print('Average Sentiment for Topic ' + str(topic) + ' = ' + str(avg_sentiment))

Unnamed: 0,Dominant_Topic,Perc_Contribution,Keywords,Text,rating,review
0,0.0,0.0859,"car, driver, passenger, city, fact, street, ex...","[thing, prenook, trip, catch, flight, pm, toda...",1,The first thing I noticed is that you can't pr...
1,1.0,0.0924,"taxi, rate, day, application, app, fee, scoote...","[need, section, airport, spot, number, app]",4,Your GPS setting around Cape Town Internationa...
2,13.0,0.1102,"support, email, journey, complaint, reason, we...","[friend, use, would_allow, journey, support, c...",1,I was invited by my friend and was given a £10...
3,13.0,0.0776,"support, email, journey, complaint, reason, we...","[driver, support, computer, response, case, re...",3,Your drivers are great BUT your support is no ...
4,17.0,0.1186,"driver, rider, promo, rating, case, quality, a...","[rating, moment, driver, rating, trip, driver,...",3,"Loving the app, but can only give it a medium ..."
...,...,...,...,...,...,...
40360,15.0,0.0711,"time, route, drivers_alway, drive, traffic, co...","[time, nonsense, driver, drive, support, usless]",1,"Its the 5th time Im saying, But When Will ther..."
40361,10.0,0.0686,"love, service, friendly_driver, nice_one, good...",[love],5,Just love it!
40362,0.0,0.0500,"car, driver, passenger, city, fact, street, ex...",[],5,Great app!
40363,17.0,0.0618,"driver, rider, promo, rating, case, quality, a...",[super_convenient],5,Super convenient and fast!
