[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/enliktjioe/master-thesis-2021/blob/main/notebooks/safe_feature_extraction.ipynb)

# Preparation

- Download the Stanford POS tagger: `wget http://nlp.stanford.edu/software/stanford-postagger-full-2016-10-31.zip`
- Put it in `references/re_2017_johann_et-al` (private files, licensed by the author)
- In `FE_SAFE.py`, update the variables `path_to_model` and `path_to_jar` with the absolute path to that directory

 
**Required NLTK data packages** (download once per environment):
```
import nltk
nltk.download('stopwords')
nltk.download('punkt')
nltk.download('wordnet')
```

In [3]:
# NLTK data setup.
# The 'stopwords' and 'punkt' downloads are kept disabled below; uncomment
# them on a machine that does not have those packages yet.
# nltk.download('stopwords')
# nltk.download('punkt')

import nltk
nltk.download('wordnet')  # fetch the WordNet package into the local nltk_data directory

[nltk_data] Downloading package wordnet to
[nltk_data]     /home/developer/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [4]:
# Make the SAFE reference implementation importable from this notebook.
import os
import sys


def add_to_sys_path(directory):
    """Append `directory` to `sys.path` unless it is already listed.

    Args:
        directory: Path of the directory to make importable.

    Returns:
        The same `directory`, so the call can be used inline.
    """
    if directory not in sys.path:
        sys.path.append(directory)
    return directory


# Parent of the current working directory (presumably the repository root
# when the notebook is run from its own folder).
module_path = os.path.abspath(os.path.join('..'))

# The membership test must use the directory that is actually appended.
# Testing `module_path` while appending the sub-directory added a duplicate
# entry on every re-run, and added nothing at all when `module_path` itself
# was already on sys.path.
safe_module_dir = add_to_sys_path(
    os.path.join(module_path, "references", "re_2017_johann_et-al")
)
    
import FE_SAFE as fs
# print(sys.path)

## [Test] Manual Copy Paste Review 

In [5]:
# %%time
# Disabled smoke test: runs SAFE on a single hand-pasted Bolt review.
# The timing and feature set shown below this cell were recorded before the code was commented out.

# example_review = """
#         I love bolt. I don’t use uber often because one ride even if it’s short is like £11. Whereas from my high street to my house is £3. Not only that but their drivers are SUPER friendly! I was sick one day (my first time using bolt) and the driver was so understanding and encouraged me throughout my journey. Bolt is 100% recommended by me. I don’t write reviews so that’s how you know I defiantly recommend it. My Instagram name is: TeeKezi if you wish to get in contact with me about bolt. I’m not an ambassador 😂 or anything like that. Just super happy with the service I have been receiving x,5,Love it
#     """

# #     feature_extractor = SAFE()
# feature_extractor = fs.SAFE()
# feature_extractor.extract_from_review(example_review)

CPU times: user 900 ms, sys: 101 ms, total: 1 s
Wall time: 15 s


{'ambassador anything',
 'day driver understanding',
 'encourage throughout journey',
 'get contact bolt',
 'instagram name teekezi',
 'receive x',
 'street house',
 'write review'}

## Data Pre-Processing

In [6]:
import pandas as pd

In [7]:
# Every app has its own numbered folder of review CSVs under this root.
csv_output_dir = "../review_mining/csv_output"


def _playstore_csv(folder, app):
    """Return the Google Play Store review CSV path for one app folder."""
    return csv_output_dir + "/" + folder + "/" + app + "_google_playstore_review.csv"


bolt_path = _playstore_csv("1_bolt", "bolt")
uber_path = _playstore_csv("2_uber", "uber")
blablacar_path = _playstore_csv("3_blablacar", "blablacar")
cabify_path = _playstore_csv("4_cabify", "cabify")
via_path = _playstore_csv("5_via", "via")

getaround_path = _playstore_csv("6_getaround", "getaround")
ola_path = _playstore_csv("7_olacabs", "olacabs")
taxieu_path = _playstore_csv("8_taxieu", "taxieu")
freenow_path = _playstore_csv("9_freenow", "freenow")
yandexgo_path = _playstore_csv("10_yandexgo", "yandexgo")

# Index of the CSV column that contains the review text.
review_col = [4]
# For testing, only the first 100 rows of each CSV file are used.
number_of_rows = 100

In [8]:
def load_review_text(csv_path):
    """Read one app's review CSV and join its reviews into a single string.

    Only the column selected by `review_col` and the first `number_of_rows`
    rows are read. That column is expected to be named "content".

    Args:
        csv_path: Path to the review CSV file.

    Returns:
        All review texts joined with '; ' as one string.
    """
    reviews = pd.read_csv(csv_path, usecols=review_col, nrows=number_of_rows)
    # Join every review into one string; source = https://stackoverflow.com/a/33280080/2670476
    return reviews["content"].str.cat(sep='; ')


# One concatenated review string per app. The helper replaces ten copies of
# the same two statements and keeps each name bound to a string only.
review_bolt = load_review_text(bolt_path)
review_uber = load_review_text(uber_path)
review_blablacar = load_review_text(blablacar_path)
review_cabify = load_review_text(cabify_path)
review_via = load_review_text(via_path)

review_getaround = load_review_text(getaround_path)
review_ola = load_review_text(ola_path)
review_taxieu = load_review_text(taxieu_path)
review_freenow = load_review_text(freenow_path)
review_yandexgo = load_review_text(yandexgo_path)

In [9]:
# Spot-check: display the concatenated Yandex Go review text built above.
review_yandexgo

'In general, the app is awesome, but there are a couple of issues which don\'t give me the desired user experience. 1. The map is delaying from the driver\'s map, and is lagging when the driver gets another route than the one on the map. 2. I can\'t add MasterCard cards which were issued by another bank other than YooMoney (the issue is that I have a YooMoney card and a Visa one issued by anoyher bank, and I want to add another card issued by the same bank, which is MasterCard, and I can\'t add it); Bad expirience in Chisinau. Price at the end of the trip differs from the offered at the beginning. In my case was +20%. My wife faced similar situation. It doesn\'t matter there\'s traffic jump or not.; This is the worst experience one can have with the Taxi app. Fares are almost always wrong, navigation doesn\'t work neither for driver, nor for user taxi is always in the wrong place and if you can\'t get to the place they are at the app still charges you full rate. Most of the employed dr

# Google Play Store Reviews

## Bolt

In [10]:
%%time

feature_extractor = fs.SAFE()
feature_extractor.extract_from_review(review_bolt)

CPU times: user 524 ms, sys: 2.02 s, total: 2.54 s
Wall time: 6min 3s


{'accept request',
 'accept request drive',
 'accept take ride',
 'accept untrained manner',
 'accord minute',
 'add chat room',
 'address program',
 'airport look',
 'allocate location pick',
 'allow book journey',
 'allow user browse',
 'appear box',
 'apply request ride',
 'appreciate promo',
 'arrival price',
 'arrive destination time fee',
 'arrive half way road',
 'arrive pick location charge',
 'arrive pickup',
 'arrive wait',
 'ask destination',
 'ask destination rider',
 'ask strange question',
 'assert legal right',
 'assist driver information',
 'attempt contact customer support',
 'avoid last trip',
 'avoid pick customer',
 'become 20min',
 'bolt client',
 'bolt ride',
 'call customer support',
 'cancel driver',
 'cancel job end book',
 'cancel ride',
 'cancel ride report issue',
 'cancel service',
 'cancel trip',
 'cancel trip book',
 'car color',
 'catch flight',
 'cause afraid thing',
 'change attitude',
 'change leave option',
 'charge high price',
 'charge place',
 'ch

## Uber

In [11]:
%%time

feature_extractor = fs.SAFE()
feature_extractor.extract_from_review(review_uber)

CPU times: user 384 ms, sys: 1.37 s, total: 1.76 s
Wall time: 4min 7s


{'4x rate original',
 'accept driver',
 'accept ride',
 'accept ride feel',
 'accept trip waste time',
 'accomplish simple task',
 'accuse customer',
 'add different credit card',
 'address issue',
 'agree ride',
 'allow select search',
 'area upgrade',
 'arrive charge',
 'ask charge value',
 'ask close issue',
 'ask money',
 'ask pin',
 'ask response charge credit',
 'ask wan',
 'attempt tip driver',
 'balance time book ride',
 'benifited trip',
 'book moto car',
 'book schedule today',
 'bounce ride request',
 'brilliance speed elegance',
 'cab service price',
 'cairn taxi mob',
 'call cab',
 'call customer care',
 'call support',
 'cancel book',
 'cancel charge',
 'cancel price order price',
 'cancel ride',
 'cancel ride book',
 'cancel time',
 'cancel trip',
 'cancel trip fare',
 'cancel trip n amount',
 'cancel trip section',
 'cancel trip uber',
 'care customer',
 'cause r',
 'charge ride',
 'cheat people trust',
 'come location call u',
 'come pick location',
 'come pick wait bu

## Blablacar

In [12]:
%%time

feature_extractor = fs.SAFE()
feature_extractor.extract_from_review(review_blablacar)

CPU times: user 605 ms, sys: 1.97 s, total: 2.58 s
Wall time: 6min 10s


{'afford fuel drive',
 'afford full cab charge',
 'allow location',
 'allow log',
 'appear screen',
 'application india turn something',
 'application user',
 'ask around 3k route',
 'ask double amount quote',
 'ask fair charge',
 'ask high price',
 'ask log stick',
 'ask money',
 'ask pupose visit',
 'attract customer mention',
 'block driver',
 'book hop ride',
 'boon covid',
 'bother trip detail',
 'budget blablacars',
 'business model india',
 'cab driver',
 'cab service',
 'cancel book',
 'cancel driver',
 'cancel driver promise',
 'cancel many number trip',
 'cancel plan travel',
 'cancel ride',
 'cancel ride book ride',
 'cancel ride dont travel',
 'cancel ride pickup time',
 'cancel travel plan',
 'cancel trip',
 'cancel trip block seat',
 'cancel trip journey',
 'canceling last hour',
 'car driver',
 'car nothing',
 'car owner',
 'car pool',
 'charge lot',
 'charge ride',
 'charge route',
 'choose language currency',
 'come customer',
 'come dont',
 'come time',
 'come way',
 

## Cabify

In [13]:
%%time

feature_extractor = fs.SAFE()
feature_extractor.extract_from_review(review_cabify)

CPU times: user 877 ms, sys: 3.39 s, total: 4.27 s
Wall time: 10min 29s


{'45min cab',
 '600mb data',
 'accept cash',
 'accept order',
 'accord customer service estimate',
 'add account',
 'add anything',
 'add bad customer service',
 'add payment method charge',
 'add ride start',
 'add spanish uk',
 'agree driver course',
 'agree fare mention',
 'airport trip',
 'allow pay',
 'allow pay cash cc',
 'amount time',
 'anything card charge',
 'apologize everybody meeting',
 'appear account preauthorization',
 'approve due eu regulation',
 'arrive charge',
 'arrive early meeting',
 'arrive sudden order',
 'ask change route',
 'ask code',
 'ask money',
 'ask refund',
 'ask wait',
 'authorization spanish',
 'avoid service stay uber',
 'base experience review cabify',
 'beginning trip',
 'bisol taxi ride',
 'blow battery life',
 'book cab',
 'book ride',
 'break developer care',
 'bus didnt',
 'business model',
 'business refuse',
 'cab phone',
 'cabify charge journey',
 'cabify experience',
 'cabify lot',
 'calculate journey price',
 'call taxi',
 'call text driv

## Via

In [14]:
%%time

feature_extractor = fs.SAFE()
feature_extractor.extract_from_review(review_via)

CPU times: user 803 ms, sys: 2.77 s, total: 3.57 s
Wall time: 8min 37s


{'accept cashapp',
 'accept payment',
 'account system',
 'add card',
 'add payment',
 'addition fact wait time',
 'address customer issue',
 'airport service',
 'align bullet trainsarrivals',
 'allow book ride',
 'allowed maximum fare way',
 'amaze fast pick time',
 'appreciate guy',
 'appreciate payment plan offer',
 'area dc',
 'area question',
 'arlington texas',
 'ask money',
 'ask rate ride',
 'assign driver',
 'authorization book seat',
 'base pricing neighborhood travel',
 'bit crapshot',
 'book car',
 'book ride',
 'bus route cut',
 'bust u turn',
 'bye guy',
 'call customer',
 'call customer support',
 'caltrain commute',
 'cancel charge',
 'cancel driver',
 'cancel ride',
 'cancel ride option car',
 'cancel ride order lyft',
 'cancelation fee decline',
 'car price tho',
 'catch credit card company',
 'cause late event driver',
 'charge ride book',
 'charge show fee',
 'city u',
 'come complicated user interface',
 'come distance',
 'come experience service year',
 'come merc

## Getaround

In [15]:
%%time

feature_extractor = fs.SAFE()
feature_extractor.extract_from_review(review_getaround)

CPU times: user 581 ms, sys: 2.24 s, total: 2.82 s
Wall time: 6min 59s


{'accept booking',
 'access message',
 'add option payment',
 'adjust due covid',
 'adjust price',
 'adjust price rental',
 'allow stuff access function',
 'alot owner crook',
 'amaze product',
 'appear search result',
 'appreciate idea',
 'area car stand',
 'arrange responsive connectivity',
 'arrive owner',
 'ask getaround',
 'ask receipt fuel filling',
 'authorisation car rental',
 'availability calender',
 'avoid company cost',
 'avoid pay',
 'avoid unless ton time',
 'block account',
 'book car',
 'book collection drop',
 'book vehicle',
 'booking car',
 'booking check',
 'booking misery',
 'booking time',
 'box work',
 'call multiple time',
 'call owner',
 'call service drivy',
 'cancel car freeze bone',
 'cancel rental',
 'cancel reservation',
 'cancel trip',
 'cancellation reason car',
 'car area',
 'car book drivy',
 'car drive car',
 'car half bar gasoline',
 'car hire company',
 'car owner',
 'car rental service',
 'car thing',
 'car time',
 'card work',
 'checking process',

## Ola

In [16]:
%%time

feature_extractor = fs.SAFE()
feature_extractor.extract_from_review(review_ola)

CPU times: user 704 ms, sys: 2.52 s, total: 3.22 s
Wall time: 7min 49s


{'accept request',
 'accept ride',
 'accept ride time car',
 'accept trip',
 'accept ur request destination',
 'account ola support team',
 'add cover',
 'add discount',
 'add money',
 'add ola money',
 'add popup',
 'allow book',
 'answer call happend',
 'appear payment section',
 'apply coupon',
 'apply end pay',
 'arrive destination',
 'ask cash',
 'ask cash payment car',
 'ask customer',
 'ask drop location',
 'ask drop place',
 'ask extra money',
 'ask final destination',
 'ask gor money cash',
 'ask happnd book car',
 'ask login',
 'ask money',
 'ask otp',
 'ask phone',
 'ask rating',
 'ask toll tax',
 'ask u',
 'assume ola money',
 'authorization fake company',
 'auto driver',
 'beg refund',
 'behaviour disgust guy',
 'beware people hacking account',
 'block payment',
 'blunder show payment detail',
 'boking charge r',
 'bolt kapten uber',
 'book airport',
 'book auto',
 'book auto cab refuse',
 'book cab',
 'book fare show',
 'book fraudency',
 'book initiate booking',
 'book o

## Taxi.eu

In [17]:
%%time

feature_extractor = fs.SAFE()
feature_extractor.extract_from_review(review_taxieu)

CPU times: user 416 ms, sys: 1.52 s, total: 1.94 s
Wall time: 4min 49s


{'accept order',
 'accept paypal payment',
 'add bank detail',
 'add guy girl',
 'airport charge',
 'alles user country',
 'allow contact driver',
 'allow pay',
 'amount quote traffic',
 'arrival time',
 'avoid kill battery',
 'believe horrible understatement',
 'blame u book',
 'book pay taxi',
 'book taxi',
 'book taxi didnt',
 'book taxi kindda purpose',
 'booking driver idea',
 'call support',
 'call taxi',
 'call taxi company',
 'cancel cab point',
 'cancel pick location ride',
 'cancel ride',
 'cancel sudden make service',
 'cancel u',
 'cancel without notice',
 'cancellation feature',
 'change language',
 'charged meter',
 'choose pay',
 'click say',
 'come google',
 'communicate driver',
 'compare offer',
 'compare uber',
 'confirm email',
 'confirm exact address',
 'confirm pickup',
 'connect u',
 'date satnav system',
 'decency respond anybody',
 'deliver promise',
 'developer response',
 'disappoint correct profile',
 'dispatch centre',
 'dispatch service',
 'driver claim',


## Free Now

In [18]:
%%time

feature_extractor = fs.SAFE()
feature_extractor.extract_from_review(review_freenow)

CPU times: user 555 ms, sys: 2.06 s, total: 2.61 s
Wall time: 6min 16s


{'accept card',
 'accept card method payment',
 'accept cash',
 'accept job',
 'accept next booking drive',
 'ad card',
 'addison lee',
 'allow pay card',
 'allow prebook',
 'android reply pick location',
 'anyone leave ring',
 'anyone test release',
 'argue pick point n',
 'arrive bangladesh pull',
 'arrive claim taxi',
 'arrive say',
 'arrive unlike call cab',
 'arrive wrong place u',
 'ask drive taxi',
 'ask login page',
 'ask taxi',
 'authorize bank',
 'autofill downside',
 'avoid cost',
 'bill multiple time',
 'bit trouble route',
 'book cab',
 'book make',
 'book taxi',
 'booking next morning',
 'cab fare',
 'call driver park',
 'calling result',
 'cancel dont show',
 'cancel ride',
 'cancel taxi',
 'cancel trip',
 'cancel trip leave',
 'cancel wrong charge',
 'card revolut aib',
 'carry pet',
 'carry supermarket thats',
 'change card',
 'charge cancellation fee',
 'charge leave charge fee',
 'check online taxi service',
 'claim right',
 'client ur expense',
 'come driver',
 'com

## Yandex Go

In [19]:
%%time

feature_extractor = fs.SAFE()
feature_extractor.extract_from_review(review_yandexgo)

CPU times: user 407 ms, sys: 1.32 s, total: 1.73 s
Wall time: 4min 11s


{'accept cash',
 'accept payment card',
 'add card payment',
 'add functionality taxi',
 'add mastercard card issue',
 'add option',
 'add payment method',
 'anvil delivery',
 'area map driver',
 'ask problem',
 'ask u annoy',
 'assist lot',
 'await time application',
 'bill tip',
 'burn cpu lot problem',
 'cancel taxi online payment',
 'car right',
 'card uzbekistan',
 'case anything',
 'change phone set account',
 'charge cc',
 'charge full rate',
 'charge problem',
 'charge wait time',
 'choose female driver',
 'contradict gps location',
 'couple issue',
 'courrier functionality',
 'credit card',
 'customer rat',
 'customer service',
 'customer service reply none',
 'customer support',
 'cuz data origin',
 'delete card uninstall',
 'depressed clean driver',
 'differ offer',
 'dismiss pop',
 'dismiss popup',
 'driver armenia',
 'driver display 810rsd',
 'driver experience',
 'driver gentleman',
 'driver kind cooperative',
 'driver mean',
 'driver phone work',
 'driver pick ride',
 'd