[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/enliktjioe/master-thesis-2021/blob/main/notebooks/safe_feature_extraction.ipynb)

# Preparation

- Download the Stanford POS tagger: `wget http://nlp.stanford.edu/software/stanford-postagger-full-2016-10-31.zip`
- Put it in `references/re_2017_johann_et-al` (private files, licensed by the author)
- In `FE_SAFE.py`, update the variables `path_to_model` and `path_to_jar` with the absolute path to its directory

 
**Required libraries**:
```
import nltk
nltk.download('stopwords')
nltk.download('punkt')
```

In [1]:
# Python path referencing
import os
import sys

# Parent of the current working directory, resolved to an absolute path.
module_path = os.path.abspath(os.path.join('..'))
# Directory that is put on sys.path so the FE_SAFE import below can be resolved.
safe_module_dir = os.path.join(module_path, "references", "re_2017_johann_et-al")
# Test the exact entry being appended: checking `module_path` instead would never
# match, so every re-run of this cell would add a duplicate sys.path entry.
if safe_module_dir not in sys.path:
    sys.path.append(safe_module_dir)
    
import FE_SAFE as fs
# print(sys.path)

## [Test] Manual Copy Paste Review 

In [3]:
%%time


# Sample review pasted in by hand; the literal is passed to the extractor as written.
example_review = """
        I love bolt. I don’t use uber often because one ride even if it’s short is like £11. Whereas from my high street to my house is £3. Not only that but their drivers are SUPER friendly! I was sick one day (my first time using bolt) and the driver was so understanding and encouraged me throughout my journey. Bolt is 100% recommended by me. I don’t write reviews so that’s how you know I defiantly recommend it. My Instagram name is: TeeKezi if you wish to get in contact with me about bolt. I’m not an ambassador 😂 or anything like that. Just super happy with the service I have been receiving x,5,Love it
    """

# SAFE is reached through the `fs` module alias; the value returned by the last
# expression is what the notebook shows as this cell's output.
feature_extractor = fs.SAFE()
feature_extractor.extract_from_review(example_review)

CPU times: user 3.68 s, sys: 470 ms, total: 4.15 s
Wall time: 1min 9s


{'ambassador anything',
 'day driver understanding',
 'encourage throughout journey',
 'get contact bolt',
 'instagram name teekezi',
 'receive x',
 'street house',
 'write review'}

## Data Pre-Processing

In [4]:
import pandas as pd

In [54]:
def _playstore_csv(folder, app):
    """Return the relative path of the Google Play Store review CSV for one app."""
    return f"../review_mining/csv_output/{folder}/{app}_google_playstore_review.csv"


# One CSV per app; the folder name carries the app's numeric prefix.
bolt_path = _playstore_csv("1_bolt", "bolt")
uber_path = _playstore_csv("2_uber", "uber")
blablacar_path = _playstore_csv("3_blablacar", "blablacar")
cabify_path = _playstore_csv("4_cabify", "cabify")
via_path = _playstore_csv("5_via", "via")

getaround_path = _playstore_csv("6_getaround", "getaround")
ola_path = _playstore_csv("7_olacabs", "olacabs")
taxieu_path = _playstore_csv("8_taxieu", "taxieu")
freenow_path = _playstore_csv("9_freenow", "freenow")
yandexgo_path = _playstore_csv("10_yandexgo", "yandexgo")

# Positional index of the CSV column that contains the review text.
review_col = [4]
# Number of rows read from each CSV; kept small for testing.
number_of_rows = 1

In [55]:
def load_review_text(csv_path, usecols=None, nrows=None, sep='; '):
    """Read the ``content`` column of a review CSV and join it into one string.

    Parameters
    ----------
    csv_path : str
        Path of the CSV file to read.
    usecols : list, optional
        Forwarded to ``pandas.read_csv``; must include the ``content`` column.
    nrows : int, optional
        Forwarded to ``pandas.read_csv`` to limit how many rows are read.
    sep : str, default '; '
        Separator placed between consecutive reviews.

    Returns
    -------
    str
        The ``content`` values joined with ``sep``.
    """
    reviews = pd.read_csv(csv_path, usecols=usecols, nrows=nrows)
    # Series.str.cat without `others` collapses the whole column into one string.
    # source = https://stackoverflow.com/a/33280080/2670476
    return reviews['content'].str.cat(sep=sep)


# One concatenated review string per app, all read with the same column/row limits.
review_bolt = load_review_text(bolt_path, usecols=review_col, nrows=number_of_rows)
review_uber = load_review_text(uber_path, usecols=review_col, nrows=number_of_rows)
review_blablacar = load_review_text(blablacar_path, usecols=review_col, nrows=number_of_rows)
review_cabify = load_review_text(cabify_path, usecols=review_col, nrows=number_of_rows)
review_via = load_review_text(via_path, usecols=review_col, nrows=number_of_rows)

review_getaround = load_review_text(getaround_path, usecols=review_col, nrows=number_of_rows)
review_ola = load_review_text(ola_path, usecols=review_col, nrows=number_of_rows)
review_taxieu = load_review_text(taxieu_path, usecols=review_col, nrows=number_of_rows)
review_freenow = load_review_text(freenow_path, usecols=review_col, nrows=number_of_rows)
review_yandexgo = load_review_text(yandexgo_path, usecols=review_col, nrows=number_of_rows)

In [56]:
# Display the concatenated Yandex Go review text as a quick check of the loading step.
review_yandexgo

"In general, the app is awesome, but there are a couple of issues which don't give me the desired user experience. 1. The map is delaying from the driver's map, and is lagging when the driver gets another route than the one on the map. 2. I can't add MasterCard cards which were issued by another bank other than YooMoney (the issue is that I have a YooMoney card and a Visa one issued by anoyher bank, and I want to add another card issued by the same bank, which is MasterCard, and I can't add it)"

# Google Play Store Reviews

## Bolt

In [36]:
%%time

# Create a new SAFE extractor and run it on the concatenated Bolt review text;
# the returned value is displayed as the cell output.
feature_extractor = fs.SAFE()
feature_extractor.extract_from_review(review_bolt)

CPU times: user 82 ms, sys: 271 ms, total: 353 ms
Wall time: 42.8 s


{'catch flight',
 'disappear show',
 'find driver',
 'go book',
 'pm today',
 'rid today',
 'thing notice prenook trip'}

## Uber

In [42]:
%%time

# Create a new SAFE extractor and run it on the concatenated Uber review text;
# the returned value is displayed as the cell output.
feature_extractor = fs.SAFE()
feature_extractor.extract_from_review(review_uber)

CPU times: user 38.6 ms, sys: 112 ms, total: 151 ms
Wall time: 18.5 s


{'cairn taxi mob', 'job guy', 'uber happy uber'}

## Blablacar

In [43]:
%%time

# Create a new SAFE extractor and run it on the concatenated Blablacar review text;
# the returned value is displayed as the cell output.
feature_extractor = fs.SAFE()
feature_extractor.extract_from_review(review_blablacar)

CPU times: user 61.5 ms, sys: 184 ms, total: 246 ms
Wall time: 30.4 s


{'afford full cab charge',
 'cab service',
 'run office',
 'send cab',
 'send car',
 'star bcoz'}

## Cabify

In [44]:
%%time

# Create a new SAFE extractor and run it on the concatenated Cabify review text;
# the returned value is displayed as the cell output.
feature_extractor = fs.SAFE()
feature_extractor.extract_from_review(review_cabify)

CPU times: user 94.6 ms, sys: 287 ms, total: 382 ms
Wall time: 47.5 s


{'bisol taxi ride',
 'chilling winter',
 'deactivate leave',
 'get pay',
 'give email address',
 'mexico city',
 'submit form'}

## Via

In [45]:
%%time

# Create a new SAFE extractor and run it on the concatenated Via review text;
# the returned value is displayed as the cell output.
feature_extractor = fs.SAFE()
feature_extractor.extract_from_review(review_via)

CPU times: user 28.6 ms, sys: 71.5 ms, total: 100 ms
Wall time: 10.9 s


{'know stop', 'want b pick'}

## Getaround

In [57]:
%%time

# Create a new SAFE extractor and run it on the concatenated Getaround review text;
# the returned value is displayed as the cell output.
feature_extractor = fs.SAFE()
feature_extractor.extract_from_review(review_getaround)

CPU times: user 80.4 ms, sys: 255 ms, total: 335 ms
Wall time: 47.1 s


{'area car stand',
 'frustrating upload',
 'give star',
 'hang helpline',
 'idea top',
 'photo car data',
 'take photo car',
 'time assistance'}

## Ola

In [58]:
%%time

# Create a new SAFE extractor and run it on the concatenated Ola review text;
# the returned value is displayed as the cell output.
feature_extractor = fs.SAFE()
feature_extractor.extract_from_review(review_ola)

CPU times: user 47.1 ms, sys: 131 ms, total: 178 ms
Wall time: 38.4 s


{'account ola support team', 'login account', 'regard amit'}

## Taxi.eu

In [59]:
%%time

# Create a new SAFE extractor and run it on the concatenated Taxi.eu review text;
# the returned value is displayed as the cell output.
feature_extractor = fs.SAFE()
feature_extractor.extract_from_review(review_taxieu)

CPU times: user 51.8 ms, sys: 138 ms, total: 190 ms
Wall time: 40.3 s


{'pay amount order history', 'payment card work', 'time service'}

## Free Now

In [60]:
%%time

# Create a new SAFE extractor and run it on the concatenated Free Now review text;
# the returned value is displayed as the cell output.
feature_extractor = fs.SAFE()
feature_extractor.extract_from_review(review_freenow)

CPU times: user 26.5 ms, sys: 51 ms, total: 77.5 ms
Wall time: 10.2 s


{'ask taxi', 'authorize bank', 'keep start', 'school way', 'time taxi'}

## Yandex Go

In [61]:
%%time

# Create a new SAFE extractor and run it on the concatenated Yandex Go review text;
# the returned value is displayed as the cell output.
feature_extractor = fs.SAFE()
feature_extractor.extract_from_review(review_yandexgo)

CPU times: user 44.7 ms, sys: 109 ms, total: 154 ms
Wall time: 20.3 s


{'add mastercard card issue',
 'bank yoomoney',
 'couple issue',
 'give desire user experience'}