# References

- [Pandas: How to read CSV file from google drive public?](https://stackoverflow.com/a/56611995/2670476)

In [1]:
import pandas as pd
import yaml
import re

from langdetect import detect_langs, DetectorFactory

# https://pypi.org/project/langdetect/
# Language detection algorithm is non-deterministic, which means that if you try to run it on a text which is either too short or too ambiguous, you might get different results everytime you run it.
# To enforce consistent results, call following code before the first language detection:
DetectorFactory.seed = 0

In [2]:
config_file = open('config.yaml')
config = yaml.load(config_file, Loader=yaml.FullLoader)

In [3]:
def read_csv_from_gdrive(csvInput, columnToUsed = None):
    url='https://drive.google.com/uc?id=' + csvInput.split('/')[-2]
    df = pd.read_csv(url, usecols=columnToUsed, index_col=0)
    return df

In [4]:
def isEnglishReview(textInput):
#     print(textInput)
    
    a = detect_langs(textInput)
    listToStr = ','.join(map(str, a))
#     print(listToStr)
    
#     check after and before string
#     https://stackoverflow.com/questions/12572362/how-to-get-a-string-after-a-specific-substring
#     https://stackoverflow.com/questions/27387415/how-would-i-get-everything-before-a-in-a-string-python
    try:
        english_score = float((listToStr.partition('en:')[2]).partition(',')[0])
    except ValueError:
        english_score = 0
        
#     print(english_score)
    
    if english_score > 0.1:
#         print('English')
        isEnglish = True
    else:
#         print('Non-English')
        isEnglish = False
    
    return isEnglish, listToStr, english_score

# Exploratory Data Analysis

## Bolt

### Google Play Store

In [5]:
df = read_csv_from_gdrive(config['csv_input']['bolt_google'])
total_reviews = len(df)
unique_users  = len(df['userName'].unique())
unknown_users = len(df[df['userName']=='A Google user'])
mean = df['score'].mean()

print(f'Total English reviews: {total_reviews} \n')
print(f'Total unique users : {unique_users}')
print(f'Total unknown users: {unknown_users}')
print(f'Total users who gave multiple reviews: {total_reviews - unique_users - unknown_users}\n')
print(f'Average rating for this app based on the textual reviews: {round(mean,2)} \n')

Total English reviews: 51907 

Total unique users : 46872
Total unknown users: 4780
Total users who gave multiple reviews: 255

Average rating for this app based on the textual reviews: 3.96 



### Apple App Store

In [6]:
df = read_csv_from_gdrive(config['csv_input']['bolt_apple'])
total_reviews = len(df)
unique_users  = len(df['userName'].unique())
mean = df['rating'].mean()

print(f'Total English reviews: {total_reviews} \n')
print(f'Total unique users : {unique_users}')
print(f'Total users who gave multiple reviews: {total_reviews - unique_users}\n')
print(f'Average rating for this app based on the textual reviews: {round(mean,2)} \n')

Total English reviews: 3154 

Total unique users : 3149
Total users who gave multiple reviews: 5

Average rating for this app based on the textual reviews: 3.02 



## Uber

### Google Play Store

In [7]:
df = read_csv_from_gdrive(config['csv_input']['uber_google'])
total_reviews = len(df)
unique_users  = len(df['userName'].unique())
unknown_users = len(df[df['userName']=='A Google user'])
mean = df['score'].mean()

print(f'Total English reviews: {total_reviews} \n')
print(f'Total unique users : {unique_users}')
print(f'Total unknown users: {unknown_users}')
print(f'Total users who gave multiple reviews: {total_reviews - unique_users - unknown_users}\n')
print(f'Average rating for this app based on the textual reviews: {round(mean,2)} \n')

Total English reviews: 10000 

Total unique users : 9911
Total unknown users: 26
Total users who gave multiple reviews: 63

Average rating for this app based on the textual reviews: 3.3 



### Apple App Store

In [8]:
df = read_csv_from_gdrive(config['csv_input']['uber_apple'])
total_reviews = len(df)
unique_users  = len(df['userName'].unique())
mean = df['rating'].mean()

print(f'Total English reviews: {total_reviews} \n')
print(f'Total unique users : {unique_users}')
print(f'Total users who gave multiple reviews: {total_reviews - unique_users}\n')
print(f'Average rating for this app based on the textual reviews: {round(mean,2)} \n')

Total English reviews: 10342 

Total unique users : 10314
Total users who gave multiple reviews: 28

Average rating for this app based on the textual reviews: 2.96 



## Blablacar

### Google Play Store

In [9]:
df = read_csv_from_gdrive(config['csv_input']['blablacar_google'])
total_reviews = len(df)
unique_users  = len(df['userName'].unique())
unknown_users = len(df[df['userName']=='A Google user'])
mean = df['score'].mean()

print(f'Total English reviews: {total_reviews} \n')
print(f'Total unique users : {unique_users}')
print(f'Total unknown users: {unknown_users}')
print(f'Total users who gave multiple reviews: {total_reviews - unique_users - unknown_users}\n')
print(f'Average rating for this app based on the textual reviews: {round(mean,2)} \n')

Total English reviews: 21172 

Total unique users : 19452
Total unknown users: 874
Total users who gave multiple reviews: 846

Average rating for this app based on the textual reviews: 4.3 



### Apple App Store

In [10]:
df = read_csv_from_gdrive(config['csv_input']['blablacar_apple'])
total_reviews = len(df)
unique_users  = len(df['userName'].unique())
mean = df['rating'].mean()

print(f'Total English reviews: {total_reviews} \n')
print(f'Total unique users : {unique_users}')
print(f'Total users who gave multiple reviews: {total_reviews - unique_users}\n')
print(f'Average rating for this app based on the textual reviews: {round(mean,2)} \n')

Total English reviews: 23308 

Total unique users : 23243
Total users who gave multiple reviews: 65

Average rating for this app based on the textual reviews: 4.13 



## Cabify

### Google Play Store

In [11]:
df = read_csv_from_gdrive(config['csv_input']['cabify_google'])
total_reviews = len(df)
unique_users  = len(df['userName'].unique())
unknown_users = len(df[df['userName']=='A Google user'])
mean = df['score'].mean()

print(f'Total English reviews: {total_reviews} \n')
print(f'Total unique users : {unique_users}')
print(f'Total unknown users: {unknown_users}')
print(f'Total users who gave multiple reviews: {total_reviews - unique_users - unknown_users}\n')
print(f'Average rating for this app based on the textual reviews: {round(mean,2)} \n')

Total English reviews: 3261 

Total unique users : 3047
Total unknown users: 204
Total users who gave multiple reviews: 10

Average rating for this app based on the textual reviews: 2.6 



### Apple App Store

In [12]:
df = read_csv_from_gdrive(config['csv_input']['cabify_apple'])
total_reviews = len(df)
unique_users  = len(df['userName'].unique())
mean = df['rating'].mean()

print(f'Total English reviews: {total_reviews} \n')
print(f'Total unique users : {unique_users}')
print(f'Total users who gave multiple reviews: {total_reviews - unique_users}\n')
print(f'Average rating for this app based on the textual reviews: {round(mean,2)} \n')

Total English reviews: 7384 

Total unique users : 7373
Total users who gave multiple reviews: 11

Average rating for this app based on the textual reviews: 4.19 



## Via

### Google Play Store

In [13]:
df = read_csv_from_gdrive(config['csv_input']['via_google'])
total_reviews = len(df)
unique_users  = len(df['userName'].unique())
unknown_users = len(df[df['userName']=='A Google user'])
mean = df['score'].mean()

print(f'Total English reviews: {total_reviews} \n')
print(f'Total unique users : {unique_users}')
print(f'Total unknown users: {unknown_users}')
print(f'Total users who gave multiple reviews: {total_reviews - unique_users - unknown_users}\n')
print(f'Average rating for this app based on the textual reviews: {round(mean,2)} \n')

Total English reviews: 1873 

Total unique users : 1756
Total unknown users: 110
Total users who gave multiple reviews: 7

Average rating for this app based on the textual reviews: 3.6 



### Apple App Store

In [14]:
df = read_csv_from_gdrive(config['csv_input']['via_apple'])
total_reviews = len(df)
unique_users  = len(df['userName'].unique())
mean = df['rating'].mean()

print(f'Total English reviews: {total_reviews} \n')
print(f'Total unique users : {unique_users}')
print(f'Total users who gave multiple reviews: {total_reviews - unique_users}\n')
print(f'Average rating for this app based on the textual reviews: {round(mean,2)} \n')

Total English reviews: 2392 

Total unique users : 2389
Total users who gave multiple reviews: 3

Average rating for this app based on the textual reviews: 3.65 



## Getaround

### Google Play Store

In [15]:
df = read_csv_from_gdrive(config['csv_input']['getaround_google'])
total_reviews = len(df)
unique_users  = len(df['userName'].unique())
unknown_users = len(df[df['userName']=='A Google user'])
mean = df['score'].mean()

print(f'Total English reviews: {total_reviews} \n')
print(f'Total unique users : {unique_users}')
print(f'Total unknown users: {unknown_users}')
print(f'Total users who gave multiple reviews: {total_reviews - unique_users - unknown_users}\n')
print(f'Average rating for this app based on the textual reviews: {round(mean,2)} \n')

Total English reviews: 731 

Total unique users : 701
Total unknown users: 31
Total users who gave multiple reviews: -1

Average rating for this app based on the textual reviews: 3.46 



### Apple App Store

In [16]:
df = read_csv_from_gdrive(config['csv_input']['getaround_apple'])
total_reviews = len(df)
unique_users  = len(df['userName'].unique())
mean = df['rating'].mean()

print(f'Total English reviews: {total_reviews} \n')
print(f'Total unique users : {unique_users}')
print(f'Total users who gave multiple reviews: {total_reviews - unique_users}\n')
print(f'Average rating for this app based on the textual reviews: {round(mean,2)} \n')

Total English reviews: 2488 

Total unique users : 2482
Total users who gave multiple reviews: 6

Average rating for this app based on the textual reviews: 3.25 



## Ola Cabs

### Google Play Store

In [17]:
df = read_csv_from_gdrive(config['csv_input']['olacabs_google'])
total_reviews = len(df)
unique_users  = len(df['userName'].unique())
unknown_users = len(df[df['userName']=='A Google user'])
mean = df['score'].mean()

print(f'Total English reviews: {total_reviews} \n')
print(f'Total unique users : {unique_users}')
print(f'Total unknown users: {unknown_users}')
print(f'Total users who gave multiple reviews: {total_reviews - unique_users - unknown_users}\n')
print(f'Average rating for this app based on the textual reviews: {round(mean,2)} \n')

Total English reviews: 10000 

Total unique users : 9470
Total unknown users: 354
Total users who gave multiple reviews: 176

Average rating for this app based on the textual reviews: 1.54 



### Apple App Store

In [18]:
df = read_csv_from_gdrive(config['csv_input']['olacabs_apple'])
total_reviews = len(df)
unique_users  = len(df['userName'].unique())
mean = df['rating'].mean()

print(f'Total English reviews: {total_reviews} \n')
print(f'Total unique users : {unique_users}')
print(f'Total users who gave multiple reviews: {total_reviews - unique_users}\n')
print(f'Average rating for this app based on the textual reviews: {round(mean,2)} \n')

Total English reviews: 922 

Total unique users : 922
Total users who gave multiple reviews: 0

Average rating for this app based on the textual reviews: 2.19 



## Taxi.eu

### Google Play Store

In [19]:
df = read_csv_from_gdrive(config['csv_input']['taxieu_google'])
total_reviews = len(df)
unique_users  = len(df['userName'].unique())
unknown_users = len(df[df['userName']=='A Google user'])
mean = df['score'].mean()

print(f'Total English reviews: {total_reviews} \n')
print(f'Total unique users : {unique_users}')
print(f'Total unknown users: {unknown_users}')
print(f'Total users who gave multiple reviews: {total_reviews - unique_users - unknown_users}\n')
print(f'Average rating for this app based on the textual reviews: {round(mean,2)} \n')

Total English reviews: 211 

Total unique users : 193
Total unknown users: 19
Total users who gave multiple reviews: -1

Average rating for this app based on the textual reviews: 2.8 



### Apple App Store

In [20]:
df = read_csv_from_gdrive(config['csv_input']['taxieu_apple'])
total_reviews = len(df)
unique_users  = len(df['userName'].unique())
mean = df['rating'].mean()

print(f'Total English reviews: {total_reviews} \n')
print(f'Total unique users : {unique_users}')
print(f'Total users who gave multiple reviews: {total_reviews - unique_users}\n')
print(f'Average rating for this app based on the textual reviews: {round(mean,2)} \n')

Total English reviews: 564 

Total unique users : 561
Total users who gave multiple reviews: 3

Average rating for this app based on the textual reviews: 3.52 



## Free Now

### Google Play Store

In [21]:
df = read_csv_from_gdrive(config['csv_input']['freenow_google'])
total_reviews = len(df)
unique_users  = len(df['userName'].unique())
unknown_users = len(df[df['userName']=='A Google user'])
mean = df['score'].mean()

print(f'Total English reviews: {total_reviews} \n')
print(f'Total unique users : {unique_users}')
print(f'Total unknown users: {unknown_users}')
print(f'Total users who gave multiple reviews: {total_reviews - unique_users - unknown_users}\n')
print(f'Average rating for this app based on the textual reviews: {round(mean,2)} \n')

Total English reviews: 11078 

Total unique users : 10245
Total unknown users: 758
Total users who gave multiple reviews: 75

Average rating for this app based on the textual reviews: 3.24 



### Apple App Store

In [22]:
df = read_csv_from_gdrive(config['csv_input']['freenow_apple'])
total_reviews = len(df)
unique_users  = len(df['userName'].unique())
mean = df['rating'].mean()

print(f'Total English reviews: {total_reviews} \n')
print(f'Total unique users : {unique_users}')
print(f'Total users who gave multiple reviews: {total_reviews - unique_users}\n')
print(f'Average rating for this app based on the textual reviews: {round(mean,2)} \n')

Total English reviews: 14350 

Total unique users : 14320
Total users who gave multiple reviews: 30

Average rating for this app based on the textual reviews: 3.73 



## Yandex Go

### Google Play Store

In [23]:
df = read_csv_from_gdrive(config['csv_input']['yandexgo_google'])
total_reviews = len(df)
unique_users  = len(df['userName'].unique())
unknown_users = len(df[df['userName']=='A Google user'])
mean = df['score'].mean()

print(f'Total English reviews: {total_reviews} \n')
print(f'Total unique users : {unique_users}')
print(f'Total unknown users: {unknown_users}')
print(f'Total users who gave multiple reviews: {total_reviews - unique_users - unknown_users}\n')
print(f'Average rating for this app based on the textual reviews: {round(mean,2)} \n')

Total English reviews: 7053 

Total unique users : 6626
Total unknown users: 382
Total users who gave multiple reviews: 45

Average rating for this app based on the textual reviews: 3.26 



### Apple App Store

In [24]:
df = read_csv_from_gdrive(config['csv_input']['yandexgo_apple'])
total_reviews = len(df)
unique_users  = len(df['userName'].unique())
mean = df['rating'].mean()

print(f'Total English reviews: {total_reviews} \n')
print(f'Total unique users : {unique_users}')
print(f'Total users who gave multiple reviews: {total_reviews - unique_users}\n')
print(f'Average rating for this app based on the textual reviews: {round(mean,2)} \n')

Total English reviews: 171 

Total unique users : 171
Total users who gave multiple reviews: 0

Average rating for this app based on the textual reviews: 2.73 

