### references
- Aspect-based sentiment analysis: https://github.com/ScalaConsultants/Aspect-Based-Sentiment-Analysis

### import packages

In [1]:
import ast
import os
import warnings

import pandas as pd

from utils import data_scraping, absa_english_text

warnings.filterwarnings('ignore')

2023-08-23 17:04:18.160390: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-08-23 17:04:18.160444: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-08-23 17:04:22.659293: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2023-08-23 17:04:22.659378: W tensorflow/stream_executor/cuda/cuda_driver.cc:326] failed call to cuInit: UNKNOWN ERROR (303)
2023-08-23 17:04:22.659409: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (CID-GiangTD13): /proc/driver/nvidia/version does not exist
2023-08-23 17:04:22.661701: I tensorflow/core/platform/cpu_feat

### read & transform data

In [2]:
# Load the review data: reuse the cached CSV when it exists, otherwise scrape
# the hotel page and cache the result for the next run.
file_name = 'data_the_grace_dalat_reviews.csv'
if os.path.exists(file_name):
    # profileId is read through str (presumably to stop pandas parsing it as a number).
    df = pd.read_csv(file_name, converters={'profileId':str})
    # These columns hold Python literals (dicts / lists) stored as text in the CSV.
    # ast.literal_eval only accepts literals, so a tampered cache file cannot run
    # arbitrary code the way eval() would. Non-string cells (e.g. NaN) pass through.
    eval_cols = ['travelPurpose','travelKeywords','photoDataDisplaysList','reactionSummaries']
    for col in eval_cols:
        # Series.map is used instead of DataFrame.applymap, which newer pandas deprecates.
        df[col] = df[col].map(lambda x: ast.literal_eval(x) if isinstance(x, str) else x)
else:
    data = data_scraping(url_hotel="https://www.traveloka.com/vi-vn/hotel/vietnam/the-grace-hotel-dalat-3000010042556", reviews_per_page=10)
    df = data.get_all_reviews()
    df.to_csv(file_name, index=False)

In [3]:
# Blank strings count as missing values; once they are masked out,
# discard every column that is left without a single value.
df = df.mask(df == '').dropna(axis=1, how='all')


def _when(kind, transform):
    """Return a mapper that applies `transform` to `kind` instances and passes anything else through."""
    def mapper(value):
        if isinstance(value, kind):
            return transform(value)
        return value
    return mapper


def _sorted_labels(extract):
    """Return a function turning a list into a comma-joined, sorted string of labels.

    Dict items contribute `extract(item)`; any other item contributes ''.
    """
    def join(items):
        labels = [extract(item) if isinstance(item, dict) else '' for item in items]
        labels.sort()
        return ','.join(labels)
    return join


# Flatten the nested review fields into plain columns. Every new value is
# computed from the frame as it stands before any column is overwritten.
df = df.assign(
    travelPurposeText=df.travelPurpose.apply(_when(dict, lambda d: d['travelPurposeText'])),
    travelPurpose=df.travelPurpose.apply(_when(dict, lambda d: d['travelPurpose'])),
    travelKeywords=df.travelKeywords.apply(
        _when(list, _sorted_labels(lambda k: k['travelKeyword']))
    ),
    reviewLikes=df.reactionSummaries.apply(
        _when(dict, lambda r: r['reactionSummaryMap']['LIKE']['reactionCount'])
    ),
    photoCategories=df.photoDataDisplaysList.apply(
        _when(list, _sorted_labels(lambda p: p['photoCategoryDisplay']['photoCategory']))
    ),
)

In [4]:
# Run aspect-based sentiment analysis on the first 15 reviews and collect, per
# review, its id together with whatever absa_by_np returns for the review text.
output = []
# Slice with head() before converting, so only the processed rows become dicts.
# The counter is named `idx` because `id` would overwrite the builtin id()
# for the rest of the notebook session.
for idx, row in enumerate(df.head(15).to_dict(orient='records')):
    absa_class = absa_english_text(row['originalReviewText'])
    if row['translated']:
        # Row is flagged as translated: use its reviewText as-is
        # (presumably already English; confirm against the scraper).
        reviewTextFn = row['reviewText']
    else:
        # Otherwise tokenize the original text and pass it through the
        # helper's translate_vi_to_en step.
        tokenized = absa_class.words_tokenized()
        reviewTextFn = absa_class.translate_vi_to_en(tokenized)

    sentiments = absa_class.absa_by_np(reviewTextFn)
    output.append({
        'reviewId': row['reviewId'],
        'sentiment':sentiments
    })
    print('---', idx)

--- 0
--- 1
--- 2
--- 3
--- 4
--- 5
--- 6
--- 7
--- 8
--- 9
--- 10
--- 11
--- 12
--- 13
--- 14


In [20]:
# NOTE(review): looks like a leftover scratch check of list repetition; nothing
# else in the visible notebook uses it, so it is a candidate for removal.
[1] * 5

[1, 1, 1, 1, 1]

In [27]:
# Build one frame per analysed review, tag its rows with the review id,
# then stack all of them into a single long table (each frame keeps its own index).
sentiment_frames = [
    pd.DataFrame(record['sentiment']).assign(reviewId=record['reviewId'])
    for record in output
]
df_output = pd.concat(sentiment_frames)

In [37]:
# Review text of every aspect row whose sentiment label is 'negative'.
df_output.loc[df_output['sentiment'] == 'negative', 'text'].tolist()

[' Clean room, friendly, enthusiastic staff. Late check out surcharge 30% of room rate (12pm-4pm)',
 "Enthusiastic reception, rent a scooter 120k, calculated according to the rental hour, if I rent a car from 12 o'clock, it will be charged for half a day. Lock the room with a magnetic card. The plus point is early check-in at no extra charge. The minus point is that the soundproofing is not good, hearing echoes from other rooms. The room has 2 standard free bottles of water per day with tea and coffee. Strong hot water, glass-walled bathroom.",
 "Enthusiastic reception, rent a scooter 120k, calculated according to the rental hour, if I rent a car from 12 o'clock, it will be charged for half a day. Lock the room with a magnetic card. The plus point is early check-in at no extra charge. The minus point is that the soundproofing is not good, hearing echoes from other rooms. The room has 2 standard free bottles of water per day with tea and coffee. Strong hot water, glass-walled bathroom."

In [38]:
# All aspect rows whose sentiment label is 'negative'.
df_output.loc[df_output['sentiment'] == 'negative']

Unnamed: 0,text,aspect,sentiment,neu_score,neg_score,pos_score,reviewId
2,"Clean room, friendly, enthusiastic staff. Lat...",surcharge,negative,0.093093,0.849673,0.057234,126414680
0,"Enthusiastic reception, rent a scooter 120k, c...",reception,negative,0.142583,0.546848,0.310569,126150640
1,"Enthusiastic reception, rent a scooter 120k, c...",scooter,negative,0.086209,0.84392,0.069871,126150640
2,"Enthusiastic reception, rent a scooter 120k, c...",120k,negative,0.272433,0.62586,0.101707,126150640
3,"Enthusiastic reception, rent a scooter 120k, c...",hour,negative,0.485034,0.49618,0.018786,126150640
4,"Enthusiastic reception, rent a scooter 120k, c...",car,negative,0.183335,0.753425,0.06324,126150640
14,"Enthusiastic reception, rent a scooter 120k, c...",soundproofing,negative,0.001935,0.987775,0.010291,126150640
15,"Enthusiastic reception, rent a scooter 120k, c...",hearing,negative,0.00144,0.989121,0.009439,126150640
2,"Friendly staff, close to the center, clean roo...",room,negative,0.072778,0.728857,0.198365,126384376
4,"Friendly staff, close to the center, clean roo...",outside,negative,0.00647,0.987551,0.005979,126384376
