In [151]:
import pandas as pd
import numpy as np

In [152]:
# read in df_english.csv
df = pd.read_csv("./df_english.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,is_english
0,0,86ff1ea1-0b63-43ce-addc-eb43f6193b3b,Yaseen Yaseen,https://play-lh.googleusercontent.com/a/ALm5wu...,Yaeen Yaeen gg,5,0,,2022-10-04 20:32:28,,,True
1,1,3577f7a1-3394-4e77-813d-095a82cf8bcf,Kemar Richardson,https://play-lh.googleusercontent.com/a-/ACNPE...,Great,5,0,26.3.4,2022-10-04 20:32:10,,,True
2,2,7c8c56d9-d8ad-47d4-b24b-5289aa4529ff,Tracy Dunn,https://play-lh.googleusercontent.com/a/ALm5wu...,good,5,0,26.4.3,2022-10-04 20:31:21,,,True
3,3,80db804f-cccd-4b09-b690-abc12cbf0612,SG. Mugo. (Mugoz:),https://play-lh.googleusercontent.com/a-/ACNPE...,Good app,5,0,26.3.4,2022-10-04 20:30:22,,,True
4,4,4ed35e90-0f45-4865-81c4-b3a6f2ea49f7,Mwansa Judy,https://play-lh.googleusercontent.com/a-/ACNPE...,Most amazing app,5,0,26.3.4,2022-10-04 20:29:25,,,True


In [153]:
# split 10% of data as test dataset
test_df = df.sample(frac=0.1, random_state=1)
test_df.head()
# save test_df as csv file
test_df.to_csv("test_df.csv", index=False)

# remove test_df from df to create train dataset for modeling
train_df = df.drop(test_df.index)   
train_df.head()
# save train_df as csv file
train_df.to_csv("train_df.csv", index=False)


In [154]:
# Function to convert score to sentiment
def to_sentiment(rating):
    
    rating = int(rating)
    
    # Convert to class
    if rating <= 2:
        return 0
    elif rating == 3:
        return 1
    else:
        return 2

# Apply to the dataset 
train_df['sentiment'] = df.score.apply(to_sentiment)

0 means negative
1 means neutral
2 means positive

In [155]:
train_df.head()

Unnamed: 0.1,Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,is_english,sentiment
0,0,86ff1ea1-0b63-43ce-addc-eb43f6193b3b,Yaseen Yaseen,https://play-lh.googleusercontent.com/a/ALm5wu...,Yaeen Yaeen gg,5,0,,2022-10-04 20:32:28,,,True,2
1,1,3577f7a1-3394-4e77-813d-095a82cf8bcf,Kemar Richardson,https://play-lh.googleusercontent.com/a-/ACNPE...,Great,5,0,26.3.4,2022-10-04 20:32:10,,,True,2
2,2,7c8c56d9-d8ad-47d4-b24b-5289aa4529ff,Tracy Dunn,https://play-lh.googleusercontent.com/a/ALm5wu...,good,5,0,26.4.3,2022-10-04 20:31:21,,,True,2
3,3,80db804f-cccd-4b09-b690-abc12cbf0612,SG. Mugo. (Mugoz:),https://play-lh.googleusercontent.com/a-/ACNPE...,Good app,5,0,26.3.4,2022-10-04 20:30:22,,,True,2
5,5,bd35bbe9-73c9-4e17-acaf-7aa1a71caed3,Muhammad Sajid,https://play-lh.googleusercontent.com/a-/ACNPE...,full Entertainment,4,0,25.9.4,2022-10-04 20:28:58,,,True,2


In [156]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
import torch
from sklearn.metrics import accuracy_score, precision_recall_fscore_support
import re

In [157]:
tokenizer = AutoTokenizer.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")
model = AutoModelForSequenceClassification.from_pretrained("nlptown/bert-base-multilingual-uncased-sentiment")


In [158]:
def sentiment_score(review):
    tokens = tokenizer(review, return_tensors="pt", padding=True, truncation=True)
    result = model(**tokens)

    final = int(torch.argmax(result.logits))+1

    if final <= 2:
        return 0
    elif final == 3:
        return 1
    else:
        return 2

In [159]:
# for range 100 to 120 in train_df.text check
for i in range(150,170):
    # print()
    print(train_df['content'].iloc[i], sentiment_score(train_df['content'].iloc[i]))
    # print()
# sentiment_score("i love it so good movie the food is fresh and tasty")

It is the best 2
Love this apps 2
I love this app 2
Absolutely good 2
AK Rajput 821 2
Its good app 2
Qasim Damraeya 1
The absolute best app in Google Play Store 2
Nice 2
Please tik tok team virel ma video please 0
This app is best but not like good 1
Nyc 2
Funn app 2
Good job 2
For You please 2
Sardar_salmankhan110 2
This app not good because this app inside very sexual contant so much bad contant or so much video are same 0
Very addictive but fun 2
This app is amazing  2


In [160]:
# only keep 10k records from test_df
test_df = test_df.sample(n=2000)
test_df.shape


# if "is_english" column is false(i.e it is in Spanish) for cell, drop that row to df_english.csv file.
for index, row in test_df.iterrows():
    if row["is_english"] == False :
        test_df.drop([index], inplace=True) 

# create sentiment column for test_df
test_df['sentiment'] = df.score.apply(to_sentiment)

test_df.index = range(len(test_df))

test_df.head(10)

Unnamed: 0.1,Unnamed: 0,reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,is_english,sentiment
0,10395,9c34ea87-dfaf-481b-b085-765d4e3f55f8,Raj Das,https://play-lh.googleusercontent.com/a/ALm5wu...,Gd app,5,0,26.3.3,2022-09-30 15:51:06,,,True,2
1,8581,fec54490-aaa4-4503-ab0f-ed3282c44551,Damilola Joel,https://play-lh.googleusercontent.com/a-/ACNPE...,Nice,4,0,26.2.41,2022-10-01 09:57:43,,,True,2
2,15459,1f93cefc-ff0b-475b-b3ca-f138dc6c033b,Umar Ali Ali,https://play-lh.googleusercontent.com/a/ALm5wu...,Hl,1,0,25.7.7,2022-09-28 08:53:59,,,True,0
3,31160,2e6f711e-fb20-4863-8626-9566bfea52e7,oboh beatrice,https://play-lh.googleusercontent.com/a/ALm5wu...,Perfect,5,0,25.8.5,2022-09-21 20:56:08,,,True,2
4,93947,dacd2d85-56c7-4047-b6f9-cf8ddd3b6986,Priscah Wairimu,https://play-lh.googleusercontent.com/a/ALm5wu...,It wont update on my device,1,0,22.7.4,2022-08-31 18:14:27,,,True,0
5,34,1786f807-ef7f-4001-88c4-4ffd71edcd02,Ejaz ALLAH Yar,https://play-lh.googleusercontent.com/a-/ACNPE...,Good,5,0,,2022-10-04 20:09:34,,,True,2
6,42554,4d013039-01e6-4aec-a728-327d4f2c8cae,Badon Mir,https://play-lh.googleusercontent.com/a/ALm5wu...,My life best app in tiktok i love tik tok plus...,5,0,26.2.1,2022-09-17 16:10:28,,,True,2
7,97670,790af1ef-8bfd-42c2-b0d2-6ab6aa111324,رحمه رحمه,https://play-lh.googleusercontent.com/a/ALm5wu...,Robi robi,5,0,25.8.5,2022-08-30 20:07:40,,,True,2
8,7900,e4fa38c9-cd9a-428a-94c4-01e118869b31,Qurban Ali,https://play-lh.googleusercontent.com/a/ALm5wu...,Nice,5,0,26.0.3,2022-10-01 17:08:10,,,True,2
9,5807,6579bcb9-04a7-440d-a87d-dd84817799f1,Joseph Ingram,https://play-lh.googleusercontent.com/a/ALm5wu...,I love it,5,0,,2022-10-02 13:25:36,,,True,2


In [170]:
accuracies = []
for i in test_df.index[:1000]:
    
    # map sentiments to "positive", "neutral" or "negative"
    # if test_df.sentiment[i] <= 2:
    #     result = 0
    # elif test_df.sentiment[i] == 3:
    #     result = 1
    # elif test_df.sentiment[i] >= 4:
    #     result = 2

    # append value to new column "prediction"
    print(train_df['sentiment'].iloc[i], train_df['content'].iloc[i], sentiment_score(test_df['content'].iloc[i]))
    test_df["prediction"] = sentiment_score(test_df['content'].iloc[i])

    if(test_df['prediction'].iloc[i] == test_df['sentiment'].iloc[i]):
        accuracies.append(1)
    else:
        accuracies.append(0)
accuracy = (sum(accuracies)/len(accuracies))*100
print(accuracy)

2 Yaeen Yaeen gg 2
2 Great 2
2 good 1
2 Good app 2
2 full Entertainment 0
2 Super and good app 2
2 nc 2
2 Tiktok is a nice app 2
0 1qwertyuiop 2
0 Need my Tik tok open note  2
2 I enjoy it seeing some of videos and following friends 2
1 My favorits vidio 2
2 Wow 1
2 I love tiktok its a great app 2
0 I dont know whats going on anymore cause i cant download tik tok anymore i must download tik tok lite 0
2 Awesome app Ive ever had its the best to have 2
2 I really like titok because its fun  2
0 When i make videos it keeps banneding my videosand i keep on starting making videos and account 0
0 Dufv 2
2 Amazing  2
1 Very good  2
2 Good 2
2 Its an amazing app I havent seen any problem with it But for the last few days m facing an issue its not showing capcut edit effects 2
2 e 0
2 ForYou Main Bhi Dall Dety Tu Kia Bat Thi 2
2 I love TikTok and have been using it daily for a few months until yesterday When I try to record a TikTok the audio is so low that even turned all the way up in the app

In [None]:
sentiment_score("this app sucks")

0