In [1]:
from bertopic import BERTopic
import pandas as pd

# Functions

In [2]:
def visualize_version(model, df, class_model, versions, threshold=1, width=1200, height=400, remove_outliers=False, sort_yaxes=False):
    """
    Parameters:
        model:          topic_model
        df:             review_dataset
        class_model:    class model created using the topic_model.topics_per_class() method
        versions:       array object with the version names
        threshold:      remove topics with freq below threshold
        width:          width of the graph
        height:         height of the graph

    Usage:

    """

    df = class_model[class_model['Class'].isin(versions)].sort_values(by=['Frequency'], ascending=False)
    if threshold:
        df = df[df['Frequency'] >= threshold]
    if remove_outliers:
        df = df[df['Topic'] != -1]

    topic_list = list(dict.fromkeys(list(df['Topic'])))

    graph = BERTopic.visualize_topics_per_class(model, df, topics=topic_list)

    graph.update_layout(margin=dict(l=20, r=20, t=20, b=20), width=width, height=height)

    graph.update_yaxes(categoryorder='total descending')

    if sort_yaxes:
        graph.update_yaxes(categoryorder='array', categoryarray=versions)

    return graph

In [3]:
def get_documents_version(model, df, version, topic_num, connect='content_corrected'):

    df_test2 = pd.merge(model.get_document_info(df['content_corrected']), df, left_on='Document', right_on=connect)

    version_specific = df_test2[df_test2['reviewCreatedVersion'] == version][['content', 'content_corrected', 'Topic']]

    return list(version_specific[version_specific['Topic'] == topic_num]['content'])

In [4]:
def get_document_info_expand(model, df):

    return pd.merge(model.get_document_info(df['content_corrected']), df, left_on='Document', right_on='content_corrected')

# Load data

In [5]:
df_netflix = pd.read_csv(f'preprocessed_data/prep_netflix_v4.csv')
df_youtube = pd.read_csv(f'preprocessed_data/prep_youtube_v4.csv')
df_whatsapp = pd.read_csv(f'preprocessed_data/prep_whatsapp_v4.csv')
df_paypal = pd.read_csv(f'preprocessed_data/prep_paypal_v4.csv')
df_amazon = pd.read_csv(f'preprocessed_data/prep_amazon_v4.csv')

  df_amazon = pd.read_csv(f'preprocessed_data/prep_amazon_v4.csv')


# Load old models

In [6]:
topic_model_netflix = BERTopic.load('models/topicmodel_netflix_hdbscan_v1.model')
topic_model_youtube = BERTopic.load('models/topicmodel_youtube_hdbscan_v1.model')
topic_model_whatsapp = BERTopic.load('models/topicmodel_whatsapp_hdbscan_v1.model')
topic_model_paypal = BERTopic.load('models/topicmodel_paypal_hdbscan_v1.model')
topic_model_amazon = BERTopic.load('models/topicmodel_amazon_hdbscan_v1.model')

# Load new models (Outliers reduced)

In [42]:
topic_model_netflix = BERTopic.load('models/topicmodel_netflix_hdbscan_v2.model')
topic_model_youtube = BERTopic.load('models/topicmodel_youtube_hdbscan_v2.model')
topic_model_whatsapp = BERTopic.load('models/topicmodel_whatsapp_hdbscan_v2.model')
topic_model_paypal = BERTopic.load('models/topicmodel_paypal_hdbscan_v2.model')
topic_model_amazon = BERTopic.load('models/topicmodel_amazon_hdbscan_v2.model')

# Visualization

## Netflix Version Specific Analysis

In [7]:
classes_complete_netflix = list(df_netflix['reviewCreatedVersion'])
versions = list(set(classes_complete_netflix))
versions.sort(key=lambda x: list(map(int, x.split(' ')[0].split('.'))))
versions

['1.8.0 build 561',
 '2.0.2 build 665',
 '2.2.0 build 843',
 '2.4.0 build 939',
 '2.4.1 build 950',
 '3.0.2 build 1042',
 '3.1.1 build 1112',
 '3.1.2 build 1142',
 '3.1.4 build 1156',
 '3.1.5 build 1714',
 '3.1.8 build 1728',
 '3.2.0 build 1340',
 '3.2.1 build 1346',
 '3.3.2 build 1467',
 '3.4.1 build 1506',
 '3.5.0 build 1564',
 '3.6.3 build 1633',
 '3.7.0 build 1672',
 '3.7.1 build 1711',
 '3.7.2 build 1755',
 '3.8.0 build 3832',
 '3.8.1 build 3868',
 '3.8.2 build 3903',
 '3.8.3 build 3955',
 '3.9.1 build 4105',
 '3.9.3 build 4284',
 '3.10.2 build 4360',
 '3.11.1 build 4421',
 '3.12.0 build 4529',
 '3.12.2 build 5042',
 '3.13.0 build 5107',
 '3.14.0 build 5154',
 '3.14.1 build 5179',
 '3.14.2 build 5186',
 '3.14.3 build 5200',
 '3.15.3 build 5269',
 '3.16.0 build 5294',
 '3.16.1 build 5342',
 '3.16.2 build 5358',
 '3.16.3 build 5359',
 '3.16.6 build 5383',
 '3.16.6 build 5382',
 '4.0.0 build 5523',
 '4.2.0 build 5952',
 '4.2.1 build 5986',
 '4.3.0 build 6142',
 '4.3.1 build 6151',
 '

In [8]:
topics_per_class_netflix = topic_model_netflix.topics_per_class(df_netflix['content_corrected'], classes=classes_complete_netflix)

0it [00:00, ?it/s]

442it [00:17, 24.57it/s]


In [9]:
df_info_netflix = get_document_info_expand(topic_model_netflix, df_netflix)

In [10]:
list(df_info_netflix[df_info_netflix['reviewCreatedVersion'] == '8.64.0 build 8 50394'][df_info_netflix[df_info_netflix['reviewCreatedVersion'] == '8.64.0 build 8 50394']['Topic'] == -1]['Document'])

 'app not working. will not load. service too expensive. supposed to be able to use when traveling. requires you to set your home location once a month through the app that will not work. so disappointed. price keeps going up with more and more restrictions and we still have less programming available compared to other countries and pay more money.',
 "undo or fix the recent update which has totally changed the navigation of the app and its worse than ever to use, images missing, auto loads the what everyone's watching instead of letting me scroll through what i want. fix it please",
 "absolutely horrible user interface on both mobile and desktop and they keep making it worse. it's overwhelming and searches don't produce relevant results. i find myself using netflix less and less.",
 'like to add some movies which are not available on netflix app like the raid of redemption, avengers, and some other marvel movies... i have requested on call center of netflix but no response.',
 "netfli

In [11]:
len(list(set(df_info_netflix[df_info_netflix['reviewCreatedVersion'] == '8.64.0 build 8 50394']['Topic'])))

78

### Visualize topics per version

In [21]:
visualize_version(
    topic_model_netflix,
    df_info_netflix,
    topics_per_class_netflix,
    ['8.64.0 build 8 50394'],
    height=450,
    threshold=3,
    remove_outliers=True
    )

In [17]:
get_documents_version(topic_model_netflix, df_netflix, '8.64.0 build 8 50394', 4)

["The new app layout SUCKS. The home page no longer exists. So getting to your list is now inconvenient to get to and the continue watching is now in the explore tab. These two things are what I would prefer to see first when I open the app. But now what I first see is the new & hot tab and that is just scrolling though the trailers of what everyone's watching, coming son, games, and the top ten movies/shows.",
 'Absolutely hate the new interface. It was much better earlier, was easy and comforting to scroll accross. Netflix was very unique because of that and now its like a common app. Absolutely disappointed.',
 'Give an option to change the interface, coz the new interface is very confusing. It shows very limited items on display, the old one was better.',
 'The new update literally just moved the most used tab to the center and replaced it with the upcoming... Which it already had a tab for! Now we have to click search to continue watching ongoing shows? Seems like a step backwards

### Visualize topics of multiple versions

In [37]:
visualize_version(
    topic_model_netflix,
    df_info_netflix,
    topics_per_class_netflix,
    [x for x in versions if x.split('.')[0] == '8'],
    remove_outliers=True,
    sort_yaxes=True
    )

## YouTube Version Specific Analysis

In [43]:
classes_complete_youtube = list(df_youtube['reviewCreatedVersion'])
versions = list(set(classes_complete_youtube))
versions.sort(key=lambda x: list(map(int, x.split('.'))))

In [44]:
versions

['2.1.6',
 '2.4.4',
 '3.5.5',
 '4.0.23',
 '4.1.23',
 '4.2.16',
 '4.4.11',
 '4.5.17',
 '5.1.10',
 '5.2.27',
 '5.3.28',
 '5.3.32',
 '5.5.27',
 '5.5.30',
 '5.6.36',
 '5.7.38',
 '5.7.41',
 '5.9.0.13',
 '5.10.1.5',
 '5.10.3.5',
 '5.17.6',
 '6.0.13',
 '10.03.5',
 '10.05.6',
 '10.08.53',
 '10.14.56',
 '10.18.55',
 '10.25.57',
 '10.29.52',
 '10.31.55',
 '10.34.57',
 '10.37.58',
 '10.43.60',
 '10.47.55',
 '10.49.59',
 '11.01.56',
 '11.01.70',
 '11.04.56',
 '11.07.59',
 '11.10.60',
 '11.12.57',
 '11.13.56',
 '11.16.62',
 '11.17.52',
 '11.19.56',
 '11.22.56',
 '11.23.56',
 '11.25.55',
 '11.25.59',
 '11.27.53',
 '11.29.53',
 '11.29.55',
 '11.32.53',
 '11.33.58',
 '11.35.60',
 '11.38.54',
 '11.39.56',
 '11.41.56',
 '11.43.52',
 '11.43.54',
 '11.45.59',
 '11.47.57',
 '11.49.55',
 '12.01.55',
 '12.03.57',
 '12.05.53',
 '12.07.63',
 '12.09.58',
 '12.10.60',
 '12.11.57',
 '12.13.53',
 '12.14.56',
 '12.15.55',
 '12.16.55',
 '12.16.56',
 '12.17.54',
 '12.19.56',
 '12.21.57',
 '12.23.60',
 '12.25.54',
 '1

In [45]:
topics_per_class_youtube = topic_model_youtube.topics_per_class(df_youtube['content_corrected'], classes=classes_complete_youtube)

755it [00:25, 29.87it/s]


In [46]:
df_info_youtube = get_document_info_expand(topic_model_youtube, df_youtube)

In [47]:
df_info_youtube[df_info_youtube['reviewCreatedVersion'] == '18.14.39']

Unnamed: 0,Document,Topic,Name,Top_n_words,Probability,Representative_document,reviewId,userName,userImage,content,...,reviewCreatedVersion,at,replyContent,repliedAt,major_version,token_length,doc_length,content_corrected,lan_code,contains_only_english
448,"i can't play kid shows for my little, says my ...",10,10_crashing_freezes_crashes_freezing,crashing - freezes - crashes - freezing - load...,0.013476,False,ebc4cab2-185a-4949-9419-89a8b7d9ecdb,Alivia England,https://play-lh.googleusercontent.com/a/AGNmyx...,"I can't play kid shows for my littled, says my...",...,18.14.39,2023-04-12T10:18:26,,,18,65,364,"i can't play kid shows for my little, says my ...",['English'],True
480,the contents in yt shorts doesn't refresh. alw...,894,894_refresh_shorts_refreshing_rainplayz,refresh - shorts - refreshing - rainplayz - re...,0.038172,True,2c80637a-9660-4301-90aa-d77f8489cfa3,ALIVE WandereR,https://play-lh.googleusercontent.com/a-/ACB-R...,The contents in yt shorts doesn't refresh. Alw...,...,18.14.39,2023-04-12T10:49:24,,,18,40,213,the contents in yt shorts doesn't refresh. alw...,['English'],True
823,"almost a year ago it was the dislike button, n...",3,3_dislike_dislikes_count_removal,dislike - dislikes - count - removal - button ...,0.011958,False,e80d5ec6-d273-4401-ad38-940719cca92d,Joshua Sexton,https://play-lh.googleusercontent.com/a-/ACB-R...,"Almost a year ago it was the dislike button, n...",...,18.14.39,2023-04-12T19:04:31,,,18,56,290,"almost a year ago it was the dislike button, n...",['English'],True
2690,seriously? i am getting updates in like every ...,4,4_store_open_update_uninstall,store - open - update - uninstall - playstore ...,0.011457,False,04af892c-5d81-47fc-afb6-a5429fcac9df,Demonstein X,https://play-lh.googleusercontent.com/a-/ACB-R...,Seriously? I am getting updates in like every ...,...,18.14.39,2023-04-12T17:41:17,,,18,41,228,seriously? i am getting updates in like every ...,['English'],True
2698,it's a great app you can pretty much watch wha...,92,92_advertisement_boar_ads_lots,advertisement - boar - ads - lots - adverting ...,0.013763,False,b90f9a92-1fef-478e-a1b2-e9080560e498,Luke Priester,https://play-lh.googleusercontent.com/a-/ACB-R...,It's a great app you can pretty much watch wha...,...,18.14.39,2023-04-13T00:33:04,,,18,24,126,it's a great app you can pretty much watch wha...,['English'],True
10483,"got overwhelmed by the ads, its super annoying",-1,-1_you_app_premium_like,you - app - premium - like - that - good - and...,0.431841,False,42211a5e-82af-45da-a971-44227548561d,fri fai max,https://play-lh.googleusercontent.com/a-/ACB-R...,"got overwhelmed by the ads, its super annoying",...,18.14.39,2023-04-13T03:45:36,,,18,8,46,"got overwhelmed by the ads, its super annoying",['English'],True
17134,you tube shouldn't have any commercials when i...,17,17_study_learn_learning_knowledge,study - learn - learning - knowledge - student...,0.010394,False,e2e6012b-60ab-4ad8-b56c-0882166c492d,Rain Santiago,https://play-lh.googleusercontent.com/a-/ACB-R...,You tube shouldn't have any commercials when i...,...,18.14.39,2023-04-12T16:40:18,,,18,13,78,you tube shouldn't have any commercials when i...,['English'],True
24171,too much ads and it causes trauma and anxiety ...,1,1_ads_ad_advertisements_too,ads - ad - advertisements - too - many - adver...,0.086778,False,4d61bdf7-7149-4c88-8624-ba6e9bc939cb,JOHN MICHAEL AARON CO,https://play-lh.googleusercontent.com/a-/ACB-R...,TOO MUCH ADS AND IT CAUSES TRAUMA AND ANXIETY ...,...,18.14.39,2023-04-12T18:33:07,,,18,11,54,too much ads and it causes trauma and anxiety ...,['English'],True
24192,youtube app kept dim and play button wont go away,48,48_pause_forward_minimize_screen,pause - forward - minimize - screen - window -...,0.021677,False,1fbe6cc2-134f-4170-aa81-64b4547cdf9f,Dr. Isaac Andy,https://play-lh.googleusercontent.com/a-/ACB-R...,youtube app kept dim and play button wont go away,...,18.14.39,2023-04-12T09:54:31,,,18,10,49,youtube app kept dim and play button wont go away,['English'],True
77794,who created the latest version this time not ...,10,10_crashing_freezes_crashes_freezing,crashing - freezes - crashes - freezing - load...,0.013228,False,6ecd5d93-878f-4bed-8e9e-323b18c7c52c,durgashankar barik,https://play-lh.googleusercontent.com/a-/ACB-R...,Who created the latest version this time not ...,...,18.14.39,2023-04-12T17:18:40,,,18,11,69,who created the latest version this time not ...,['English'],True


### Visualize topics per version

In [48]:
visualize_version(
    topic_model_youtube,
    df_info_youtube,
    topics_per_class_youtube,
    ['18.14.37'],
    threshold=2
)

In [51]:
get_documents_version(topic_model_youtube, df_youtube, '18.14.37', 11)

['YouTube videos continue to auto play while watching shorts forcing you to stop watching them.',
 "My shorts are too big. It's the size of my full screen."]

## WhatsApp Version Specific analysis

In [52]:
classes_complete_whatsapp = list(df_whatsapp['reviewCreatedVersion'])
versions = list(set(classes_complete_whatsapp))
versions.sort(key=lambda x: list(map(int, x.split('.'))))

In [53]:
versions

['2.9.1547',
 '2.11.152',
 '2.11.186',
 '2.11.238',
 '2.11.301',
 '2.11.432',
 '2.11.468',
 '2.11.476',
 '2.11.505',
 '2.11.528',
 '2.11.544',
 '2.12.5',
 '2.12.69',
 '2.12.84',
 '2.12.94',
 '2.12.124',
 '2.12.158',
 '2.12.176',
 '2.12.250',
 '2.12.304',
 '2.12.317',
 '2.12.365',
 '2.12.367',
 '2.12.449',
 '2.12.453',
 '2.12.510',
 '2.16.13',
 '2.16.57',
 '2.16.89',
 '2.16.95',
 '2.16.133',
 '2.16.207',
 '2.16.225',
 '2.16.259',
 '2.16.275',
 '2.16.297',
 '2.16.306',
 '2.16.310',
 '2.16.325',
 '2.16.352',
 '2.16.382',
 '2.16.389',
 '2.16.396',
 '2.16.399',
 '2.17.24',
 '2.17.65',
 '2.17.69',
 '2.17.78',
 '2.17.79',
 '2.17.107',
 '2.17.126',
 '2.17.145',
 '2.17.146',
 '2.17.190',
 '2.17.223',
 '2.17.252',
 '2.17.253',
 '2.17.254',
 '2.17.271',
 '2.17.296',
 '2.17.323',
 '2.17.339',
 '2.17.351',
 '2.17.366',
 '2.17.395',
 '2.17.412',
 '2.17.427',
 '2.17.440',
 '2.17.442',
 '2.18.9',
 '2.18.13',
 '2.18.29',
 '2.18.39',
 '2.18.46',
 '2.18.54',
 '2.18.64',
 '2.18.65',
 '2.18.79',
 '2.18.92'

In [54]:
topics_per_class_whatsapp = topic_model_whatsapp.topics_per_class(df_whatsapp['content_corrected'], classes=classes_complete_whatsapp)

527it [00:13, 39.37it/s]


In [55]:
df_info_whatsapp = get_document_info_expand(topic_model_whatsapp, df_whatsapp)

In [56]:
df_info_whatsapp[df_info_whatsapp['reviewCreatedVersion'] == '2.23.7.14']

Unnamed: 0,Document,Topic,Name,Top_n_words,Probability,Representative_document,reviewId,userName,userImage,content,...,reviewCreatedVersion,at,replyContent,repliedAt,major_version,token_length,doc_length,content_corrected,lan_code,contains_only_english
1921,no transparency in account ban. just flashes a...,7,7_banned_account_reason_ban,banned - account - reason - ban - blocked - nu...,0.141878,False,65b7e1d3-0a94-46ab-a273-875cfc24876b,santosh kumar,https://play-lh.googleusercontent.com/a/AGNmyx...,No transparency in account ban. Just flashes a...,...,2.23.7.14,2023-04-04T08:07:34,,,2.23,44,283,no transparency in account ban. just flashes a...,['English'],True
2019,"split screen horrible, it's a new beta yesterd...",2,2_split_screen_fold_tablet,split - screen - fold - tablet - layout - view...,0.045564,False,d401f75e-c944-4c20-a89a-4b694da09518,Spartak Abdullin,https://play-lh.googleusercontent.com/a-/ACB-R...,"Split Screen Horrible, it's a new BETA Yesterd...",...,2.23.7.14,2023-03-31T06:43:23,,,2.23,79,494,"split screen horrible, it's a new beta yesterd...",['English'],True
2531,"pathetic app, deleted my all data including ba...",255,255_restoration_existent_backup_restore,restoration - existent - backup - restore - ba...,0.015314,False,c5c80235-1de3-443c-be16-7b2e7b64ec6a,jatin saini,https://play-lh.googleusercontent.com/a/AGNmyx...,"Pathetic app, deleted my all data including ba...",...,2.23.7.14,2023-03-28T07:53:05,,,2.23,25,132,"pathetic app, deleted my all data including ba...",['English'],True
5200,already for two weeks i have the same problem ...,19,19_code_verification_verify_sms,code - verification - verify - sms - number - ...,0.018523,False,120e309a-10c0-4e5f-a9ec-bd1fe02645bb,Mission Armenia,https://play-lh.googleusercontent.com/a/AGNmyx...,Already for two weeks i have the same problem ...,...,2.23.7.14,2023-04-01T12:47:30,,,2.23,35,176,already for two weeks i have the same problem ...,['English'],True
6926,can't see my messages which was marked star. a...,187,187_history_hung_went_restarted,history - hung - went - restarted - closing - ...,0.01091,False,a2568dff-1aa9-4f41-9e74-c748303a11ae,Pankaj Das,https://play-lh.googleusercontent.com/a-/ACB-R...,Can't see my messages which was marked star. A...,...,2.23.7.14,2023-04-02T15:04:55,,,2.23,11,63,can't see my messages which was marked star. a...,['English'],True
7209,i am stuck at the front page because of expira...,14,14_pending_trying_download_update,pending - trying - download - update - store -...,0.041701,False,d046b379-92d1-4539-9f45-0920b359b97a,Rasel K,https://play-lh.googleusercontent.com/a-/ACB-R...,I am stuck at the front page because of expira...,...,2.23.7.14,2023-03-28T05:49:54,,,2.23,18,96,i am stuck at the front page because of expira...,['English'],True
8425,bogus app... i'm on latest beta version but it...,90,90_update_updates_version_updated,update - updates - version - updated - updatin...,0.018917,False,a086cb99-3cda-49f5-a075-e021b9064695,MANISH LAMBA,https://play-lh.googleusercontent.com/a-/ACB-R...,Bogus app... I'm on latest beta version but it...,...,2.23.7.14,2023-03-28T08:02:40,,,2.23,22,119,bogus app... i'm on latest beta version but it...,['English'],True
12653,"beta version deleted all my data, up to 5+ yea...",0,0_backup_restore_drive_lost,backup - restore - drive - lost - history - lo...,0.066683,False,a25fdc94-8a38-4509-bc06-e8cd1f66dec4,Remy Homsi,https://play-lh.googleusercontent.com/a-/ACB-R...,"beta version deleted all my data, up to 5+ yea...",...,2.23.7.14,2023-03-28T21:09:02,,,2.23,12,62,"beta version deleted all my data, up to 5+ yea...",['English'],True
14798,nice but i have issue with something is not wo...,82,82_working_whatsapp_properly_solve,working - whatsapp - properly - solve - pleas ...,0.024402,False,43dc26de-adc4-41a8-8a54-b30a7287ef72,Parth Patel,https://play-lh.googleusercontent.com/a-/ACB-R...,Nice but I have issue with something is not wo...,...,2.23.7.14,2023-04-08T06:48:04,,,2.23,14,75,nice but i have issue with something is not wo...,['English'],True
15906,can't delete app samsung s9 android 10,113,113_uninstall_itself_off_again,uninstall - itself - off - again - restart - s...,0.01413,False,2a7e1fce-093d-4af2-bbb8-1523d23df60d,Vladyslav V,https://play-lh.googleusercontent.com/a/AGNmyx...,Can't delete app Samsung S9 Android 10,...,2.23.7.14,2023-03-29T12:03:24,,,2.23,6,38,can't delete app samsung s9 android 10,['English'],True


In [57]:
visualize_version(topic_model_whatsapp, df_info_whatsapp, topics_per_class_whatsapp, ['2.23.7.14'])

In [59]:
get_documents_version(topic_model_whatsapp, df_whatsapp, '2.23.7.14', 0)

['beta version deleted all my data, up to 5+ years of files lost']

## Paypal Version Specific Analysis

In [60]:
classes_complete_paypal = list(df_paypal['reviewCreatedVersion'])
versions = list(set(classes_complete_paypal))
versions.sort(key=lambda x: list(map(int, x.split('.'))))

In [61]:
[x for x in versions if x.split('.')[0] == '8'][50:]

['8.21.0',
 '8.21.1',
 '8.22.0',
 '8.22.1',
 '8.23.0',
 '8.24.0',
 '8.25.0',
 '8.26.0',
 '8.27.0',
 '8.28.0',
 '8.28.1',
 '8.29.0',
 '8.29.1',
 '8.29.2',
 '8.30.0',
 '8.30.1',
 '8.30.2',
 '8.31.0',
 '8.32.0',
 '8.32.1',
 '8.33.0',
 '8.34.0',
 '8.35.0',
 '8.36.0',
 '8.36.1',
 '8.37.1',
 '8.38.0']

In [62]:
topics_per_class_paypal = topic_model_paypal.topics_per_class(df_paypal['content_corrected'], classes=classes_complete_paypal)

232it [00:08, 28.75it/s]


In [63]:
df_info_paypal = get_document_info_expand(topic_model_paypal, df_paypal)

In [64]:
df_info_paypal[df_info_paypal['reviewCreatedVersion'] == '8.37.1']

Unnamed: 0,Document,Topic,Name,Top_n_words,Probability,Representative_document,reviewId,userName,userImage,content,...,reviewCreatedVersion,at,replyContent,repliedAt,major_version,token_length,doc_length,content_corrected,lan_code,contains_only_english
35,absolutely pointless. login with fingerprint a...,3,3_fingerprint_pin_finger_print,fingerprint - pin - finger - print - asks - lo...,0.066813,False,28a52a78-82d6-4cfc-8773-1bfd43e16133,Carbon,https://play-lh.googleusercontent.com/a-/ACB-R...,Absolutely pointless. Login with fingerprint a...,...,8.37.1,2023-04-12T13:13:34,,,8,57,343,absolutely pointless. login with fingerprint a...,['English'],True
38,i have been with paypal for 10yrs +. paypal cr...,85,85_fraudulent_scam_skype_protect,fraudulent - scam - skype - protect - paypal -...,0.024056,False,e4573f77-b814-41d7-843b-c47ca129d704,Luke Floyd,https://play-lh.googleusercontent.com/a/AGNmyx...,I have been with Paypal for 10yrs +. Paypal cr...,...,8.37.1,2023-04-14T10:53:58,,,8,65,373,i have been with paypal for 10yrs +. paypal cr...,['English'],True
40,"slow and clunky, there are many link i click o...",136,136_online_xbox_selling_buying,online - xbox - selling - buying - shop - sell...,0.017731,False,7fbd334b-370f-4201-9775-df55a6107e4e,Austin McCracken,https://play-lh.googleusercontent.com/a-/ACB-R...,"Slow and clunky, there are many link I click o...",...,8.37.1,2023-04-09T18:57:40,We're sorry you're having trouble with the app...,2023-04-12T18:02:31,8,73,358,"slow and clunky, there are many link i click o...",['English'],True
45,"the app ok, the service sucks . zero fraud det...",110,110_fraudulent_correcting_co_company,fraudulent - correcting - co - company - fraud...,0.042599,False,551438e6-e0ef-4a91-948d-d259568f18c6,Wesley Jackson,https://play-lh.googleusercontent.com/a-/ACB-R...,"The app ok, the service sucks . Zero fraud det...",...,8.37.1,2023-04-08T00:30:59,We're sorry to hear about your recent experien...,2023-04-08T14:43:26,8,65,361,"the app ok, the service sucks . zero fraud det...",['English'],True
47,can't even tell if the app does anything. if y...,624,624_entering_aborts_troubling_edit,entering - aborts - troubling - edit - securit...,0.665658,True,fad36f35-0d02-4998-98ac-883875660f6e,John Elsten,https://play-lh.googleusercontent.com/a-/ACB-R...,Can't even tell if the app does anything. If y...,...,8.37.1,2023-04-13T02:38:35,We're sorry you're having trouble with the app...,2023-04-14T15:50:27,8,50,270,can't even tell if the app does anything. if y...,['English'],True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25888,i cannot link my bank account,1,1_link_card_bank_debit,link - card - bank - debit - add - linked - tr...,0.758400,False,dc721dea-214f-43b8-9eab-801106539e42,Luke A,https://play-lh.googleusercontent.com/a-/ACB-R...,I cannot link my bank account,...,8.37.1,2023-04-13T00:46:49,We're sorry that you can't link your bank acco...,2023-04-14T14:53:52,8,6,29,i cannot link my bank account,['English'],True
29511,do not download they take your money,25,25_download_apps_scammed_this,download - apps - scammed - this - hold - they...,0.750461,True,2a3ffae6-e47f-4aa9-afe2-c1e0fd1960da,Sarah Burns,https://play-lh.googleusercontent.com/a/AGNmyx...,Do not download they take your money,...,8.37.1,2023-04-12T15:39:32,We're sorry to hear about your recent experien...,2023-04-13T21:23:02,8,7,36,do not download they take your money,['English'],True
29517,can't link my bank account,1,1_link_card_bank_debit,link - card - bank - debit - add - linked - tr...,0.091163,False,e28ed69c-d666-46b0-ae27-66b793d3e2a0,tamoghna dey,https://play-lh.googleusercontent.com/a-/ACB-R...,Can't link my bank account,...,8.37.1,2023-04-10T23:00:49,We understand that you're having trouble linki...,2023-04-13T14:15:16,8,5,26,can't link my bank account,['English'],True
29530,i made one donation and you made 2 from me,298,298_donation_donations_streamlabs_donate,donation - donations - streamlabs - donate - f...,0.011717,False,c82df32e-c493-4009-93bd-6ff5639c1705,Arlene,https://play-lh.googleusercontent.com/a-/ACB-R...,I made one donation and you made 2 from me,...,8.37.1,2023-04-13T21:08:05,We're sorry to hear about your recent experien...,2023-04-14T20:14:36,8,9,42,i made one donation and you made 2 from me,['English'],True


In [70]:
visualize_version(topic_model_paypal, df_info_paypal, topics_per_class_paypal, ['8.37.1'], threshold=2, height=600)

In [68]:
get_documents_version(topic_model_paypal, df_paypal, '8.37.1', -1)

["the worst experience I have ever had! I used the app for a few days and firstly my money was held with no reason and I'm not a seller. Support can't do anything about this. And after I have tried to solve this problem with support they just blocked my account and will hold my money for 180 days. Isn't it a reason why it's not a s**t?",
 'So far, havent had any of the usual issues paypal seems to have that competitors do not. Hopefully it stays that way! Can never have too many options',
 "Hi paypal, i made an account , and i used the wrong number. I've been trying to change the number in login, but it won't work. Could you please add a help button at login for mistaking your number",
 "It's the worst app to use for a savings account. I'm glad I have Credit Karma, it's better and WAY MUCH EASY-TO-USE and navigate.",
 "I'll be closing my account since at least 2003. You are ending PayPal Here and I'm ending business with you. I have got a Square reader ($free) coming and a square debit

## Amazon Version Specific Analysis

### Visualize topics per version

In [71]:
classes_complete_amazon = list(df_amazon['reviewCreatedVersion'])
versions = list(set(classes_complete_amazon))
versions.sort(key=lambda x: list(map(int, x.split('.'))))

In [72]:
versions

['5.2.3',
 '5.3.1.100',
 '5.4.0.100',
 '5.4.4.100',
 '5.5.0.100',
 '5.6.0.100',
 '5.6.2.100',
 '5.7.0.100',
 '5.7.2.100',
 '5.8.0.100',
 '5.9.0.100',
 '6.0.0.100',
 '6.0.2.100',
 '6.1.0.100',
 '6.1.4.100',
 '6.2.0.100',
 '6.3.1.100',
 '6.4.0.100',
 '6.5.0.100',
 '6.6.0.100',
 '6.7.1.100',
 '6.8.0.100',
 '8.0.0.100',
 '8.1.0.100',
 '8.2.0.100',
 '8.3.0.100',
 '8.4.0.100',
 '8.5.0.100',
 '8.6.0.100',
 '8.6.1.100',
 '8.7.0.100',
 '8.8.0.100',
 '8.9.0.100',
 '8.9.2.100',
 '8.9.3.100',
 '10.1.0.100',
 '10.2.0.100',
 '10.3.0.100',
 '10.4.0.100',
 '10.5.0.100',
 '10.6.0.100',
 '10.7.0.100',
 '10.8.0.100',
 '10.9.0.100',
 '12.0.0.100',
 '12.0.1.100',
 '12.1.0.100',
 '12.2.0.100',
 '12.2.1.100',
 '12.4.0.100',
 '12.5.0.100',
 '12.6.0.100',
 '12.7.0.100',
 '12.8.0.100',
 '12.9.0.100',
 '14.0.0.100',
 '14.0.1.100',
 '14.1.0.100',
 '14.2.0.100',
 '14.2.1.100',
 '16.01.0.100',
 '16.02.0.100',
 '16.3.0.100',
 '16.4.0.100',
 '16.5.0.100',
 '16.6.0.100',
 '16.7.0.100',
 '16.8.0.100',
 '16.9.0.100',
 '

In [73]:
topics_per_class_amazon = topic_model_amazon.topics_per_class(df_amazon['content_corrected'], classes=classes_complete_amazon)

193it [00:07, 24.25it/s]


In [74]:
df_info_amazon = get_document_info_expand(topic_model_amazon, df_amazon)

In [75]:
df_info_amazon[df_info_amazon['reviewCreatedVersion'] == '26.8.0.100']

Unnamed: 0,Document,Topic,Name,Top_n_words,Probability,Representative_document,reviewId,userName,userImage,content,...,reviewCreatedVersion,at,replyContent,repliedAt,major_version,token_length,doc_length,content_corrected,lan_code,contains_only_english
101,amazon used to be amazing but it's getting har...,1,1_prime_day_shipping_days,prime - day - shipping - days - membership - p...,0.057114,False,aaca596c-a1c9-483c-a75e-40b1262eee28,Kyle Cleveland,https://play-lh.googleusercontent.com/a/AGNmyx...,Amazon used to be amazing but it's getting har...,...,26.8.0.100,2023-04-13T19:32:12,,,26,100,498,amazon used to be amazing but it's getting har...,['English'],True
102,horrible app. doesn't let you put in password....,46,46_number_phone_verification_account,number - phone - verification - account - emai...,0.022003,False,f5f12a4f-ff01-409e-96b8-f92170da82d6,Yaitza,https://play-lh.googleusercontent.com/a/AGNmyx...,Horrible app. Doesn't let you put in password....,...,26.8.0.100,2023-04-13T05:43:54,,,26,53,298,horrible app. doesn't let you put in password....,['English'],True
139,amazon customer service and overall business i...,5,5_they_delivery_service_delivered,they - delivery - service - delivered - amazon...,0.028461,False,b01e02f3-e968-4f4f-b685-b1171aede385,Joseph Strichko,https://play-lh.googleusercontent.com/a-/ACB-R...,Amazon customer service and overall business i...,...,26.8.0.100,2023-04-12T20:54:06,,,26,88,500,amazon customer service and overall business i...,['English'],True
227,update: when you thought things can't get any ...,28,28_alexa_voice_microphone_button,alexa - voice - microphone - button - disable ...,1.0,False,a8a67401-f113-4891-bcc4-788e509bbb5a,Mick .Y,https://play-lh.googleusercontent.com/a/AGNmyx...,Update: when you thought things can't get any ...,...,26.8.0.100,2023-04-14T06:23:27,,,26,78,428,update: when you thought things can't get any ...,['English'],True
229,"it is aber monday, and my amazon app (on phone...",2,2_spanish_english_language_french,spanish - english - language - french - speak ...,0.091257,False,d9d04079-230d-4d4f-8894-0f79b1cae733,Isaac Bundy,https://play-lh.googleusercontent.com/a-/ACB-R...,"It is Cyber Monday, and my Amazon app (on phon...",...,26.8.0.100,2023-04-13T05:40:49,,,26,60,307,"it is aber monday, and my amazon app (on phone...",['English'],True
308,company is so unprofessional & leaves zero opt...,33,33_drivers_door_driver_instructions,drivers - door - driver - instructions - packa...,0.046265,False,7a9468dc-5dc5-4be2-a99a-b66b0a5b3f26,Philip L,https://play-lh.googleusercontent.com/a-/ACB-R...,Company is so unprofessional & leaves zero opt...,...,26.8.0.100,2023-04-12T20:44:55,,,26,76,409,company is so unprofessional & leaves zero opt...,['English'],True
416,worst experience i've ever had! i bought a uni...,5,5_they_delivery_service_delivered,they - delivery - service - delivered - amazon...,0.020642,False,f5e765a0-905c-452d-9621-848aba750f41,David Reid,https://play-lh.googleusercontent.com/a-/ACB-R...,Worst experience I've ever had! I bought a uni...,...,26.8.0.100,2023-04-14T01:55:58,,,26,83,449,worst experience i've ever had! i bought a uni...,['English'],True
538,the little light bulb icon at the bottom that ...,11,11_orders_load_view_loading,orders - load - view - loading - see - history...,0.020616,False,0c7354f3-66e7-4e61-ad89-0c1a7e0c0052,Miyuki Ayumu,https://play-lh.googleusercontent.com/a-/ACB-R...,The little light bulb icon at the bottom that ...,...,26.8.0.100,2023-04-13T18:11:54,,,26,40,216,the little light bulb icon at the bottom that ...,['English'],True
559,i have had extremely terrible experiences with...,5,5_they_delivery_service_delivered,they - delivery - service - delivered - amazon...,0.018126,False,11d49e18-1970-4985-87c0-f2475b28d751,Brian Snow,https://play-lh.googleusercontent.com/a-/ACB-R...,I have had extremely terrible experiences with...,...,26.8.0.100,2023-04-13T23:22:10,,,26,97,485,i have had extremely terrible experiences with...,['English'],True
681,i don't know who's dumb idea it was to put thi...,231,231_inspire_tab_tiktok_l8r,inspire - tab - tiktok - l8r - remove - inspir...,0.285348,False,15fb2c68-b9bc-4170-8d32-98f4b7c44ca6,Savannah Rose,https://play-lh.googleusercontent.com/a/AGNmyx...,I don't know who's dumb idea it was to put thi...,...,26.8.0.100,2023-04-14T13:43:02,,,26,54,268,i don't know who's dumb idea it was to put thi...,['English'],True


In [76]:
visualize_version(topic_model_amazon, df_info_amazon, topics_per_class_amazon, ['26.8.0.100'], threshold=2, height=300)

In [78]:
get_documents_version(topic_model_amazon, df_amazon, '26.8.0.100', 5)

["Amazon customer service and overall business is declining, their customer service has gone down the tubes, some don't know how to resolve issues with customers orders, they make you wait on the phone while they're trying to figure out a way to fix the issue, then they make customers wait for 24hrs or longer and hopefully they can correct their problems. They need to give us customers that option on how things are shipped, rather than making us wait and hope their isn't an issue with the shipment",
 "Worst experience I've ever had! I bought a unit, and someone at Amazon DIDN'T DO THEIR JOB! I received the unit in non working order! They refuse to do anything about it! Mind you, they took my money within an hour of placing the order! But I can't get refunded until they have product in hand, and 3-5 business days AFTER THAT? Customer service is very rude! As son as I get my refund? I'm cancelling my subscription, and WILL NEVER USE THEM AGAIN!",
 "I have had extremely terrible experienc