https://github.com/ddangelov/Top2Vec

This version uses a constructed text of the metadata for each video
- title
- channel name
- description
- video tags (if available)
- category 

In [1]:
from top2vec import Top2Vec
import pandas as pd 
import re

df = pd.read_csv('metadata_stats_1k_200words.csv')

# Only the text columns that are concatenated below need their NaNs replaced
# (e.g. videos with no description or no tags). A blanket df.fillna('') would
# also write empty strings into numeric columns that have missing values
# (such as comment_count), turning them into mixed-type object columns.
text_columns = ['title', 'channel_title', 'description', 'video_tags', 'category']
df[text_columns] = df[text_columns].fillna('')

# Concatenate all the information with labels
df['all_info'] = (
    'title: ' + df['title']
    + ', channel name: ' + df['channel_title']
    + ', description: ' + df['description']
    + ', video tags: ' + df['video_tags']
    + ', category: ' + df['category']
)

# Convert to list
raw_text = df['all_info'].tolist()

# function to remove URLs
def remove_urls(text):
    """Return `text` with URLs removed.

    A URL here is any run of non-whitespace characters that starts with
    ``http`` or with a literal ``www.``.
    """
    # The dot after "www" is escaped so that only a literal "www." starts a
    # match; unescaped it matched any character, so ordinary words such as
    # "awwwww" were partially deleted.
    return re.sub(r'http\S+|www\.\S+', '', text)


# One cleaned document per video, in the same order as the rows of df.
docs = [remove_urls(text) for text in raw_text]
# Show only a small sample: echoing the whole corpus bloats the notebook output.
docs[:3]

  from pandas.core.computation.check import NUMEXPR_INSTALLED


["title: Unlock YOUR Phone: Prove You’re Not Cheating With My Cousin! | KARAMO, channel name: Karamo Show, description: After Sylvia allowed her cousin, Santa, to move in with her and her boyfriend, Lawrence, things started to get messy. Sylvia claims ..., video tags: 'Karamo', category: People & Blogs",
 "title: Snow Fun! | My Little Pony:Tell Your Tale Full Episode MLP Equestria Magic, channel name: My Little Pony Official, description: Watch more episodes:  ❤️ Subscribe to the My Little Pony Channel:  Unicorns, ..., video tags: 'my little pony', 'pony', 'little pony', 'my little pony song', 'equestria girls', 'my little pony equestria girls', 'unicorn', 'my little pony equestria girl', 'mlp', 'pony song', 'my little pony songs', 'my little pony full episode', 'my little pony tell your tale', 'unicorn cartoon', 'kuda poni', 'mlp tell your tale', 'twilight sparkle', 'my little pony theme song', 'equestria girls song', 'pinkie pie', 'pony life', 'mlp equestria girls', 'little pony song

I was going to do some preprocessing, but maybe don't need to: https://github.com/ddangelov/Top2Vec/issues/51
- (the only thing that I removed was URLs)

In [2]:
# The video ids are handed to Top2Vec as the document ids.
document_ids = df['video_id'].tolist()

# NOTE(review): Top2Vec's documentation describes `speed` as a doc2vec
# training setting; confirm whether it has any effect when a
# sentence-transformer embedding model is used.
ngram_model = Top2Vec(
    documents=docs,
    speed='deep-learn',
    workers=32,
    ngram_vocab=True,
    document_ids=document_ids,
    embedding_model='all-MiniLM-L6-v2',
)

Save the model

In [3]:
# Write the trained model to disk so it can be reloaded without retraining.
ngram_model.save('25k_ngram_metadata_MiniLM_L6')


Load the model (after you have trained and saved the model, you can just load it from here)

In [3]:
# Reload the saved model under the name `model`, which the cells below use.
model = Top2Vec.load('25k_ngram_metadata_MiniLM_L6')

#### Get number of topics

In [4]:
# Number of topics in the loaded model.
model.get_num_topics()

208

#### Get topic sizes

In [5]:
# get_topic_sizes() hands back the sizes first, then the matching topic numbers.
topic_sizes, topic_nums = model.get_topic_sizes()
size_nums = pd.DataFrame(dict(topic_nums=topic_nums, topic_sizes=topic_sizes))
size_nums


Unnamed: 0,topic_nums,topic_sizes
0,0,797
1,1,761
2,2,682
3,3,660
4,4,596
...,...,...
203,203,22
204,204,21
205,205,21
206,206,20


In [6]:
# Total number of documents across all topics.
size_nums['topic_sizes'].sum()

25434

#### Get Topics
This will return the topics in decreasing size.

In [7]:
# Ask for every topic the model has instead of a hard-coded count, so this
# cell keeps working if the model is retrained and the topic count changes.
topic_words, word_scores, topic_nums = model.get_topics(model.get_num_topics())

# One row per topic. Each topic's word array is rendered as text and the
# enclosing square brackets are stripped for readability.
topics_df = pd.DataFrame({'topic_number': topic_nums, 'topic_words': list(topic_words)})
topics_df['topic_words'] = topics_df['topic_words'].apply(lambda x: str(x).replace('[','').replace(']',''))
topics_df
# topics_df.to_csv('topic_nums_and_words.csv', index=False)


Unnamed: 0,topic_number,topic_words
0,0,'channel name' 'celebrity gossip' 'itv showbiz...
1,1,'people blogs' 'channel name' 'blogs' 'videos ...
2,2,'newscast politics' 'msnbc' 'msnbc msnbc' 'msn...
3,3,'automotive cars' 'automotive' 'autos vehicles...
4,4,'rap song' 'lyrics powfu' 'lyrics' 'diss song'...
...,...,...
203,203,'video editors' 'manga dub' 'manga dubs' 'dub ...
204,204,'dwight pranks' 'the office' 'office bloopers'...
205,205,'single women' 'women single' 'value woman' 'm...
206,206,'jim cramer' 'cramer says' 'cramer' 'cnbc mad'...


Add the scores to the words (Top2Vec word scores are cosine similarities between each word and the topic vector, not probabilities)

In [8]:
# For every topic, build a single string of "word (score)" entries, with each
# score rounded to two decimals and entries separated by ", ".
words_and_scores = [
    ", ".join(
        word + " (" + str(round(score, 2)) + ")"
        for word, score in zip(words_list, score_list)
    )
    for words_list, score_list in zip(topic_words, word_scores)
]

topic_words_scores_df = pd.DataFrame({'topic_number': topic_nums, 'topic_words': words_and_scores})
topic_words_scores_df

# topic_words_scores_df.to_csv('MiniLM_L6_metadata_model_topics.csv', index=False, encoding='utf-8')


Unnamed: 0,topic_number,topic_words
0,0,"channel name (0.56), celebrity gossip (0.49), ..."
1,1,"people blogs (0.55), channel name (0.55), blog..."
2,2,"newscast politics (0.62), msnbc (0.61), msnbc ..."
3,3,"automotive cars (0.54), automotive (0.53), aut..."
4,4,"rap song (0.58), lyrics powfu (0.56), lyrics (..."
...,...,...
203,203,"video editors (0.6), manga dub (0.54), manga d..."
204,204,"dwight pranks (0.61), the office (0.55), offic..."
205,205,"single women (0.52), women single (0.52), valu..."
206,206,"jim cramer (0.67), cramer says (0.57), cramer ..."


Add the label column after working on it manually

In [9]:
labels_df = pd.read_csv('MiniLM_L6_metadata_model_topics.csv')

# Look each label up by topic_number rather than by row position, so a
# re-sorted or partially filled labels file cannot attach a label to the
# wrong topic. Topics with no entry in the labels file get NaN.
label_by_topic = labels_df.set_index('topic_number')['label']
topic_words_scores_df['label'] = topic_words_scores_df['topic_number'].map(label_by_topic)
topic_words_scores_df

Unnamed: 0,topic_number,topic_words,label
0,0,"channel name (0.56), celebrity gossip (0.49), ...",Celebrity / gossip / entertainment / comedy
1,1,"people blogs (0.55), channel name (0.55), blog...",Blogs / vlogs / viral videos / YouTubers / li...
2,2,"newscast politics (0.62), msnbc (0.61), msnbc ...",News / politics
3,3,"automotive cars (0.54), automotive (0.53), aut...",Cars / automotive
4,4,"rap song (0.58), lyrics powfu (0.56), lyrics (...",Rap / lyrics / songs
...,...,...,...
203,203,"video editors (0.6), manga dub (0.54), manga d...",video editing / manga / dubs / anime
204,204,"dwight pranks (0.61), the office (0.55), offic...",TV shows / comedy / The Office (U.S.)
205,205,"single women (0.52), women single (0.52), valu...",dating / advice / women / gender
206,206,"jim cramer (0.67), cramer says (0.57), cramer ...",financial news / the stock market / Jim Cramer


In [24]:
# topic_words_scores_df.to_csv('topic_nums_words_scores.csv', index=False)

Add top topic to each document:
- https://stackoverflow.com/questions/74026634/top2vec-reassign-topics-to-original-df
- but need to look around and check the docs: is `get_documents_topics` deprecated?
- try to find more about 'doc_top'

In [10]:
# Store the model's per-document topic assignment as a new column.
# NOTE(review): this assumes the rows of df are still in the same order as the
# documents the model was trained on — confirm before trusting the mapping.
df['topic_number'] = model.doc_top

In [11]:
# Inspect the metadata frame, which now includes the topic_number column.
df

Unnamed: 0,video_id,title,published_at,channel_title,channel_id,description,etag,search_term,video_length,time_in_seconds,...,comment_count,category_id,category,video_tags,stats_date,days_since_upload,views_per_day,text_length,all_info,topic_number
0,CEGUXguToQM,Unlock YOUR Phone: Prove You’re Not Cheating W...,2022-11-15T22:00:05Z,Karamo Show,UC8VB-yjHltith0-g2LshPJA,"After Sylvia allowed her cousin, Santa, to mov...",NaBho42x9mSLsX2N7D_zNiAMIcU,about_my_your,PT15M43S,943.0,...,481.0,22,People & Blogs,'Karamo',2023-01-23T18:46:46.208660,70,3095.19,3230,title: Unlock YOUR Phone: Prove You’re Not Che...,37
1,4you17X6U48,Snow Fun! | My Little Pony:Tell Your Tale Full...,2022-12-24T00:15:00Z,My Little Pony Official,UC9qgVkKRZUXcgHdf35Z-8dw,Watch more episodes: https://bit.ly/36Z4i7o ❤️...,lp5QFQPO-8vWjHlnJKEN56LTLkk,about_my_your,PT4M58S,298.0,...,,1,Film & Animation,"'my little pony', 'pony', 'little pony', 'my l...",2023-01-23T18:46:46.208660,31,229.94,341,title: Snow Fun! | My Little Pony:Tell Your Ta...,149
2,es7T1jmz824,"Dad, Bufo Misses You! - Your Dad vs My Dad - K...",2022-04-21T04:01:53Z,Wolfoo Family,UCoL0M9swO14BT8u9pTn9MvQ,"Dad, Bufo Misses You! - Your Dad vs My Dad - K...",DxWWJZ_7R-Q7lKwhIUfbEpBHnMc,about_my_your,PT11M46S,706.0,...,0.0,1,Film & Animation,"'Wolfoo family kids cartoon', 'Wolfoo family',...",2023-01-23T18:46:46.208660,278,7571.68,294,"title: Dad, Bufo Misses You! - Your Dad vs My ...",85
3,wPbjP1d81uE,My Little Pony: Tell Your Tale | My Bananas | ...,2022-08-04T11:00:28Z,My Little Pony Official,UC9qgVkKRZUXcgHdf35Z-8dw,Watch more episodes: https://bit.ly/36Z4i7o ❤️...,FtrG4aUblR_8mfa0RQa1jboYzSE,about_my_your,PT5M9S,309.0,...,,1,Film & Animation,"'my little pony', 'pony', 'little pony', 'my l...",2023-01-23T18:46:46.208660,173,14434.87,328,title: My Little Pony: Tell Your Tale | My Ban...,149
4,UDyNOyBcfb8,My Little Pony: Tell Your Tale | PIPP COMPILAT...,2022-05-28T14:00:34Z,My Little Pony Official,UC9qgVkKRZUXcgHdf35Z-8dw,Watch more episodes: https://bit.ly/36Z4i7o ❤️...,zw5bN10tOXz3AWTnqrWubs4BLWk,about_my_your,PT19M53S,1193.0,...,,1,Film & Animation,"'my little pony', 'pony', 'little pony', 'my l...",2023-01-23T18:46:46.208660,241,3468.51,1424,title: My Little Pony: Tell Your Tale | PIPP C...,149
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25429,uyXKAT8q8Uc,&#39;Thank you Southwest&#39;,2022-12-31T00:38:44Z,CBS Chicago,UCkBS_xU1WQ7FVsDoQ_OPeDg,CBS 2's Noel Brennan reports from Midway Airpo...,h_ei6zVZ0bCcjnOlqxtgT4JOdHo,you_been_few,PT2M31S,151.0,...,,25,News & Politics,'CBS 2 News Evening',2023-01-24T19:18:56.843446,24,80.54,488,"title: &#39;Thank you Southwest&#39;, channel ...",76
25430,kX_phLxLdN0,Bert Kreischer Loves Dogs...A Little Too Much,2022-12-30T16:00:21Z,PINGTR1P,UCF_EHniFYMK03DvZLUtTvzg,Bert finally admits what we've all been suspec...,mluxbeR9gPH0EZ-s3oXo7Y3nwmU,you_been_few,PT2M18S,138.0,...,277.0,23,Comedy,"'joe rogan', 'bert kreischer', 'tom segura', '...",2023-01-24T19:18:56.843446,25,1536.60,328,title: Bert Kreischer Loves Dogs...A Little To...,14
25431,J2u6IgsHgDU,New Illinois laws for 2023 to affect drivers o...,2022-12-31T00:29:48Z,CBS Chicago,UCkBS_xU1WQ7FVsDoQ_OPeDg,There will be changes when you get behind the ...,3X29uR612Kc4Q9uQ5RXW6ujM7O4,you_been_few,PT2M48S,168.0,...,,25,News & Politics,"'CBS 2 News Evening', 'New Laws', 'Illinois', ...",2023-01-24T19:18:56.843446,24,10075.25,511,title: New Illinois laws for 2023 to affect dr...,155
25432,CNCz5p-fTeg,Some of you been thinking very similar 💵🎁🥺,2022-12-20T03:13:36Z,Prophetess Shaneik,UCnr0wO15xYGgSkHnjztdJtA,Not feeling too great today . Sorry about the ...,ruPrLf4Ri9uLGIAoO2i9r6ERiTo,you_been_few,PT3M1S,181.0,...,40.0,22,People & Blogs,,2023-01-24T19:18:56.843446,35,30.91,529,title: Some of you been thinking very similar ...,1


Create a df to verify if the topic labels seem to be accurate or not

In [20]:
# verify_df = df[['video_id', 'topic_number','title', 'channel_title', 'description', 'category']]
# verify_df

Unnamed: 0,video_id,topic_number,title,channel_title,description,category
0,CEGUXguToQM,23,Unlock YOUR Phone: Prove You’re Not Cheating W...,Karamo Show,"After Sylvia allowed her cousin, Santa, to mov...",People & Blogs
1,4you17X6U48,150,Snow Fun! | My Little Pony:Tell Your Tale Full...,My Little Pony Official,Watch more episodes: https://bit.ly/36Z4i7o ❤️...,Film & Animation
2,es7T1jmz824,81,"Dad, Bufo Misses You! - Your Dad vs My Dad - K...",Wolfoo Family,"Dad, Bufo Misses You! - Your Dad vs My Dad - K...",Film & Animation
3,wPbjP1d81uE,150,My Little Pony: Tell Your Tale | My Bananas | ...,My Little Pony Official,Watch more episodes: https://bit.ly/36Z4i7o ❤️...,Film & Animation
4,UDyNOyBcfb8,150,My Little Pony: Tell Your Tale | PIPP COMPILAT...,My Little Pony Official,Watch more episodes: https://bit.ly/36Z4i7o ❤️...,Film & Animation
...,...,...,...,...,...,...
25429,uyXKAT8q8Uc,99,&#39;Thank you Southwest&#39;,CBS Chicago,CBS 2's Noel Brennan reports from Midway Airpo...,News & Politics
25430,kX_phLxLdN0,297,Bert Kreischer Loves Dogs...A Little Too Much,PINGTR1P,Bert finally admits what we've all been suspec...,Comedy
25431,J2u6IgsHgDU,1,New Illinois laws for 2023 to affect drivers o...,CBS Chicago,There will be changes when you get behind the ...,News & Politics
25432,CNCz5p-fTeg,73,Some of you been thinking very similar 💵🎁🥺,Prophetess Shaneik,Not feeling too great today . Sorry about the ...,People & Blogs


create a df with the video id and top topic associated with each video

In [12]:
# Keep only the video id and the topic number assigned to it.
id_and_topic_columns = ['video_id', 'topic_number']
topnum_df = df[id_and_topic_columns]

topnum_df

Unnamed: 0,video_id,topic_number
0,CEGUXguToQM,37
1,4you17X6U48,149
2,es7T1jmz824,85
3,wPbjP1d81uE,149
4,UDyNOyBcfb8,149
...,...,...
25429,uyXKAT8q8Uc,76
25430,kX_phLxLdN0,14
25431,J2u6IgsHgDU,155
25432,CNCz5p-fTeg,1


In [22]:
# topnum_df.to_csv('vidid_topicNumber_from_metadata.csv', index=False)

merge the topic labels to the df with the top topic for each video
- at first I added the topic words too, but I decided that the df would probably be less readable
- I will refer back to 'topic_nums_words_scores.csv' to amend any labels that need to be amended
- then re-add the labels later

In [13]:
# labels_df = pd.read_csv('metadata_model_topics.csv')

# Each video row is matched to the label row with the same topic_number.
# validate='many_to_one' makes the merge raise if the hand-edited labels file
# lists a topic_number more than once, instead of silently duplicating videos.
merged_df = topnum_df.merge(labels_df, on='topic_number', validate='many_to_one')
merged_df2 = merged_df[['video_id', 'topic_number', 'label']]
merged_df2


Unnamed: 0,video_id,topic_number,label
0,CEGUXguToQM,37,marriage / divorce / cheating / affairs
1,B6bbXuUFLAw,37,marriage / divorce / cheating / affairs
2,_qZvEExuClA,37,marriage / divorce / cheating / affairs
3,zE6LRYUelHg,37,marriage / divorce / cheating / affairs
4,Zlbf0PkV_Pw,37,marriage / divorce / cheating / affairs
...,...,...,...
25429,FDO1OsJMeGQ,195,video games / The Legend of Zelda / gameplay
25430,nsdD3xBf1zY,195,video games / The Legend of Zelda / gameplay
25431,7yd_9zQff_g,195,video games / The Legend of Zelda / gameplay
25432,jXAmaY6EvOQ,195,video games / The Legend of Zelda / gameplay


In [14]:
# merged_df2.to_csv('MiniLM_L6_vidId_topic_metadata.csv', index=False)

add some metadata to check if the labels are accurate or not

In [15]:
# Join the labelled videos back onto the full metadata. Both frames carry a
# topic_number column, so pandas suffixes them with _x (left) and _y (right).
label_check_df = pd.merge(merged_df2, df, on='video_id')

# Keep the columns needed to eyeball each label, restoring the plain
# topic_number name for the left-hand copy.
review_columns = ['video_id', 'topic_number_x', 'label', 'title', 'channel_title', 'description', 'category']
label_check_df_2 = (
    label_check_df[review_columns]
    .rename(columns={'topic_number_x': 'topic_number'})
)
label_check_df_2

Unnamed: 0,video_id,topic_number,label,title,channel_title,description,category
0,CEGUXguToQM,37,marriage / divorce / cheating / affairs,Unlock YOUR Phone: Prove You’re Not Cheating W...,Karamo Show,"After Sylvia allowed her cousin, Santa, to mov...",People & Blogs
1,B6bbXuUFLAw,37,marriage / divorce / cheating / affairs,ANSWERING YOUR ASSUMPTIONS...ABOUT MY HUSBAND,Elliot Brooks,ANSWERING YOUR ASSUMPTIONS...ABOUT MY HUSBAND ...,Entertainment
2,_qZvEExuClA,37,marriage / divorce / cheating / affairs,You Took My Daughter&#39;s Virginity... Stop D...,TheMauryShowOfficial,Now that the DNA test has been revealed will A...,Entertainment
3,zE6LRYUelHg,37,marriage / divorce / cheating / affairs,Your Question Answered About My First Marriage,Desi Jatt Uk,,People & Blogs
4,Zlbf0PkV_Pw,37,marriage / divorce / cheating / affairs,marry your first cousin challenge (my big fat ...,kamek,i do be tweeting sometimes:https://twitter.com...,Entertainment
...,...,...,...,...,...,...,...
25429,FDO1OsJMeGQ,195,video games / The Legend of Zelda / gameplay,nintendo added a very strange beef boss to nin...,Poofesure,nintendo added a very strange beef boss to nin...,Gaming
25430,nsdD3xBf1zY,195,video games / The Legend of Zelda / gameplay,Things You *NEVER* KNEW about EVERY DAY Items!,MoreAliA,Things You *NEVER* KNEW about EVERY DAY Items!...,Gaming
25431,7yd_9zQff_g,195,video games / The Legend of Zelda / gameplay,A Very Different Haunted Mansion #disney #disn...,Disney Cicerone,The last concept was eventually used in the qu...,People & Blogs
25432,jXAmaY6EvOQ,195,video games / The Legend of Zelda / gameplay,Melee&#39;s &quot;Glitches&quot; (that aren&#3...,AsumSaus,Sometimes it really is just meant to be that w...,Gaming


In [16]:
# label_check_df.to_csv('MiniLM_L6_label_check_metadata_model.csv', index=False)

Count number of each label

In [17]:
# Number of videos carrying each label.
labels_per_video = merged_df2['label']
label_counts = labels_per_video.value_counts()
label_counts

 Celebrity / gossip / entertainment / comedy              797
 Blogs / vlogs / viral videos / YouTubers / livestream    761
 News / politics                                          682
 Cars                                                     660
 Rap / lyrics / songs                                     596
                                                         ... 
 video editing / manga / dubs / anime                      22
 TV shows / comedy / The Office (U.S.)                     21
 dating / advice / women / gender                          21
 financial news / the stock market / Jim Cramer            20
 manga dubs / anime / Japanese manga                       17
Name: label, Length: 208, dtype: int64

In [18]:
# label_counts.to_csv('MiniLM_L6_topic_counts.csv')

In [135]:
# A model trained with document_ids (as in the training cell of this notebook,
# which passes the video ids) must be queried with those ids; positional
# indices are only valid document ids for a model trained without them.
# NOTE(review): `document_ids_provided` is read defensively via getattr —
# confirm the attribute name against the installed Top2Vec version.
if getattr(model, 'document_ids_provided', False):
    docs_index = df['video_id'].tolist()
else:
    docs_index = list(range(len(docs)))
# use model.get_documents_topics for assigning multiple topics (here 3 topics per document) to each document:
topics, topic_scores, topic_words, words_score = model.get_documents_topics(docs_index, num_topics=3)

In [136]:
# Topic numbers returned for each document (3 requested per document).
topics

array([[16,  4, 44],
       [13, 39, 15],
       [78, 59, 47],
       ...,
       [10, 65, 27],
       [ 4,  5, 16],
       [13, 15, 98]], dtype=int64)

In [137]:
# Score of each returned topic for each document, aligned with `topics`.
topic_scores

array([[0.24028754, 0.21561778, 0.19289291],
       [0.316956  , 0.27451268, 0.24055976],
       [0.38893318, 0.37870815, 0.30973974],
       ...,
       [0.4066163 , 0.1886725 , 0.17475414],
       [0.3533792 , 0.20163605, 0.16692822],
       [0.30048004, 0.28197864, 0.26658103]], dtype=float32)

In [138]:
# Words of each returned topic, per document.
topic_words

array([[['muscles', 'exercise', 'muscle', ..., 'testosterone',
         'gain weight', 'weight gain'],
        ['probable cause', 'never mind', 'foreign ly', ...,
         'somehow someway', 'no matter', 'their lives'],
        ['russia', 'ukraine', 'russians', ..., 'civil war',
         'government', 'increasingly']],

       [['devalue yourself', 'orient yourself', 'inciting yourself',
         ..., 'self inflicted', 'mindset', 'success'],
        ['consciousness', 'human nature', 'existence', ...,
         'entire universe', 'psychological', 'certain things'],
        ['lord', 'scripture', 'bible', ..., 'lord forgive', 'believers',
         'bible says']],

       [['fifa', 'striker', 'rashford', ..., 'player picks', 'madrid',
         'bruno bruno'],
        ['layup', 'dribble', 'dunk', ..., 'score', 'upgraded',
         'defender'],
        ['golf', 'putt', 'golf ball', ..., 'slightly lower', 'grip',
         'ball rolling']],

       ...,

       [['wands', 'pentacles', 'swords',

In [139]:
# Scores of the topic words, aligned with `topic_words`.
words_score

array([[[0.5194265 , 0.5180615 , 0.5132043 , ..., 0.3662577 ,
         0.36318555, 0.3625637 ],
        [0.5168986 , 0.39020967, 0.37414983, ..., 0.28259832,
         0.2804138 , 0.27984416],
        [0.7740225 , 0.76879114, 0.75880563, ..., 0.34254542,
         0.33377853, 0.3331706 ]],

       [[0.5077165 , 0.50635356, 0.5056643 , ..., 0.35338295,
         0.35329416, 0.35183707],
        [0.6141967 , 0.4397596 , 0.41990763, ..., 0.3208779 ,
         0.32075384, 0.31812644],
        [0.6772778 , 0.67588615, 0.6634736 , ..., 0.42583317,
         0.42265412, 0.41921344]],

       [[0.71136165, 0.60479236, 0.5915388 , ..., 0.34719124,
         0.34565544, 0.34491283],
        [0.707268  , 0.63223875, 0.5963589 , ..., 0.29704255,
         0.2970086 , 0.29589468],
        [0.6955521 , 0.5428592 , 0.53712964, ..., 0.3227759 ,
         0.32263774, 0.32181904]],

       ...,

       [[0.7664448 , 0.763821  , 0.7116008 , ..., 0.5073045 ,
         0.5064099 , 0.50630474],
        [0.42447022, 

This works OK, but need to solve problem of square brackets in 'topic1_words' column - where did I do that - YT tags?

In [159]:
# Take the first of the topics returned for each document, together with its
# score and its words.
first_topic_columns = {
    'topic1': [row[0] for row in topics],
    'topic1_score': [row[0] for row in topic_scores],
    'topic1_words': [row[0] for row in topic_words],
}
df2 = pd.DataFrame(first_topic_columns)
df2

Unnamed: 0,topic1,topic1_score,topic1_words
0,16,0.240288,"[muscles, exercise, muscle, glutes, diet, exer..."
1,13,0.316956,"[devalue yourself, orient yourself, inciting y..."
2,78,0.388933,"[fifa, striker, rashford, goalkeeper, neymar, ..."
3,15,0.350314,"[lord, scripture, bible, jesus, christ, lord j..."
4,65,0.558682,"[consequence, psychological, tragedy, humanity..."
...,...,...,...
13469,13,0.335936,"[devalue yourself, orient yourself, inciting y..."
13470,56,0.387461,"[ps, console, xbox, playstation, consoles, ps ..."
13471,10,0.406616,"[wands, pentacles, swords, sagittarius, energi..."
13472,4,0.353379,"[probable cause, never mind, foreign ly, never..."


In [158]:
# Render each word array as text and delete every '[' and ']' character.
_no_brackets = str.maketrans('', '', '[]')
df2['topic1_words'] = df2['topic1_words'].apply(lambda words: str(words).translate(_no_brackets))
df2

Unnamed: 0,topic1,topic1_score,topic1_words
0,16,0.240288,'muscles' 'exercise' 'muscle' 'glutes' 'diet' ...
1,13,0.316956,'devalue yourself' 'orient yourself' 'inciting...
2,78,0.388933,'fifa' 'striker' 'rashford' 'goalkeeper' 'neym...
3,15,0.350314,'lord' 'scripture' 'bible' 'jesus' 'christ' 'l...
4,65,0.558682,'consequence' 'psychological' 'tragedy' 'human...
...,...,...,...
13469,13,0.335936,'devalue yourself' 'orient yourself' 'inciting...
13470,56,0.387461,'ps' 'console' 'xbox' 'playstation' 'consoles'...
13471,10,0.406616,'wands' 'pentacles' 'swords' 'sagittarius' 'en...
13472,4,0.353379,'probable cause' 'never mind' 'foreign ly' 'ne...
