# Bertopic modeling

In [1]:
# !pip install pyarrow==15.0.2 bertopic datasets accelerate bitsandbytes xformers adjustText
# required lib
from collections import Counter
import pandas as pd
import pickle
import re
import string
import tqdm

In [None]:
# our file is in json format, you can replace with your own file location and format
df = pd.read_json('xxxx', compression='gzip', lines=True) # for privacy reason, I deleted the URL
df.head()

Unnamed: 0,author,created_utc,id,selftext,subreddit,title,date,crime,false_match
0,[deleted],1546314346,abemmi,[deleted],Atlanta,Gunshots?,2019-01-01 03:45:46,True,False
1,[deleted],1546314372,abemqc,[removed],Atlanta,Anybody hear gunshots?,2019-01-01 03:46:12,True,False
2,[deleted],1546380318,abmjmw,[deleted],Atlanta,"Shot taken on a hazy, in-town hike a couple da...",2019-01-01 22:05:18,True,False
3,WildVelociraptor,1546439160,abtu2p,,Atlanta,Gwinnett County teen commits suicide after acc...,2019-01-02 14:26:00,True,False
4,Downbad2018,1546482755,ac0vud,I'm in the process of leaving Charlotte and I ...,Atlanta,Material difference between Charlotte and Atlanta,2019-01-03 02:32:35,True,False


In [10]:
# Concatenate and clean text, with a colon after the title
def concatenate_and_clean(row):
    # Concatenate title and selftext with a colon after the title
    concatenated_text = f"{row['title']}: {row['selftext']}"
    # Remove unwanted phrases
    cleaned_text = concatenated_text.replace('[removed]', '').replace('[deleted]', '').strip()
    return cleaned_text

# Apply the function to the dataframe to create a new 'text' column
df['text'] = df.apply(concatenate_and_clean, axis=1)
df.head()

Unnamed: 0,author,created_utc,id,selftext,subreddit,title,date,crime,false_match,text
0,[deleted],1546314346,abemmi,[deleted],Atlanta,Gunshots?,2019-01-01 03:45:46,True,False,Gunshots?:
1,[deleted],1546314372,abemqc,[removed],Atlanta,Anybody hear gunshots?,2019-01-01 03:46:12,True,False,Anybody hear gunshots?:
2,[deleted],1546380318,abmjmw,[deleted],Atlanta,"Shot taken on a hazy, in-town hike a couple da...",2019-01-01 22:05:18,True,False,"Shot taken on a hazy, in-town hike a couple da..."
3,WildVelociraptor,1546439160,abtu2p,,Atlanta,Gwinnett County teen commits suicide after acc...,2019-01-02 14:26:00,True,False,Gwinnett County teen commits suicide after acc...
4,Downbad2018,1546482755,ac0vud,I'm in the process of leaving Charlotte and I ...,Atlanta,Material difference between Charlotte and Atlanta,2019-01-03 02:32:35,True,False,Material difference between Charlotte and Atla...


In [11]:
# Check if the text is less than 5 characters or contains only a URL
def is_short_or_url(text):
    # Check if the text has less than 5 characters
    if len(text) < 5:
        return True
    # Check if the text contains only a URL
    if re.match(r'^https?://\S+$', text):
        return True
    return False

# Apply the function to the 'text' column
df['short_or_url'] = df['text'].apply(is_short_or_url)


# Filter the rows where the condition is True
filtered_df = df[df['short_or_url']]
print('dropping %d rows that are < 5 chars or just a url' % len(filtered_df))
df = df[~df['short_or_url']].drop(columns=['short_or_url'])
df.shape

dropping 82 rows that are < 5 chars or just a url


(51429, 10)

In [12]:
# Skipping this step. -awc
# Remove duplicate values.
# df.drop_duplicates(subset='text', inplace=True)
# df.shape

In [13]:
# function taken and modified
# from https://towardsdatascience.com/cleaning-text-data-with-python-b69b47b97b76

def simple_text_clean(x):
    # first we lowercase everything
    #x = x.lower()

    # remove unicode characters
    #x = x.encode('ascii', 'ignore').decode() # not for finnish

    # remove @ symbols and hashtags
    #x = re.sub(r'@\S', '', x)
    #x = re.sub(r'#\S+', ' ', x)
    x = re.sub('([#])','',x )
    x = re.sub('([@])','',x )
    x = re.sub('^[RT]+','',x ) # delete RT

    # remove words less then 2 characters
    x = re.sub(r'\b\w{1,1}\b', '', x)

    # Remove URL
    x = re.sub(r'https*\S+', ' ', x)
    x = re.sub(r'http*\S+', ' ', x)

    # Remove ticks and the next character
    x = re.sub("\'\w+", '', x)
    # Remove punctuations
    x = re.sub('[%s]' % re.escape(string.punctuation), ' ', x)
    # Remove numbers
    x = re.sub(r'\w*\d+\w*', '', x)
    # Replace the over spaces
    x = re.sub('\s{2,}', " ", x)
    #x = re.sub(r'\s[^\w\s]\s', '', x)

    return x

In [14]:
df['cleaned_text'] = df.text.apply(simple_text_clean)

# remove not only empty strings, but also strings only containing whitespace
# https://stackoverflow.com/questions/29314033/drop-rows-containing-empty-cells-from-a-pandas-dataframe
df = df[df['cleaned_text'].str.strip().astype(bool)]
df.shape

(51429, 11)

In [15]:
# Skipping this step. -awc
# # Remove duplicate values
# df.drop_duplicates(subset='cleaned_text', inplace=True)
# df.shape

In [16]:
pd.set_option('display.max_colwidth', None)
df.cleaned_text.head(5)

0                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       

## Sentence Embedded

In [None]:
# Bert needs text in list
tweet_list = df.cleaned_text.to_list()
timestamp = df.date.to_list() # if you have a date stamp, use it here for timeline analysis

In [None]:
# The embedding model we will use
from sentence_transformers import SentenceTransformer

# Prepare embeddings
# models could vary based on data sets, check out huggingface
# since we analyze short text, we used this model but even this is now outdated

sentence_model = SentenceTransformer("BAAI/bge-small-en")

# you can also use normal embeddings, as well as BERT type transformers
# check BERTopic github page for details

  from tqdm.autonotebook import tqdm, trange


In [None]:
# Since we will use embedding models several times until we find a satisfactory results, it is wise to create embeddings in advance
# Else bertopic will do the same process repeatedly
# this saves time

embeddings = sentence_model.encode(tweet_list, show_progress_bar=True)

Batches:   0%|          | 0/1608 [00:00<?, ?it/s]

In [18]:


# #Store sentences & embeddings on disc
with open('topic/embeddings.pkl', "wb") as fOut:
    pickle.dump({'sentences': tweet_list,'timestamp':timestamp, 'embeddings': embeddings}, fOut, protocol=pickle.HIGHEST_PROTOCOL)

In [None]:
# if you want to continue with embeddings on the memory, use these
stored_sentences = tweet_list
stored_timestamp = timestamp
stored_embeddings = embeddings

# Model

## Embedded data

In [None]:
# if you turn back to BERTopic after above stage, you can start from here

import pickle

#Load sentences & embeddings from disc
with open('topic/embeddings.pkl', "rb") as fIn:
    stored_data = pickle.load(fIn)
    stored_sentences = stored_data['sentences']
    stored_timestamp = stored_data['timestamp']
    stored_embeddings = stored_data['embeddings']

In [None]:
# make sure about data size
print('embedding size:',len(stored_embeddings))
print(f'Timestamp Size: {len(stored_timestamp)}')
print('tweet size:',len(stored_sentences))

embedding size: 51429
Timestamp Size: 51429
tweet size: 51429


In [4]:
# The embedding model we will use
from sentence_transformers import SentenceTransformer

# Prepare embeddings
sentence_model = SentenceTransformer("BAAI/bge-small-en")


  from tqdm.autonotebook import tqdm, trange


## Bertopic

In [None]:
import numpy as np
import pandas as pd

from bertopic import BERTopic


from sklearn.feature_extraction.text import CountVectorizer
from umap import UMAP
from sklearn.decomposition import PCA

from hdbscan import HDBSCAN

from nltk.corpus import stopwords

import nltk
nltk.download('stopwords')
# for the local computer warning
import os
os.environ["TOKENIZERS_PARALLELISM"] = "false" # normally it is false
#print(stopwords.words('english'))

# Stopwords
stoplist = list(stopwords.words('english')) + ['http', 'https', 'amp', 'com', 'co'] # you can include as much as you want
# I suggest to add words and terminologies if you are working on specific topic
# for example, if you analyze how LLM is used in education, better to delete LLM, ai, chatgpt, chatbox type words because they more or less appear in all texts

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/qge5yv/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:
from bertopic.representation import KeyBERTInspired, MaximalMarginalRelevance, TextGeneration


# these models are useful to extract different representative words from topics
# Keybert and MMR are more useful compared to default model results
# Instead of words, LLMs provide description about your topics

# KeyBERT
keybert = KeyBERTInspired()

# MMR
mmr = MaximalMarginalRelevance(diversity=0.3)

# if you are using cloud server, you can include LLM models
# check details in BERTopic github page

# Text generation with Llama 3
#llama3 = TextGeneration(generator, prompt=prompt)

# All representation models
representation_model = {
    "KeyBERT": keybert,
    #"Llama2": llama3,
    "MMR": mmr,
}

In [7]:
import random

# Set random seed
seed_value = 42
np.random.seed(seed_value)
random.seed(seed_value)

# https://towardsdatascience.com/basics-of-countvectorizer-e26677900f9c
# max_df is used for removing terms that appear too frequently, also known as "corpus-specific stop words".
# max_df = 0.50 means "ignore terms that appear in more than 50% of the documents".
# max_df = 25 means "ignore terms that appear in more than 25 documents".

#min_df is used for removing terms that appear too infrequently.
#min_df = 0.01 means "ignore terms that appear in less than 1% of the documents".
#min_df = 5 means "ignore terms that appear in less than 5 documents".

#The CountVectorizer will select the words/features/terms which occur the most frequently.

vectorizer_model = CountVectorizer(ngram_range=(1, 3),
                                   stop_words=stoplist,
                                   max_features=10000,
                                   min_df= 0.01,
                                   max_df= 0.90
                                   )

#umap models
# lower values of n_neighbors will focus more on the very local structure
# Increasing this value often results in larger clusters being created.

# n_components parameter controls the dimensionality of the final embedded data
# Increasing this value too much and HDBSCAN will have a hard time clustering the high-dimensional embeddings.
# min_dist= We will look at a range of values from 0.0 through to 0.99.

umap_model = UMAP(n_neighbors=40,
                  n_components=5, # default 5
                  min_dist=0.1,    # default 0.1
                  metric='cosine',
                 random_state= 42)
#dim_model = PCA(n_components=5)


In [17]:
# Skip dimensionality reduction
from bertopic import BERTopic
from bertopic.dimensionality import BaseDimensionalityReduction



# HDBSCAN model
# min_cluster_size is arguably the most important parameter in HDBSCAN
# it controls the smallest grouping you want to consider as a cluster
# Increasing this value results in less clusters but of larger size
# whereas decreasing this value results in more micro clusters being generated.
# Typically, I would advise on increasing this value rather than decreasing it.

# min_samples is automatically set to min_cluster_size and controls the amount of outliers are generated.
# defaults to being equal to min_cluster_size if unspecified,
# Setting this value significantly lower than min_cluster_size
# might help you reduce the amount of noise you will get..

hdbscan_model = HDBSCAN(min_cluster_size=80, # default 10
                        min_samples=4, #The larger it is, the more points are discarded as noise/outliers.
                        #cluster_selection_epsilon= 0.5,# separate clusters that are less than 0.5 units apart
                        metric='euclidean',
                        cluster_selection_method='eom',
                        prediction_data=True)

random.seed(seed_value)
# Fit BERTopic without actually performing any dimensionality reduction
topic_model = BERTopic(
                       embedding_model=sentence_model, # if not using embedding model
                       vectorizer_model=vectorizer_model,
                       hdbscan_model=hdbscan_model,
                       top_n_words=10,
                       min_topic_size=15, # larger value, broader document coverage
                       #nr_topics="auto",
                       calculate_probabilities= True,
                       verbose=True,
                       # need this to skip dimensionality reduction; otherwise reduces to 5 components using umap by default.
                       #  umap_model=BaseDimensionalityReduction(),
                       # but, these results are not very good (half in cluster 0)
                       representation_model=representation_model)


#topics, probabilities = topic_model.fit_transform(stored_sentences, stored_embeddings)
topics, probs = topic_model.fit_transform(stored_sentences, stored_embeddings)
df['topic'] = topics

2024-09-20 17:40:39,888 - BERTopic - Dimensionality - Fitting the dimensionality reduction algorithm
2024-09-20 17:40:48,923 - BERTopic - Dimensionality - Completed ✓
2024-09-20 17:40:48,924 - BERTopic - Cluster - Start clustering the reduced embeddings
2024-09-20 17:41:01,311 - BERTopic - Cluster - Completed ✓
2024-09-20 17:41:01,316 - BERTopic - Representation - Extracting topics from clusters using representation models.
2024-09-20 17:41:16,745 - BERTopic - Representation - Completed ✓


In [18]:
df.topic.value_counts(normalize=False)

topic
-1     20194
 0      3483
 1      1987
 2      1433
 3      1191
       ...  
 88       89
 89       87
 90       83
 91       83
 92       80
Name: count, Length: 94, dtype: int64

In [19]:
# lots of -1 docs; and they look pretty relevant
[i[:100] for i in df[df.topic==-1].sample(n=50).text.values]

["Pres Trump backs Int'l Soros child rape ring for $4billion bribe Pelosi rec'd $3b 'to ensure safe pa",
 'Sources: Man accused of shooting two St. Louis police officers has a lengthy criminal history:',
 'Neighbor calls cops on bipolar queens man for gun possession (?). Cops find no gun but taser man to ',
 'I need a new neighborhood. Priced out of mine.: I’ve lived in BV since I moved to mke in 2002 and I ',
 'Murder charges filed in Logan Square fatal shooting last month, police say:',
 'Cook County Jail inmate charged with murder in beating death of cellmate:',
 '31 year prison sentence for Lovelle Jordan. Initiated shootout with police outside 25th District sta',
 'Two Jewish Men shot in Pico-Robertson - Two Days in a row:',
 'Houstonians... please help me with this neighborhood...: I have found a job in Houston. For next 6 -',
 'Seven bodies, nothing stolen: Were killings at Riverside marijuana grow ‘a message’?:',
 'McKinley Park Neighborhood Crime Rises 34 Percent in 2022 Over 

In [None]:
# we enforce to distribute texts across topics with defined theshold level
# it is not always necessary, but sometimes useful
# outlier reduction
new_topics = topic_model.reduce_outliers(stored_sentences, topics, strategy="c-tf-idf", embeddings=stored_embeddings, threshold=0.1)
print('%d outliers remain' % Counter(new_topics)[-1])

3927 outliers remain


In [None]:
# we are comparing new distribution of topics
topic_diffs = pd.DataFrame([Counter(topics), Counter(new_topics)], index=['old', 'new']).T
topic_diffs['diff'] = topic_diffs.new-topic_diffs.old
topic_diffs.sort_values('diff', ascending=False).head(20)

Unnamed: 0,old,new,diff
2,1433,2105,672
11,613,1201,588
31,245,776,531
20,370,896,526
30,254,710,456
17,531,930,399
69,121,475,354
61,142,487,345
59,146,486,340
65,129,458,329


In [None]:
# update topics
topic_model.update_topics(stored_sentences, topics=new_topics)
df['topic'] = new_topics



In [23]:
# Check the type and length of topics
print("Type of topics:", type(topics))  # Should be a list or numpy array
print("Length of topics:", len(topics))  # Should match the length of stored_sentences


Type of topics: <class 'list'>
Length of topics: 51429


In [24]:
freq = topic_model.get_topic_info()
print("Number of topics: {}".format( len(freq)))
freq


Number of topics: 94


Unnamed: 0,Topic,Count,Name,Representation,KeyBERT,MMR,Representative_Docs
0,-1,3927,-1_shooting_in_on_the,"[shooting, in, on, the, shot, at, for, to, crime, of]","[community, chicago, neighborhood, home, nyc, public, back, houston, street, memphis]","[crime, like, shooting, murder, chicago, stolen, school, nyc, years, home]","[Does LA ever invoke anxiety or some negative thoughts in you feel crazy posting this but it’ something that ’ve thought about for years First let me say ’ve lived in LA on and off over past several years Sometimes ’ve been more mentally and emotionally comfortable and other times feel like ’ on some sort of brink like where have no security and feel very threatened with nowhere and nobody to turn for security welcome to being an adult start to have these relentless negative thoughts Sometimes like if in traffic “what if several people pulled me out of my car and beat the shit out of me ” Or what if someone broke in my house and takes my dog Both of these would be strange and rare and even difficult to accomplish but they cross my mind in LA actually really enjoy LA for many reasons but there’ something present when ’ there that induces anxiety within me at times often feel it after encounter stereotypical homeless or unhoused person on the street feel this wave of emotion and wondering what happened to them in their life and then my thoughts quickly spiral out of control and begin imagining if lost my job and couldn’ pay bills imagine myself sitting on the street in filth and trash with my dog next to me Sometimes these thoughts make me want to cry This like the earlier examples is unlikely if lost my job ’ probably find new one and move on and make things work like tens of thousands of people do every year Still these thoughts are persistent and strong and are triggered by certain situations like being stuck on the or when witness something obscene or hear about something violent and gang related The Netflix doc “Hotel Cecil” seriously triggered these feelings know part of how feel is related to this sense of lawlessness and strange emotional helplessness feeling “no one is going to help you you’re fucked ” The worst ever had it was years ago near Santa Monica was going to get juice or something Stopped at light maybe somewhere on Pico and happened to look to my right where several EMTs or whatever type of first responders were standing over barely conscious woman She was completely naked sitting beside her wheelchair and looked in terrible shape This is at about pm That seriously seriously fucked me up and for years ’ve pondered how does human life arrive at that point what pain and suffering must have endured for so many years and will that be me someday Does anyone else have these thoughts or emotions and what do you think may drive them in our environment , Free Cheap Notable Comedy Shows in Town This Week Houston has stellar comedy scene and list all the shows know about for you each week focus on standup shows but there are plenty great improvisational comedy shows to catch at Station Theater and Comedy Sportz MONDAY THE SECRET GROUP PM Gay Shame Parade Pay what you can This bi monthly comedy showcase is run by gays and features gays possible gays and people we wish were gay More info here RUDYARD PM Open Mic FREE One of the best comedy open mics in the country happens every Monday at Rudyard They got low ceilings excellent pub food and full bar making it feel like an old school comedy club minus drink minimums plus the lineup is curated by Houston comedy queen Dusti Rhodes More info here AXELRAD pm Comedy Night Upstairs at Axelrad FREE Brand new weekly comedy show hosted by Jeff Joe and Robert Turo More info here PJ SPORTS BAR PM Open Mic FREE Weekly open mic upstairs at PJ presented by Cutthroat Comedy TUESDAY THE HOUSTON IMPROV PM Open Mic Happy Hour FREE The open mic returns this month to Houston last remaining traditional comedy club More info here BOONDOCKS PM Open Mic FREE Boondocks is dive bar with second story that has stage in it It fantastic for comedy and it comedian favorite The bartenders are heavy handed and the laughs are free More info here THE SECRET GROUP PM Trash Flavored Trash FREE Weekly debaucherous late night show where hosts Andrew Youngblood and Zahid Dewji pour liquor into the mouths of first row attendees Also wells are only after More info here PM Pass The Mic Midnight Comedy Open Mic FREE hostless open mic with wells This is definitely bad idea More info here WEDNESDAY AMERICAN LEGION POST PM Greater Heights Comedy Night Slade Ham This is fundraiser to support veterans Ticket price includes two drinks and catered dinner from Spaghetti Western More info here THE HOUSTON IMPROV PM Summer Break Showcase Dusti Rhodes and others on this showcase of locals celebrating summer break More info here THE SECRET GROUP PM Bill Comedy Show Weekly comedy show featuring hilarious comics Tecates Espolon and free chips and salsa More info here LIBERTY STATION PM Comedy Showcase at Liberty Station FREE Every week we showcase Houston amazing comedic talent just for you for free All you have to do is show up and have good time More info here DARWIN PUB EATERY PM Comedy Open Mic FREE Sign up at and show at Hosted by Matt Rogers THURSDAY THE SECRET GROUP PM FREEDOMTOWN USA COMEDY SHOW FREE free comedy show with free shots free swag and free barbecue Also celebrating the very American tradition of tipping Info link pending AVANT GARDEN PM Upstairs GODDAMMIT FREE is one of Houston best comedy showcases Drinks are irresponsibly cheap it free unless you feel like donating and the taco truck that on the property is world class Every week the show features some of the best comics in Houston as well as frequent drop ins from out of towners and famous comedians Doors at show starts at usually realistically PM Upstairs Avant Garden Open Mic Sign up towards the end of Goddammit FRIDAY THE SECRET GROUP PM Matthew Broussard The hilarious smart and handsome Rice graduate returns to Houston for one night only More info here PM Box Filth This monthly showcase comes with unfiltered hilarious comedy Hosted by Doug Dalton and Nick Palermo More info here AM Box Midnight in The Box FREE comedy showcase hosted by Doug Dalton and featuring wells from to More info here HOUSTON CIDER CO PM Stand Up Throwdown FREE Weekly show at Houston Cider Company formerly Town City Brewing hosted by Steve Cantwell More info here SAINT BERNARD PUB PM Comedy Hour Tressa Eleby hosts this show featuring Kari Burt Mayam Robinson and birthday girl Shelly More info here SATURDAY THE SECRET GROUP PM You Now Have the Floor Comedic Town Hall Show Comics perform in character as concerned citizens bringing issues to town hall Produced by Conner Clifton More info here PM Box Andy Huggins All My Grandchildren Houston legendary Andy Huggins hosts and selects some of his favorite up and comers for this special showcase More info here PM Box The Speedball Open Mic FREE is fast paced minute open mic for newbies and seasoned vets alike More info here PJ SPORTS BAR PM Hotter Than Hell FREE Ugly Puppy yearly comedy showcase returns More info here SUNDAY THE SECRET GROUP PM Wonder Comedy Hour Weekly comedy show where gets you pint glass an Wonder beer and comedy show Hosted by Josh Stokes More info here PM Box Roast Battle Road to LA Prelim FREE Local comics square off in roast battle Top two competitors after weeks of competition will fly out to LA to compete on the official Roast Battle More info here PM The Secret Group Open Mic Free Weekly comedy open mic Sign up at More info here NOTABLE SHOWS MORE THAN WEEK OUT Chris Fairbanks Kevin Fraser Erica Rhodes If forgotten anything please message me or leave comment and take care of it , Potentially moving to KC deciding between neighborhoods promise done research Hey everyone am potentially changing jobs and think may have found good fit at KU Medical Center The job would start in May so need to move pretty quickly ’ve briefly visited Kansas City twice and really loved it but because was only there briefly couldn’ get great feel for the different neighborhoods ’ve read through previous threads about the neighborhoods and think have an alright feel about what area might be good fit for me but wanted to make post and hear people’ thoughts Thanks in advance for any help you can provide My budget is pretty flexible – am coming from city with high cost of living would prefer not to but if an area exists that meets the needs below would be open to paying up to month for bedroom Also don have preference for living in KS vs MO is there reason should This is new to me am just looking to rent My priorities are below and in order Thanks again Quiet and safe – am really sensitive to noise cars people outside and want to be away from major thoroughfares or from streets where people hang out on the weekends ’ thinking like bars concert venues Within or reasonably close mile to neighborhood with local coffee restaurants etc Basically somewhere could walk around on the weekend and see other people places Within minute drive of grocery stores will have car but ideally ’ be able to take public transit to work and get there in less than minutes If need to drive that is cool just like riding public transit Has outdoor parks for running The two neighborhoods that stuck out as possibly for me are Volker and Brookside Volker sounds really cool but read because it houses lot of younger people and it can get loud at night due to parties especially because it is close to Westport also read about random car break ins realize this can happen anywhere but just saw it mentioned more commonly for Volker Brookside seems nice but wasn’ sure what the area is actually like Also someone had recommended place near the Historic Country Club Plaza just because the apartment seemed nice but can’ get good read on what the area is like Thanks again for any help you can provide ]"
1,0,3545,0_shot_shots_film_of,"[shot, shots, film, of, this, from, drone, some, the, my]","[shot film, film shot, another shot, video shot, shots, shot took, shot, favorite shots, shoot, drone shot]","[film, video, photos, skyline, photography, sunset, favorite, drone shot, shoot, music video]","[FQ TUK TUK Shot on Nishika , Cinestill Shot On Nikon , Have you had that Shot ]"
2,1,2184,1_gunshots_anyone_fireworks_heard,"[gunshots, anyone, fireworks, heard, shooting, hear, was, on, what, shots]","[heard gunshots, hear gunshots, gunshots fireworks, gunshots, heard shots, shootings, gunshot, shots fired, anyone else hear, gunfire]","[gunshots, fireworks, shooting, shots, loud, news, anyone else hear, gunfire, last night, gun shots]","[ by District Station Anyone else hear the gunshots at Anyone know what happened , Gunshots in Lindbergh Did anyone else hear what sounded like gunshots in Lindbergh about minutes ago We saw APD head in the direction of the shots but don’ know what’ going on with that Alternatively does anyone know if there’ police bulletin or something we can check , Anyone else hear the explosion live in Belleville and we keep hearing explosions few minutes ago there was really loud one and several sirens shortly after Anyone know what going on Not gunshots or fireworks Sounded like lot of pressure]"
3,2,2105,2_shooting_shot_dead_mass,"[shooting, shot, dead, mass, injured, park, man, killed, wounded, at]","[shooting, shooting shooting, shooting near, shootings, another shooting, gunshots, shooting outside, people shot, shot near, shooter]","[shooting, highland park, mass shooting, shooting near, shots fired, people shot, shot killed, milwaukee, park shooting, shootings]","[Maine Shooting , Pixel Shooting , Shooting at and Carmen ]"
4,3,1509,3_assault_sexual_rape_sex,"[assault, sexual, rape, sex, man, raped, woman, charged, accused, arrested]","[sexual assault, accused sexual assault, sexually assault, sex assault, assaulted, accused sexual, charged assault, assault charges, assaults, assault]","[assault, sexual assault, raped, rapist, arrested, sex assault, attempted rape, nyc, women, allegations]","[Weinstein charged with sexual assault in Los Angeles , Ypsilanti man convicted in sexual assault case , Sexual Assault ]"
...,...,...,...,...,...,...,...
89,88,118,88_scooter_moped_stolen_scooters,"[scooter, moped, stolen, scooters, it, vespa, my, riding, electric, if]","[scooter stolen, stolen, scooter, stolen today, got stolen, theft, scooters, get stolen, moped, police report]","[scooter, scooter stolen, moped, scooters, honda, sticker, parking, ride, get stolen, guys]","[Stolen Scooter , stolen scooter , Stolen scooter ]"
90,89,140,89_orleans_new_quarter_french,"[orleans, new, quarter, french, bourbon, shot, nopd, police, shooting, street]","[new orleans police, shot killed, people shot, killed shooting, orleans police, fatally shot, new orleans, shot dead, woman shot, shooting shot]","[new orleans, french quarter, new orleans police, orleans police, shooting new, bourbon street, police say, nopd says, new orleans east, shot killed]","[New Orleans police officer shot in French Quarter , Man fatally shot in French Quarter New Orleans police say , New Orleans Louisa shooting ]"
91,90,132,90_carjackings_chicago_carjacking_cwb,"[carjackings, chicago, carjacking, cwb, armed, police, spree, robberies, een, in]","[carjackings, chicago crime, carjacking, armed carjacking, chicago police, chicago police say, carjack, chicago, across chicago, robberies]","[carjackings, carjacking, chicago, chicago police, cwb chicago, chicago police say, bucktown, arrests, chicago crime, wicker park]","[Chicago’ carjacking spree visualized , Carjackings in Chicago more than double in , Carjackings in Chicago ]"
92,91,247,91_violent_crime_mayor_total,"[violent, crime, mayor, total, detroit, fbi, rate, cities, city, homicides]","[violent crime rate, detroit police, violent crime, city crime, crime rate, crime, crime spike, crime new, detroit, crime plan]","[city leaders, crime plan, violent crime rate, crime areas, fight crime, attorney general, michigan, police chief, high crime, criminals]","[Violent crime rate in Detroit fell in police chief announces , New FBI crime report Detroit violent crime leads the nation , Detroit Mayor Says Plan In Works To Address Recent Spike In Crime ]"


In [None]:
# let's look at the extracted words in topic 1
topic_model.get_topic(1)

[('gunshots', np.float64(0.022669395235948128)),
 ('anyone', np.float64(0.01830098736062849)),
 ('fireworks', np.float64(0.017949794179444485)),
 ('heard', np.float64(0.017265067021071053)),
 ('shooting', np.float64(0.013858283398165389)),
 ('hear', np.float64(0.013678860469208489)),
 ('was', np.float64(0.012750080035884903)),
 ('on', np.float64(0.011429623339518625)),
 ('what', np.float64(0.011420309975278974)),
 ('shots', np.float64(0.011174566078856338))]

In [26]:
# file_path = "/content/gdrive/My Drive/Current Projects/Tulane/"
!mkdir -p ./figures
file_path="./"

# Generate the figure using BERTopic
bar_topics = topic_model.visualize_barchart(top_n_topics=90)
#bar_topics.write_html(file_path+"figures/bartopics.html")
# Save the file by concatenating the file path and the file name
bar_topics.write_html(file_path + "figures/bartopics.html")
bar_topics

In [27]:
# Generate the figure using BERTopic
topic_distance = topic_model.visualize_topics()
#topic_distance.write_html("figures/topicdistance.html")
topic_distance.write_html(file_path + "figures/topicdistance.html")
topic_distance

In [28]:

topic_heatmap = topic_model.visualize_heatmap()
topic_heatmap.write_html(file_path + "figures/topic_heatmap.html")
topic_heatmap

In [29]:

# Generate the figure using BERTopic
topic_hierarchy = topic_model.visualize_hierarchy()
topic_hierarchy.write_html(file_path + "figures/topic_hierarchy.html")
topic_hierarchy

## Representative Posts

In [None]:
# # Store topics in a new variable to avoid overwriting
# topic_assignments = topics.copy()
# # Create the DataFrame
# posts = pd.DataFrame({'topic': topic_assignments, 'document': stored_sentences})


In [None]:
# posts

In [None]:
# topic_0 = posts[posts.topic == 0]
# topic_0

In [30]:
topic_labels = topic_model.generate_topic_labels(nr_words=3,
                                                 topic_prefix=False,
#                                                  word_length=10,
                                                 separator=", ")
topic_labels

['shooting, in, on',
 'shot, shots, film',
 'gunshots, anyone, fireworks',
 'shooting, shot, dead',
 'assault, sexual, rape',
 'stolen, thief, theft',
 'nypd, shot, brooklyn',
 'baltimore, police, in',
 'car, stolen, my',
 'dog, cat, dogs',
 'chicago, police, shot',
 'stabbed, death, man',
 'me, was, he',
 'hate, asian, crime',
 'vaccine, covid, governor',
 'old, year, shot',
 'we, you, and',
 'chicago, violence, crime',
 'crime, rate, is',
 'protest, protests, the',
 'york, nyc, new',
 'area, is, to',
 'houston, hpd, shot',
 'prosecutors, cwb, chicago',
 'carjacking, carjackings, attempted',
 'gov, vote, election',
 'gun, ranges, shoot',
 'bike, stolen, bikes',
 'stolen, car, vehicle',
 'bike, bikes, stolen',
 'gun, guns, store',
 'robbery, armed, gunpoint',
 'murder, suicide, murders',
 'subway, brooklyn, station',
 'armed, robberies, lincoln',
 'deputy, sheriff, deputies',
 'podcast, murder, true',
 'they, to, the',
 'catalytic, converter, theft',
 'louis, st, county',
 'officer, po

In [31]:
# Number of topics
freq = topic_model.get_topic_info()
print("Number of topics: {}".format( len(freq)))

display(freq)
# Head
#freq[1:21]


Number of topics: 94


Unnamed: 0,Topic,Count,Name,Representation,KeyBERT,MMR,Representative_Docs
0,-1,3927,-1_shooting_in_on_the,"[shooting, in, on, the, shot, at, for, to, crime, of]","[community, chicago, neighborhood, home, nyc, public, back, houston, street, memphis]","[crime, like, shooting, murder, chicago, stolen, school, nyc, years, home]","[Does LA ever invoke anxiety or some negative thoughts in you feel crazy posting this but it’ something that ’ve thought about for years First let me say ’ve lived in LA on and off over past several years Sometimes ’ve been more mentally and emotionally comfortable and other times feel like ’ on some sort of brink like where have no security and feel very threatened with nowhere and nobody to turn for security welcome to being an adult start to have these relentless negative thoughts Sometimes like if in traffic “what if several people pulled me out of my car and beat the shit out of me ” Or what if someone broke in my house and takes my dog Both of these would be strange and rare and even difficult to accomplish but they cross my mind in LA actually really enjoy LA for many reasons but there’ something present when ’ there that induces anxiety within me at times often feel it after encounter stereotypical homeless or unhoused person on the street feel this wave of emotion and wondering what happened to them in their life and then my thoughts quickly spiral out of control and begin imagining if lost my job and couldn’ pay bills imagine myself sitting on the street in filth and trash with my dog next to me Sometimes these thoughts make me want to cry This like the earlier examples is unlikely if lost my job ’ probably find new one and move on and make things work like tens of thousands of people do every year Still these thoughts are persistent and strong and are triggered by certain situations like being stuck on the or when witness something obscene or hear about something violent and gang related The Netflix doc “Hotel Cecil” seriously triggered these feelings know part of how feel is related to this sense of lawlessness and strange emotional helplessness feeling “no one is going to help you you’re fucked ” The worst ever had it was years ago near Santa Monica was going to get juice or something Stopped at light maybe somewhere on Pico and happened to look to my right where several EMTs or whatever type of first responders were standing over barely conscious woman She was completely naked sitting beside her wheelchair and looked in terrible shape This is at about pm That seriously seriously fucked me up and for years ’ve pondered how does human life arrive at that point what pain and suffering must have endured for so many years and will that be me someday Does anyone else have these thoughts or emotions and what do you think may drive them in our environment , Free Cheap Notable Comedy Shows in Town This Week Houston has stellar comedy scene and list all the shows know about for you each week focus on standup shows but there are plenty great improvisational comedy shows to catch at Station Theater and Comedy Sportz MONDAY THE SECRET GROUP PM Gay Shame Parade Pay what you can This bi monthly comedy showcase is run by gays and features gays possible gays and people we wish were gay More info here RUDYARD PM Open Mic FREE One of the best comedy open mics in the country happens every Monday at Rudyard They got low ceilings excellent pub food and full bar making it feel like an old school comedy club minus drink minimums plus the lineup is curated by Houston comedy queen Dusti Rhodes More info here AXELRAD pm Comedy Night Upstairs at Axelrad FREE Brand new weekly comedy show hosted by Jeff Joe and Robert Turo More info here PJ SPORTS BAR PM Open Mic FREE Weekly open mic upstairs at PJ presented by Cutthroat Comedy TUESDAY THE HOUSTON IMPROV PM Open Mic Happy Hour FREE The open mic returns this month to Houston last remaining traditional comedy club More info here BOONDOCKS PM Open Mic FREE Boondocks is dive bar with second story that has stage in it It fantastic for comedy and it comedian favorite The bartenders are heavy handed and the laughs are free More info here THE SECRET GROUP PM Trash Flavored Trash FREE Weekly debaucherous late night show where hosts Andrew Youngblood and Zahid Dewji pour liquor into the mouths of first row attendees Also wells are only after More info here PM Pass The Mic Midnight Comedy Open Mic FREE hostless open mic with wells This is definitely bad idea More info here WEDNESDAY AMERICAN LEGION POST PM Greater Heights Comedy Night Slade Ham This is fundraiser to support veterans Ticket price includes two drinks and catered dinner from Spaghetti Western More info here THE HOUSTON IMPROV PM Summer Break Showcase Dusti Rhodes and others on this showcase of locals celebrating summer break More info here THE SECRET GROUP PM Bill Comedy Show Weekly comedy show featuring hilarious comics Tecates Espolon and free chips and salsa More info here LIBERTY STATION PM Comedy Showcase at Liberty Station FREE Every week we showcase Houston amazing comedic talent just for you for free All you have to do is show up and have good time More info here DARWIN PUB EATERY PM Comedy Open Mic FREE Sign up at and show at Hosted by Matt Rogers THURSDAY THE SECRET GROUP PM FREEDOMTOWN USA COMEDY SHOW FREE free comedy show with free shots free swag and free barbecue Also celebrating the very American tradition of tipping Info link pending AVANT GARDEN PM Upstairs GODDAMMIT FREE is one of Houston best comedy showcases Drinks are irresponsibly cheap it free unless you feel like donating and the taco truck that on the property is world class Every week the show features some of the best comics in Houston as well as frequent drop ins from out of towners and famous comedians Doors at show starts at usually realistically PM Upstairs Avant Garden Open Mic Sign up towards the end of Goddammit FRIDAY THE SECRET GROUP PM Matthew Broussard The hilarious smart and handsome Rice graduate returns to Houston for one night only More info here PM Box Filth This monthly showcase comes with unfiltered hilarious comedy Hosted by Doug Dalton and Nick Palermo More info here AM Box Midnight in The Box FREE comedy showcase hosted by Doug Dalton and featuring wells from to More info here HOUSTON CIDER CO PM Stand Up Throwdown FREE Weekly show at Houston Cider Company formerly Town City Brewing hosted by Steve Cantwell More info here SAINT BERNARD PUB PM Comedy Hour Tressa Eleby hosts this show featuring Kari Burt Mayam Robinson and birthday girl Shelly More info here SATURDAY THE SECRET GROUP PM You Now Have the Floor Comedic Town Hall Show Comics perform in character as concerned citizens bringing issues to town hall Produced by Conner Clifton More info here PM Box Andy Huggins All My Grandchildren Houston legendary Andy Huggins hosts and selects some of his favorite up and comers for this special showcase More info here PM Box The Speedball Open Mic FREE is fast paced minute open mic for newbies and seasoned vets alike More info here PJ SPORTS BAR PM Hotter Than Hell FREE Ugly Puppy yearly comedy showcase returns More info here SUNDAY THE SECRET GROUP PM Wonder Comedy Hour Weekly comedy show where gets you pint glass an Wonder beer and comedy show Hosted by Josh Stokes More info here PM Box Roast Battle Road to LA Prelim FREE Local comics square off in roast battle Top two competitors after weeks of competition will fly out to LA to compete on the official Roast Battle More info here PM The Secret Group Open Mic Free Weekly comedy open mic Sign up at More info here NOTABLE SHOWS MORE THAN WEEK OUT Chris Fairbanks Kevin Fraser Erica Rhodes If forgotten anything please message me or leave comment and take care of it , Potentially moving to KC deciding between neighborhoods promise done research Hey everyone am potentially changing jobs and think may have found good fit at KU Medical Center The job would start in May so need to move pretty quickly ’ve briefly visited Kansas City twice and really loved it but because was only there briefly couldn’ get great feel for the different neighborhoods ’ve read through previous threads about the neighborhoods and think have an alright feel about what area might be good fit for me but wanted to make post and hear people’ thoughts Thanks in advance for any help you can provide My budget is pretty flexible – am coming from city with high cost of living would prefer not to but if an area exists that meets the needs below would be open to paying up to month for bedroom Also don have preference for living in KS vs MO is there reason should This is new to me am just looking to rent My priorities are below and in order Thanks again Quiet and safe – am really sensitive to noise cars people outside and want to be away from major thoroughfares or from streets where people hang out on the weekends ’ thinking like bars concert venues Within or reasonably close mile to neighborhood with local coffee restaurants etc Basically somewhere could walk around on the weekend and see other people places Within minute drive of grocery stores will have car but ideally ’ be able to take public transit to work and get there in less than minutes If need to drive that is cool just like riding public transit Has outdoor parks for running The two neighborhoods that stuck out as possibly for me are Volker and Brookside Volker sounds really cool but read because it houses lot of younger people and it can get loud at night due to parties especially because it is close to Westport also read about random car break ins realize this can happen anywhere but just saw it mentioned more commonly for Volker Brookside seems nice but wasn’ sure what the area is actually like Also someone had recommended place near the Historic Country Club Plaza just because the apartment seemed nice but can’ get good read on what the area is like Thanks again for any help you can provide ]"
1,0,3545,0_shot_shots_film_of,"[shot, shots, film, of, this, from, drone, some, the, my]","[shot film, film shot, another shot, video shot, shots, shot took, shot, favorite shots, shoot, drone shot]","[film, video, photos, skyline, photography, sunset, favorite, drone shot, shoot, music video]","[FQ TUK TUK Shot on Nishika , Cinestill Shot On Nikon , Have you had that Shot ]"
2,1,2184,1_gunshots_anyone_fireworks_heard,"[gunshots, anyone, fireworks, heard, shooting, hear, was, on, what, shots]","[heard gunshots, hear gunshots, gunshots fireworks, gunshots, heard shots, shootings, gunshot, shots fired, anyone else hear, gunfire]","[gunshots, fireworks, shooting, shots, loud, news, anyone else hear, gunfire, last night, gun shots]","[ by District Station Anyone else hear the gunshots at Anyone know what happened , Gunshots in Lindbergh Did anyone else hear what sounded like gunshots in Lindbergh about minutes ago We saw APD head in the direction of the shots but don’ know what’ going on with that Alternatively does anyone know if there’ police bulletin or something we can check , Anyone else hear the explosion live in Belleville and we keep hearing explosions few minutes ago there was really loud one and several sirens shortly after Anyone know what going on Not gunshots or fireworks Sounded like lot of pressure]"
3,2,2105,2_shooting_shot_dead_mass,"[shooting, shot, dead, mass, injured, park, man, killed, wounded, at]","[shooting, shooting shooting, shooting near, shootings, another shooting, gunshots, shooting outside, people shot, shot near, shooter]","[shooting, highland park, mass shooting, shooting near, shots fired, people shot, shot killed, milwaukee, park shooting, shootings]","[Maine Shooting , Pixel Shooting , Shooting at and Carmen ]"
4,3,1509,3_assault_sexual_rape_sex,"[assault, sexual, rape, sex, man, raped, woman, charged, accused, arrested]","[sexual assault, accused sexual assault, sexually assault, sex assault, assaulted, accused sexual, charged assault, assault charges, assaults, assault]","[assault, sexual assault, raped, rapist, arrested, sex assault, attempted rape, nyc, women, allegations]","[Weinstein charged with sexual assault in Los Angeles , Ypsilanti man convicted in sexual assault case , Sexual Assault ]"
...,...,...,...,...,...,...,...
89,88,118,88_scooter_moped_stolen_scooters,"[scooter, moped, stolen, scooters, it, vespa, my, riding, electric, if]","[scooter stolen, stolen, scooter, stolen today, got stolen, theft, scooters, get stolen, moped, police report]","[scooter, scooter stolen, moped, scooters, honda, sticker, parking, ride, get stolen, guys]","[Stolen Scooter , stolen scooter , Stolen scooter ]"
90,89,140,89_orleans_new_quarter_french,"[orleans, new, quarter, french, bourbon, shot, nopd, police, shooting, street]","[new orleans police, shot killed, people shot, killed shooting, orleans police, fatally shot, new orleans, shot dead, woman shot, shooting shot]","[new orleans, french quarter, new orleans police, orleans police, shooting new, bourbon street, police say, nopd says, new orleans east, shot killed]","[New Orleans police officer shot in French Quarter , Man fatally shot in French Quarter New Orleans police say , New Orleans Louisa shooting ]"
91,90,132,90_carjackings_chicago_carjacking_cwb,"[carjackings, chicago, carjacking, cwb, armed, police, spree, robberies, een, in]","[carjackings, chicago crime, carjacking, armed carjacking, chicago police, chicago police say, carjack, chicago, across chicago, robberies]","[carjackings, carjacking, chicago, chicago police, cwb chicago, chicago police say, bucktown, arrests, chicago crime, wicker park]","[Chicago’ carjacking spree visualized , Carjackings in Chicago more than double in , Carjackings in Chicago ]"
92,91,247,91_violent_crime_mayor_total,"[violent, crime, mayor, total, detroit, fbi, rate, cities, city, homicides]","[violent crime rate, detroit police, violent crime, city crime, crime rate, crime, crime spike, crime new, detroit, crime plan]","[city leaders, crime plan, violent crime rate, crime areas, fight crime, attorney general, michigan, police chief, high crime, criminals]","[Violent crime rate in Detroit fell in police chief announces , New FBI crime report Detroit violent crime leads the nation , Detroit Mayor Says Plan In Works To Address Recent Spike In Crime ]"


In [32]:
# Show topic
topic_model.get_topic(14)

[('old', np.float64(0.11167292706331299)),
 ('year', np.float64(0.09342981468242907)),
 ('shot', np.float64(0.046676381278542234)),
 ('girl', np.float64(0.03926861208911103)),
 ('killed', np.float64(0.03843531700219713)),
 ('boy', np.float64(0.03840555376709412)),
 ('shooting', np.float64(0.037673827977572336)),
 ('detroit', np.float64(0.026951394637800528)),
 ('police', np.float64(0.022233713204972146)),
 ('woman', np.float64(0.021639949705360054))]

In [33]:
freq = topic_model.get_topic_info()

top_20 = freq#[1:21]
top_20 = top_20.explode('Representative_Docs')
#top_20

In [34]:
top_20 = top_20.rename(columns={'Name':'tweet_topic'})
#top_20

In [35]:
top_20_topics = list(set(top_20['Topic'].values))
top_20_topics = list(filter(lambda x: x != -1, top_20_topics))
#top_20_topics

In [None]:
# here we are merging topics with main document

df_with_topics = pd.DataFrame({"Document":stored_sentences,
            "Topic":topic_model.topics_})
df_with_topics = df_with_topics.merge(freq,on='Topic', how='inner')
df_with_topics = df_with_topics.rename(columns={
    'Document': 'bertopic_preprocessed_text',
    'Topic': 'tweet_topic'
})

#df_with_topics.head()

In [37]:
df_tweet_with_topic = df.merge(df_with_topics,
                               left_on='cleaned_text',
                               right_on='bertopic_preprocessed_text',
                               how='left')

# Drop duplicates based on 'bertopic_preprocessed_text'
df_tweet_with_topic = df_tweet_with_topic.drop_duplicates(subset=['bertopic_preprocessed_text'])

df_tweet_with_topic.head()

Unnamed: 0,author,created_utc,id,selftext,subreddit,title,date,crime,false_match,text,cleaned_text,topic,bertopic_preprocessed_text,tweet_topic,Count,Name,Representation,KeyBERT,MMR,Representative_Docs
0,[deleted],1546314346,abemmi,[deleted],Atlanta,Gunshots?,2019-01-01 03:45:46,True,False,Gunshots?:,Gunshots,2,Gunshots,2,2105,2_shooting_shot_dead_mass,"[shooting, shot, dead, mass, injured, park, man, killed, wounded, at]","[shooting, shooting shooting, shooting near, shootings, another shooting, gunshots, shooting outside, people shot, shot near, shooter]","[shooting, highland park, mass shooting, shooting near, shots fired, people shot, shot killed, milwaukee, park shooting, shootings]","[Maine Shooting , Pixel Shooting , Shooting at and Carmen ]"
5,[deleted],1546314372,abemqc,[removed],Atlanta,Anybody hear gunshots?,2019-01-01 03:46:12,True,False,Anybody hear gunshots?:,Anybody hear gunshots,1,Anybody hear gunshots,1,2184,1_gunshots_anyone_fireworks_heard,"[gunshots, anyone, fireworks, heard, shooting, hear, was, on, what, shots]","[heard gunshots, hear gunshots, gunshots fireworks, gunshots, heard shots, shootings, gunshot, shots fired, anyone else hear, gunfire]","[gunshots, fireworks, shooting, shots, loud, news, anyone else hear, gunfire, last night, gun shots]","[ by District Station Anyone else hear the gunshots at Anyone know what happened , Gunshots in Lindbergh Did anyone else hear what sounded like gunshots in Lindbergh about minutes ago We saw APD head in the direction of the shots but don’ know what’ going on with that Alternatively does anyone know if there’ police bulletin or something we can check , Anyone else hear the explosion live in Belleville and we keep hearing explosions few minutes ago there was really loud one and several sirens shortly after Anyone know what going on Not gunshots or fireworks Sounded like lot of pressure]"
6,[deleted],1546380318,abmjmw,[deleted],Atlanta,"Shot taken on a hazy, in-town hike a couple days ago. Can anyone guess the spot?",2019-01-01 22:05:18,True,False,"Shot taken on a hazy, in-town hike a couple days ago. Can anyone guess the spot?:",Shot taken on hazy in town hike couple days ago Can anyone guess the spot,0,Shot taken on hazy in town hike couple days ago Can anyone guess the spot,0,3545,0_shot_shots_film_of,"[shot, shots, film, of, this, from, drone, some, the, my]","[shot film, film shot, another shot, video shot, shots, shot took, shot, favorite shots, shoot, drone shot]","[film, video, photos, skyline, photography, sunset, favorite, drone shot, shoot, music video]","[FQ TUK TUK Shot on Nishika , Cinestill Shot On Nikon , Have you had that Shot ]"
7,WildVelociraptor,1546439160,abtu2p,,Atlanta,Gwinnett County teen commits suicide after accidentally shooting and killing friend,2019-01-02 14:26:00,True,False,Gwinnett County teen commits suicide after accidentally shooting and killing friend:,Gwinnett County teen commits suicide after accidentally shooting and killing friend,59,Gwinnett County teen commits suicide after accidentally shooting and killing friend,59,486,59_killed_shot_man_father,"[killed, shot, man, father, gun, mother, after, teen, police, fatally]","[shot killed, shot death, killed man, fatally shot, shot dead, man shot killed, killed, deadly shooting, shooting man, man shot]","[shot killed, shooting, man shot, pulled gun, shot death, fatally shot, shot dead, man gun, huntington, suspect]","[My Husband Father Of Two Shot And Killed , My Husband Father Of Two Shot And Killed , Father of Sierra Rhodd shot and killed ]"
8,Downbad2018,1546482755,ac0vud,"I'm in the process of leaving Charlotte and I have a few different cities to decide from. One of them is Atlanta.\n\nThe question I have for you ATLiens: is there a material difference between Charlotte and Atlanta? Of course Atlanta has a population 3x the size of Charlotte with the traffic to match, major hollywood flicks are shot there and virtually every rapper that matters either lives there or is from there. These are BIG deals but on the day to day I'm sure none of this, besides the traffic, affects the average person in a very real way (if you beg to differ please speak up). \n\nCharlotte has a huge financial services scene, 7 fortune five hundred companies, tons of bars, progressive leadership and a bright outlook. However their bars close at 2AM and the culture is totally family oriented and a lot of it feels like a massive suburb. The massive suburb vibe is what I get from most of Atlanta as well though. \n\nThe average salary between the two cities is almost identical, the cost of living is pretty much the same, housing is pretty much the same, demographics are pretty much the same. \n\nSo besides a slightly more progressive and more party oriented culture, I fail to see a material difference between Atlanta and Charlotte. An example of a material difference to me would be between say Atlanta and Los Angeles. Sure the day to day grind is the same----but that city is a totally different world when compared to Atlanta in many respects. \n\nSo please....point out to me the real differences between the two that I may not be seeing as Atlanta is one of the top two cities that I have to decide between when moving.\n\n&#x200B;",Atlanta,Material difference between Charlotte and Atlanta,2019-01-03 02:32:35,True,False,"Material difference between Charlotte and Atlanta: I'm in the process of leaving Charlotte and I have a few different cities to decide from. One of them is Atlanta.\n\nThe question I have for you ATLiens: is there a material difference between Charlotte and Atlanta? Of course Atlanta has a population 3x the size of Charlotte with the traffic to match, major hollywood flicks are shot there and virtually every rapper that matters either lives there or is from there. These are BIG deals but on the day to day I'm sure none of this, besides the traffic, affects the average person in a very real way (if you beg to differ please speak up). \n\nCharlotte has a huge financial services scene, 7 fortune five hundred companies, tons of bars, progressive leadership and a bright outlook. However their bars close at 2AM and the culture is totally family oriented and a lot of it feels like a massive suburb. The massive suburb vibe is what I get from most of Atlanta as well though. \n\nThe average salary between the two cities is almost identical, the cost of living is pretty much the same, housing is pretty much the same, demographics are pretty much the same. \n\nSo besides a slightly more progressive and more party oriented culture, I fail to see a material difference between Atlanta and Charlotte. An example of a material difference to me would be between say Atlanta and Los Angeles. Sure the day to day grind is the same----but that city is a totally different world when compared to Atlanta in many respects. \n\nSo please....point out to me the real differences between the two that I may not be seeing as Atlanta is one of the top two cities that I have to decide between when moving.\n\n&#x200B;",Material difference between Charlotte and Atlanta in the process of leaving Charlotte and have few different cities to decide from One of them is Atlanta The question have for you ATLiens is there material difference between Charlotte and Atlanta Of course Atlanta has population the size of Charlotte with the traffic to match major hollywood flicks are shot there and virtually every rapper that matters either lives there or is from there These are BIG deals but on the day to day sure none of this besides the traffic affects the average person in very real way if you beg to differ please speak up Charlotte has huge financial services scene fortune five hundred companies tons of bars progressive leadership and bright outlook However their bars close at and the culture is totally family oriented and lot of it feels like massive suburb The massive suburb vibe is what get from most of Atlanta as well though The average salary between the two cities is almost identical the cost of living is pretty much the same housing is pretty much the same demographics are pretty much the same So besides slightly more progressive and more party oriented culture fail to see material difference between Atlanta and Charlotte An example of material difference to me would be between say Atlanta and Los Angeles Sure the day to day grind is the same but that city is totally different world when compared to Atlanta in many respects So please point out to me the real differences between the two that may not be seeing as Atlanta is one of the top two cities that have to decide between when moving,20,Material difference between Charlotte and Atlanta in the process of leaving Charlotte and have few different cities to decide from One of them is Atlanta The question have for you ATLiens is there material difference between Charlotte and Atlanta Of course Atlanta has population the size of Charlotte with the traffic to match major hollywood flicks are shot there and virtually every rapper that matters either lives there or is from there These are BIG deals but on the day to day sure none of this besides the traffic affects the average person in very real way if you beg to differ please speak up Charlotte has huge financial services scene fortune five hundred companies tons of bars progressive leadership and bright outlook However their bars close at and the culture is totally family oriented and lot of it feels like massive suburb The massive suburb vibe is what get from most of Atlanta as well though The average salary between the two cities is almost identical the cost of living is pretty much the same housing is pretty much the same demographics are pretty much the same So besides slightly more progressive and more party oriented culture fail to see material difference between Atlanta and Charlotte An example of material difference to me would be between say Atlanta and Los Angeles Sure the day to day grind is the same but that city is totally different world when compared to Atlanta in many respects So please point out to me the real differences between the two that may not be seeing as Atlanta is one of the top two cities that have to decide between when moving,20,896,20_area_is_to_looking,"[area, is, to, looking, crime, and, are, we, but, like]","[indianapolis, indy, commute, suburbs, neighborhood, neighborhoods, buying house, apartments, homes, home]","[crime, neighborhood, living, moving, indianapolis, apartments, safe, houston, houses, neighborhoods]","[Moving to Indy Plainfield area will be moving to Indy pretty soon from Cincinnati We plan on renting for year to get an idea of the area along with saving more for down payment on house would very much like to get bit of property in rural area so want to really maximize the on what rental end up in for year have been getting bit of feel for some of the areas around Plainfield where will be working and co worker mentioned the area around the Airport has fairly high crime rate But the person talked to hasn been in Indy very long years or so found place in some of the neighborhoods just north of the Airport Salem Creek that seems to be good deal maybe even an overly good deal so thought would ask what some opinions of the area North of the Airport are in terms of crime not terribly picky person and don really care about the specific demographics of an area crime is my core concern don care for break ins buster car windows or drug ridden areas Had enough of that years ago when was poor Thanks for any opinions or feedback , Moving to Indiana Hi everyone have recently secured position with company whose head office is in Carmel Indiana currently on the east coast so will need to move within the next few weeks Ideally would like to live in Carmel for short commute but have also heard good things about living downtown in Broad Ripple and in Castleton Allisonville However also heard some iffy things about Broad Ripple and Castleton Allsonville For some background am and this will be white collar job am also single and would like to be around other young professionals do not want to be around loud college kids something heard about Broad Ripple is that there big bar scene full of somewhat rambunctious youth And while heard some good things about Castleton Allisonville it seems kind of meh If were to live downtown probably want to be in the Massachusetts Ave area heard really good things about it and it seems like good area Close to lot of things and already found few places that are within my price range be happy to give specifics if anyone asks And as for Carmel it seems like really nice area especially the Arts Design District want to live somewhere right off the Monon near Rangeline Drive and Main Street or City Center Drive want to be close to bars and restaurants but want them to cater to more mature clientele also want to be able to go home and not have to worry about hearing someone blasting loud music in their car like my quiet kinda want hit that sweet spot between close enough yet far enough away from everything if you know what mean Carmel would provide me with the shortest commute possibly even walking into the office and would also be good for maintaining my low auto insurance premium which pay by the mile obviously like to live in low crime area as well My budget for one bedroom place tops out around don need lot of amenities like gym or pool or whatever Although they be nice to have if wanted to use them it would likely be fairly infrequent What more important to me is quiet respectful neighbors Sorry if rambled know that was lot and know there will be trade offs no matter where end up settling Does anyone have any recommendations for me based on the information given appreciate it , Is Indy really that unsafe Also apartment recommendations downtown Hi all currently in West Lafayette but looking to move to bedroom studio in Indianapolis this summer My work office is downtown so preferably live in that area since don have car and also don know how good public transit is My budget is around or less for rent been looking into places like Edge in Fountain Square Buckingham Balmoral and Circle City Apartments both on Meridian St These places are within my budget and have good reviews but little concerned about the crime rate in Indy It really important for me as single young woman to feel safe and comfortable with my living arrangements and neighborhood Nearly all the places been looking at are in above average crime areas according to various crime search sites Does anyone live at near the areas mentioned and could give me some insight on safety crime in the area Are there other good places should be looking into Thanks for any help EDIT thanks so much for the awesome and informative replies Feeling lot better about safety downtown in Fountain Square Sounds like just practicing common sense will keep me okay]"


In [None]:
# we are now saving main df with topics and extracted keywords
df_tweet_with_topic = df_tweet_with_topic.rename(columns = {
    'Name' : 'tweet_topic_name'
})
df_tweet_with_topic['tweet_topic'] = df_tweet_with_topic['tweet_topic'].fillna('No data')
df_tweet_with_topic['tweet_topic_name'] = df_tweet_with_topic['tweet_topic_name'].fillna('Other')
df_tweet_with_topic['tweet_topic_name'] = df_tweet_with_topic.apply(lambda x:
                                                                    x['tweet_topic_name']
                                                                    if x['tweet_topic'] != -1
                                                                    else 'Other', axis = 1)

#df_tweet_with_topic

# Save the  posts with topics texts
output_file_path = 'topic/posts_with_topics.csv'
df_tweet_with_topic.to_csv(output_file_path, index=False)

In [39]:
df_tweet_with_topic.groupby('tweet_topic_name').count()

Unnamed: 0_level_0,author,created_utc,id,selftext,subreddit,title,date,crime,false_match,text,cleaned_text,topic,bertopic_preprocessed_text,tweet_topic,Count,Representation,KeyBERT,MMR,Representative_Docs
tweet_topic_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
0_shot_shots_film_of,3457,3457,3457,3457,3457,3457,3457,3457,3457,3457,3457,3457,3457,3457,3457,3457,3457,3457,3457
10_stabbed_death_man_fatally,741,741,741,741,741,741,741,741,741,741,741,741,741,741,741,741,741,741,741
11_me_was_he_my,1196,1196,1196,1196,1196,1196,1196,1196,1196,1196,1196,1196,1196,1196,1196,1196,1196,1196,1196
12_hate_asian_crime_crimes,775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,775,775
13_vaccine_covid_governor_health,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602,602
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
90_carjackings_chicago_carjacking_cwb,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126,126
91_violent_crime_mayor_total,223,223,223,223,223,223,223,223,223,223,223,223,223,223,223,223,223,223,223
92_orleans_new_you_the,179,179,179,179,179,179,179,179,179,179,179,179,179,179,179,179,179,179,179
9_chicago_police_shot_weekend,868,868,868,868,868,868,868,868,868,868,868,868,868,868,868,868,868,868,868


In [40]:
df_tweet_with_topic['tweet_topic_name'].value_counts()

tweet_topic_name
Other                                3668
0_shot_shots_film_of                 3457
1_gunshots_anyone_fireworks_heard    2168
2_shooting_shot_dead_mass            2032
3_assault_sexual_rape_sex            1430
                                     ... 
62_billion_murray_dorsey_trump        115
88_scooter_moped_stolen_scooters      112
85_lightfoot_lori_mayor_chicago       107
78_backs_bmurray_pres_int              44
53_theil_payoffs_obama_soros           41
Name: count, Length: 94, dtype: int64

In [41]:
summary_df = pd.DataFrame(df_tweet_with_topic['tweet_topic_name'].value_counts()).reset_index()
summary_df

Unnamed: 0,tweet_topic_name,count
0,Other,3668
1,0_shot_shots_film_of,3457
2,1_gunshots_anyone_fireworks_heard,2168
3,2_shooting_shot_dead_mass,2032
4,3_assault_sexual_rape_sex,1430
...,...,...
89,62_billion_murray_dorsey_trump,115
90,88_scooter_moped_stolen_scooters,112
91,85_lightfoot_lori_mayor_chicago,107
92,78_backs_bmurray_pres_int,44


In [42]:
freq = topic_model.get_topic_info()
freq = freq[['Topic', 'Name', 'Representation']]
summary_df = summary_df.rename(columns = {'tweet_topic_name': 'Name', 'count': 'Count'})
display(freq)
display(summary_df)

Unnamed: 0,Topic,Name,Representation
0,-1,-1_shooting_in_on_the,"[shooting, in, on, the, shot, at, for, to, crime, of]"
1,0,0_shot_shots_film_of,"[shot, shots, film, of, this, from, drone, some, the, my]"
2,1,1_gunshots_anyone_fireworks_heard,"[gunshots, anyone, fireworks, heard, shooting, hear, was, on, what, shots]"
3,2,2_shooting_shot_dead_mass,"[shooting, shot, dead, mass, injured, park, man, killed, wounded, at]"
4,3,3_assault_sexual_rape_sex,"[assault, sexual, rape, sex, man, raped, woman, charged, accused, arrested]"
...,...,...,...
89,88,88_scooter_moped_stolen_scooters,"[scooter, moped, stolen, scooters, it, vespa, my, riding, electric, if]"
90,89,89_orleans_new_quarter_french,"[orleans, new, quarter, french, bourbon, shot, nopd, police, shooting, street]"
91,90,90_carjackings_chicago_carjacking_cwb,"[carjackings, chicago, carjacking, cwb, armed, police, spree, robberies, een, in]"
92,91,91_violent_crime_mayor_total,"[violent, crime, mayor, total, detroit, fbi, rate, cities, city, homicides]"


Unnamed: 0,Name,Count
0,Other,3668
1,0_shot_shots_film_of,3457
2,1_gunshots_anyone_fireworks_heard,2168
3,2_shooting_shot_dead_mass,2032
4,3_assault_sexual_rape_sex,1430
...,...,...
89,62_billion_murray_dorsey_trump,115
90,88_scooter_moped_stolen_scooters,112
91,85_lightfoot_lori_mayor_chicago,107
92,78_backs_bmurray_pres_int,44


In [None]:
# we are now creating summary table for further use

summary_df = summary_df.merge(freq, on='Name', how='left').fillna('Other')
summary_df = summary_df[['Topic', 'Name', 'Representation', 'Count']]

In [44]:
summary_df['Representation'] = summary_df['Representation'].apply(lambda x: 'Other' if x == 'Other' else ', '.join(x))
summary_df['Topic'] = summary_df['Topic'].apply(lambda x: -1 if x == 'Other' else int(x))

In [45]:
summary_df.head(50)

# Save the  summary resulsts
output_file_path = 'topic/summary.csv'
summary_df.to_csv(output_file_path, index=False)

In [None]:
# this stage is useful if you are planning to make qualitative analysis
# the main idea is to extract top 20 representative text per topic
# it extracted based on their probability score

freq = topic_model.get_topic_info()

top_20 = freq#[1:21]
top_20 = top_20.explode('Representative_Docs')
top_20

Unnamed: 0,Topic,Count,Name,Representation,KeyBERT,MMR,Representative_Docs
0,-1,3927,-1_shooting_in_on_the,"[shooting, in, on, the, shot, at, for, to, crime, of]","[community, chicago, neighborhood, home, nyc, public, back, houston, street, memphis]","[crime, like, shooting, murder, chicago, stolen, school, nyc, years, home]",Does LA ever invoke anxiety or some negative thoughts in you feel crazy posting this but it’ something that ’ve thought about for years First let me say ’ve lived in LA on and off over past several years Sometimes ’ve been more mentally and emotionally comfortable and other times feel like ’ on some sort of brink like where have no security and feel very threatened with nowhere and nobody to turn for security welcome to being an adult start to have these relentless negative thoughts Sometimes like if in traffic “what if several people pulled me out of my car and beat the shit out of me ” Or what if someone broke in my house and takes my dog Both of these would be strange and rare and even difficult to accomplish but they cross my mind in LA actually really enjoy LA for many reasons but there’ something present when ’ there that induces anxiety within me at times often feel it after encounter stereotypical homeless or unhoused person on the street feel this wave of emotion and wondering what happened to them in their life and then my thoughts quickly spiral out of control and begin imagining if lost my job and couldn’ pay bills imagine myself sitting on the street in filth and trash with my dog next to me Sometimes these thoughts make me want to cry This like the earlier examples is unlikely if lost my job ’ probably find new one and move on and make things work like tens of thousands of people do every year Still these thoughts are persistent and strong and are triggered by certain situations like being stuck on the or when witness something obscene or hear about something violent and gang related The Netflix doc “Hotel Cecil” seriously triggered these feelings know part of how feel is related to this sense of lawlessness and strange emotional helplessness feeling “no one is going to help you you’re fucked ” The worst ever had it was years ago near Santa Monica was going to get juice or something Stopped at light maybe somewhere on Pico and happened to look to my right where several EMTs or whatever type of first responders were standing over barely conscious woman She was completely naked sitting beside her wheelchair and looked in terrible shape This is at about pm That seriously seriously fucked me up and for years ’ve pondered how does human life arrive at that point what pain and suffering must have endured for so many years and will that be me someday Does anyone else have these thoughts or emotions and what do you think may drive them in our environment
0,-1,3927,-1_shooting_in_on_the,"[shooting, in, on, the, shot, at, for, to, crime, of]","[community, chicago, neighborhood, home, nyc, public, back, houston, street, memphis]","[crime, like, shooting, murder, chicago, stolen, school, nyc, years, home]",Free Cheap Notable Comedy Shows in Town This Week Houston has stellar comedy scene and list all the shows know about for you each week focus on standup shows but there are plenty great improvisational comedy shows to catch at Station Theater and Comedy Sportz MONDAY THE SECRET GROUP PM Gay Shame Parade Pay what you can This bi monthly comedy showcase is run by gays and features gays possible gays and people we wish were gay More info here RUDYARD PM Open Mic FREE One of the best comedy open mics in the country happens every Monday at Rudyard They got low ceilings excellent pub food and full bar making it feel like an old school comedy club minus drink minimums plus the lineup is curated by Houston comedy queen Dusti Rhodes More info here AXELRAD pm Comedy Night Upstairs at Axelrad FREE Brand new weekly comedy show hosted by Jeff Joe and Robert Turo More info here PJ SPORTS BAR PM Open Mic FREE Weekly open mic upstairs at PJ presented by Cutthroat Comedy TUESDAY THE HOUSTON IMPROV PM Open Mic Happy Hour FREE The open mic returns this month to Houston last remaining traditional comedy club More info here BOONDOCKS PM Open Mic FREE Boondocks is dive bar with second story that has stage in it It fantastic for comedy and it comedian favorite The bartenders are heavy handed and the laughs are free More info here THE SECRET GROUP PM Trash Flavored Trash FREE Weekly debaucherous late night show where hosts Andrew Youngblood and Zahid Dewji pour liquor into the mouths of first row attendees Also wells are only after More info here PM Pass The Mic Midnight Comedy Open Mic FREE hostless open mic with wells This is definitely bad idea More info here WEDNESDAY AMERICAN LEGION POST PM Greater Heights Comedy Night Slade Ham This is fundraiser to support veterans Ticket price includes two drinks and catered dinner from Spaghetti Western More info here THE HOUSTON IMPROV PM Summer Break Showcase Dusti Rhodes and others on this showcase of locals celebrating summer break More info here THE SECRET GROUP PM Bill Comedy Show Weekly comedy show featuring hilarious comics Tecates Espolon and free chips and salsa More info here LIBERTY STATION PM Comedy Showcase at Liberty Station FREE Every week we showcase Houston amazing comedic talent just for you for free All you have to do is show up and have good time More info here DARWIN PUB EATERY PM Comedy Open Mic FREE Sign up at and show at Hosted by Matt Rogers THURSDAY THE SECRET GROUP PM FREEDOMTOWN USA COMEDY SHOW FREE free comedy show with free shots free swag and free barbecue Also celebrating the very American tradition of tipping Info link pending AVANT GARDEN PM Upstairs GODDAMMIT FREE is one of Houston best comedy showcases Drinks are irresponsibly cheap it free unless you feel like donating and the taco truck that on the property is world class Every week the show features some of the best comics in Houston as well as frequent drop ins from out of towners and famous comedians Doors at show starts at usually realistically PM Upstairs Avant Garden Open Mic Sign up towards the end of Goddammit FRIDAY THE SECRET GROUP PM Matthew Broussard The hilarious smart and handsome Rice graduate returns to Houston for one night only More info here PM Box Filth This monthly showcase comes with unfiltered hilarious comedy Hosted by Doug Dalton and Nick Palermo More info here AM Box Midnight in The Box FREE comedy showcase hosted by Doug Dalton and featuring wells from to More info here HOUSTON CIDER CO PM Stand Up Throwdown FREE Weekly show at Houston Cider Company formerly Town City Brewing hosted by Steve Cantwell More info here SAINT BERNARD PUB PM Comedy Hour Tressa Eleby hosts this show featuring Kari Burt Mayam Robinson and birthday girl Shelly More info here SATURDAY THE SECRET GROUP PM You Now Have the Floor Comedic Town Hall Show Comics perform in character as concerned citizens bringing issues to town hall Produced by Conner Clifton More info here PM Box Andy Huggins All My Grandchildren Houston legendary Andy Huggins hosts and selects some of his favorite up and comers for this special showcase More info here PM Box The Speedball Open Mic FREE is fast paced minute open mic for newbies and seasoned vets alike More info here PJ SPORTS BAR PM Hotter Than Hell FREE Ugly Puppy yearly comedy showcase returns More info here SUNDAY THE SECRET GROUP PM Wonder Comedy Hour Weekly comedy show where gets you pint glass an Wonder beer and comedy show Hosted by Josh Stokes More info here PM Box Roast Battle Road to LA Prelim FREE Local comics square off in roast battle Top two competitors after weeks of competition will fly out to LA to compete on the official Roast Battle More info here PM The Secret Group Open Mic Free Weekly comedy open mic Sign up at More info here NOTABLE SHOWS MORE THAN WEEK OUT Chris Fairbanks Kevin Fraser Erica Rhodes If forgotten anything please message me or leave comment and take care of it
0,-1,3927,-1_shooting_in_on_the,"[shooting, in, on, the, shot, at, for, to, crime, of]","[community, chicago, neighborhood, home, nyc, public, back, houston, street, memphis]","[crime, like, shooting, murder, chicago, stolen, school, nyc, years, home]",Potentially moving to KC deciding between neighborhoods promise done research Hey everyone am potentially changing jobs and think may have found good fit at KU Medical Center The job would start in May so need to move pretty quickly ’ve briefly visited Kansas City twice and really loved it but because was only there briefly couldn’ get great feel for the different neighborhoods ’ve read through previous threads about the neighborhoods and think have an alright feel about what area might be good fit for me but wanted to make post and hear people’ thoughts Thanks in advance for any help you can provide My budget is pretty flexible – am coming from city with high cost of living would prefer not to but if an area exists that meets the needs below would be open to paying up to month for bedroom Also don have preference for living in KS vs MO is there reason should This is new to me am just looking to rent My priorities are below and in order Thanks again Quiet and safe – am really sensitive to noise cars people outside and want to be away from major thoroughfares or from streets where people hang out on the weekends ’ thinking like bars concert venues Within or reasonably close mile to neighborhood with local coffee restaurants etc Basically somewhere could walk around on the weekend and see other people places Within minute drive of grocery stores will have car but ideally ’ be able to take public transit to work and get there in less than minutes If need to drive that is cool just like riding public transit Has outdoor parks for running The two neighborhoods that stuck out as possibly for me are Volker and Brookside Volker sounds really cool but read because it houses lot of younger people and it can get loud at night due to parties especially because it is close to Westport also read about random car break ins realize this can happen anywhere but just saw it mentioned more commonly for Volker Brookside seems nice but wasn’ sure what the area is actually like Also someone had recommended place near the Historic Country Club Plaza just because the apartment seemed nice but can’ get good read on what the area is like Thanks again for any help you can provide
1,0,3545,0_shot_shots_film_of,"[shot, shots, film, of, this, from, drone, some, the, my]","[shot film, film shot, another shot, video shot, shots, shot took, shot, favorite shots, shoot, drone shot]","[film, video, photos, skyline, photography, sunset, favorite, drone shot, shoot, music video]",FQ TUK TUK Shot on Nishika
1,0,3545,0_shot_shots_film_of,"[shot, shots, film, of, this, from, drone, some, the, my]","[shot film, film shot, another shot, video shot, shots, shot took, shot, favorite shots, shoot, drone shot]","[film, video, photos, skyline, photography, sunset, favorite, drone shot, shoot, music video]",Cinestill Shot On Nikon
...,...,...,...,...,...,...,...
92,91,247,91_violent_crime_mayor_total,"[violent, crime, mayor, total, detroit, fbi, rate, cities, city, homicides]","[violent crime rate, detroit police, violent crime, city crime, crime rate, crime, crime spike, crime new, detroit, crime plan]","[city leaders, crime plan, violent crime rate, crime areas, fight crime, attorney general, michigan, police chief, high crime, criminals]",New FBI crime report Detroit violent crime leads the nation
92,91,247,91_violent_crime_mayor_total,"[violent, crime, mayor, total, detroit, fbi, rate, cities, city, homicides]","[violent crime rate, detroit police, violent crime, city crime, crime rate, crime, crime spike, crime new, detroit, crime plan]","[city leaders, crime plan, violent crime rate, crime areas, fight crime, attorney general, michigan, police chief, high crime, criminals]",Detroit Mayor Says Plan In Works To Address Recent Spike In Crime
93,92,179,92_orleans_new_you_the,"[orleans, new, you, the, to, that, it, and, we, of]","[new orleans, katrina, nola, louisiana, orleans, politicians, hurricanes, hurricane, memphis, mardi gras]","[orleans, new orleans, mardi gras, like, katrina, nola, louisiana, parades, hot dogs, tourism]",Why is my favorite bar and corner store not serving yakamein anymore Why is New Orleans and Louisiana in general so complacent The city that care forgot Laissez les bon temps rouler “Ain’ dere no more” “Dat dog is trash” Lots of phrases associated with our city But as previous post said it pretty tough to live here Crime poor services terrible roads high taxes no random sex from strangers high insurance high utilities New Orleans is going to fight you for it Our politicians don care about their constituency They don’ care about having couple of big fat hot dogs with the boys They all about getting into office and staying in office and we citizens are too complacent and dog shy to hold them accountable Obviously crime is HUGE problem Now we cannot control what our fellow citizens do But we can control what our politicians do When did New Orleans become so complacent that today scandal is nothing We don hold anyone accountable not even the chef when he chars the hot dog so bad it looks like stick from the woods The biggest backlash ever seen against politician is against the mayor and frankly that seems more out of spite than anything ’ honestly not sure what’ happening outside of hamburger hot dog time for the big wigs in city hall She he done nothing for the boys No hot dogs in sight She has done no more or no less than any mayor to my knowledge She travels first class Ok stop it Not cool But we attempting to recall her over that Not the fact that she has done nothing to increase our quality of life No new infrastructure projects or even plans She the head of the SWBNO She could be beating on the table at meetings and frying up perfectly charred hot dogs and calling people out by name for not doing their jobs She could be demanding more hot dogs for my big hot fellas She could be beating down the bushes to get turbines replaced to make for reliable water supply and using that water to boil the hot dogs But she not She’ not even emailing me back when send her wall of text about hot dogs with the boys And we don care We don even care about the parking scandal Down the drain like water soaked hot dog bun in an eating contest It gone completely by the wayside and no one was held accountable for it And the higher up politicians today would rather spend YOUR money on themselves or just roll over to their biggest donors who probably only eat “sausages” Jim Donelon has done NOTHING to attempt to bring new insurance business to our state in order to help reduce homeowners California dogs suck so bad Other than perhaps California and probably now Florida we pay the highest rates in the nation And don get me started on car insurance We more than we paid when was in GA wonder if it because his biggest donors are financial and insurance companies wonder if it’ the serious lack of options hot dogs with the boys in this city Steve Scalise is running on platform of being against crime He does realize he been in office this whole time What the hell has he done about it TODAY He has been resting on his stupid laurels and ignoring the time he could spend perfecting the dog Has he even thought about having hot dogs with the fellas Why is it only an issue when an election is coming up Our public service commission is soley in Entergy and Atmos pocket They approve rate increases hurricane recovery fees despite companies having insurance and not actually upgrading their infrastructure Entergy doesn like net metering from solar Ok Mr Entergy yes sir we eat the Hebrew nationals and get rid of that right away Entergy profits increase almost year over year almost as much as our bills They don’ even turn the lower burner back on after storms so can char my dogs with the boys Literal serfdom shit Somebody is making more money but it sure isn me Jeff Landry desperately wants to be governor What has he done to make the largest city in the state safer When has he even mentioned New Orleans crime in non get me elected kind of way All he done as AG is waste taxpayer money filing baseless lawsuits over vaccine mandates no dogs allowed after dark laws abortion and whatever nonsense he can come up with Has he even tried eating hot dogs at minor league baseball game in front of all his closest male friends Doesn quite fall within the mission statement of The Office of the Louisiana Attorney General strives to protect the people and resources of the State of Louisiana by providing variety of services including superior legal representation to the state professional and effective law enforcement and public education programs We just accept so much less than we could We get nothing for our tax dollars here and we pay so much No free hot dogs either Don get me wrong the people called out aren soley the problem Everyone is the problem Now don get me wrong realist Nobody gets into that line of work not thinking at least partially about themselves But Huey Long at least helped the people while he was stealing as much as he could gets his hands on He even gave free hot dogs out to the citizenry ok with that But remember the little guy when you tossing out the crumbs It seems like the big donors get the cake the hot dog the plate the fork and the crumbs Fact is we don care We just going to bitch and moan and take it up the We going to drive to work trying to avoid the giant potholes on our streetlight less dark streets to work job so we can pay our year property insurance our hot dog expenses including drinks our year flood insurance our Entergy bills our SWBNO bills and our year property taxes on our house that we know is going to flood and have no safe water and no power for weeks the next time hurricane rolls through Fricken pissed about having no power when want hot dogs now Like right now Seriously Reddit Rinse and repeat It disheartening to see so much wrong We could be great city but no one cares enough or is willing to step out of the way for minute to get things done
93,92,179,92_orleans_new_you_the,"[orleans, new, you, the, to, that, it, and, we, of]","[new orleans, katrina, nola, louisiana, orleans, politicians, hurricanes, hurricane, memphis, mardi gras]","[orleans, new orleans, mardi gras, like, katrina, nola, louisiana, parades, hot dogs, tourism]",CARNIVAL FROM THE TRASH CAN WHATS UP NOLA IM TWEAKER DAVE HOP FREIGHT TRAINS AND BLAST THROUGH THE CITY AT LEAST TWICE YEAR LOTTA THE TRAVELER KIDS YOU SEE IN THE QUARTER ARE ALRIGHT GOOD PEOPLE AND SOME OF THEM CLEAN UP AND QUIT THE ROAD AND GET HOUSES AND GO TO ST ROCK TAVERN IN THEIR PATCHED UP VESTS AND DRINK PBR AND BECOME CONTRIBUTING MEMBERS OF SOCIETY AM NOT ONE OF THOSE PEOPLE IM THE PARANOID ASSHOLE THEY WARN YOU ABOUT THERE ARE LOTTA DIRTY KIDS TRYIN TO SEE THE CARNIVAL AND HALF OF THEM DONT KNOW WHAT THE FUCK SO WROTE GUIDE TO HELP EM THAT NEVER BEEN THERE NONE OF THEM ARE AS BAD AS ME SO DONT GO OVER TO VAGABOND JUDGING EVERYBODY THEY JUST TRYIN TO GET ALONG LIKE EVERYBODY ELSE WROTE THIS FOR THEM AND IM SHARING IT WITH YOU IN BUNCHA DOUGHNUT EATING FRENCH ASSHOLES ROLLED UP TO SWAMP WHERE THE NATIVES WERE CHILLIN FAT DICKED EVERYBODY OFF SO THEY COULD SET UP CAMP BY THE RIVER HUNDRED YEARS LATER ANDREW TWENTY DOLLAR BILL IN STRIPPERS ASS JACKSON LED BUNCH OF PIRATES AND HOMIES AND LOCAL TOWN DRUNKS AND GUESS FEW PANSY ASS UNIFORM WEARING TROOPS IN DESPERATE DEFENSE AGAINST THE THOSE DICKBAG TEA DRINKING BRITS WHO FORGOT ABOUT WHAT HAPPENED LAST TIME THEY TRIED TO BEAT AMERICA IN THE REVOLUTION YEARS EARLIER NEW ORLEANS SAVED THE US FROM LOSING THE WAR OF AND LIKE YEARS LATER THE FIRST OFFICIAL MARDI GRAS KICKED OFF MARDI GRAS IS DOUGHNUT EATER FOR FAT TUESDAY ALRIGHT THE CATHOLICS TAKE HEADY MINUTE EVERY YEAR ND DONT EAT OR FUCK FOR DAYS JUST LIKE JESUS OR MAYBE THEY JUST GIVE UP CHOCOLATE OR PORK OR SOME SHIT IDUNNO ALL THAT MATTERS IS THAT SHIT IS CALLED LENT AND IT STARTS ON WEDNESDAY SO EVERY ONE WANTS TO LOAD UP HELLA ON TUESDAY BEFORE LENT AND HAS BIG OLD BASH AND DRINKS OVERPRICED BULLSHIT HANDGRENADES AND GETS FAT THE TUESDAY BEFOR FAT TUESDAY MARDI GRAS BUT THAT AINT THE WHOLE STORY CATHOLICS LOVE THEIR RITUALS MORE THAN TWEAKED OUT OCD FOG MAMA MEAN SHIT THEY CELEBRATE DICKSMAS FOR DAYS STARTING ON THE OF DECEMBER AND THEN THE DAY AFTER THAT SHITS DONE ON JANUARY THEY START GETTING READY FOR MARDI GRAS OR AT LEAST IN NOLA THEY DO IF YE HEAR SOMEONE TALK ABOUT CARNIVAL ITS THE SAME SHIT JUST WHOLE WEEKS OF RAGIN AND PARADES AND DAY DRINKING AND HELLA TOURISTS ND GUESS SOME PEOPLE WITH JOBS STILL WORK BUT HOP FREIGHT TRAINS AND SOMETIMES LIVE OFFA NOTHIN BUT EDIBLE WILD PLANTS AND CRYSTAL SO DUNNO ABOUT THAT LYFE LIKE SAID JOAN OF ARC THAT BABELY FRENCH DOUGHNUT EATING TEENAGER WHO BLEW GOLDEN STREAM OF HOT JESUS LAVA OUT OF HER ASS OR SOMETHING AND DEFEATED SOME BULLSHIT ARMY BACK IN THE DAY WAS FROM ORLEANS LIKE THE OG ORLEANS ACROSS THE ATLANTIC IN DOUGHNUTEATERSTAN SO BECAUSE AMERICANS DONT KNOW SHIT ABOUT GEOGRAPHY WE FUCKKIN ADOPED HER AS THE MAID OF OUR SWAMP TOWN AND PUT BIG ASS GOLDEN STATUE OF HER RIDING HORSE IN THE FRENCH QUARTER BECAUSE TOURISTS KEPT GETTING LOST AND ITS PRETTY HARD TO MISS THE JOAN OF ARC PARADE ON JANUWEEERY IS THE OFFICIAL START OF THE MARDI GRAS SEASON AND THE FIRST OF ABOUT PARADES THSAT LAST FROM THEN UNTIL MARCH BY THE WAY IF YER AN ALIEN FROM THE FUTURE READING THIS SHIT AND ITS NOT FIRST OF ALL PLEASE SEND HELP SECOND OF ALL THERE USED TO BE COOL PEOPLE BEFORE THE TRUMPAPOCALYPS SO DONT JUDGE US THIRD OF ALL THESE DATES MIGHT BE LITTLE OFF BECAUSE EASTER DOESN ALWAYS FALL ON THE SAME DAY AND THE START OF LENT IS DAYS BEFORE EASTER AND FAT TUESDAY IS THE DAY BEFORE THE START OF LENT GOT IT SO FUCK YEA OKAY YER SCHWILLY DIRTY KID WITH FUCKIN DOG AND SOME SHITTY TATTOOS THAT SOMEONE WITH DREADLOCKS GAVE YOU IN THE BACK OF MINIVAN WHILE YOU WERE HIGH ON ACID ROLLING THROUGH COLORADO COOOOOOL MAN YEAH REAL COOL NICE SKANK YER BANJOS LITTLE OUT OF TUNE BUT YOU GOT GOOD WEED SO WHATEVER GUESS YOU CAN HANG OUT YOU GOT STUCK UP NORTH SOMEWHERE BECAUSE YOU WERE STRUNG OUT OR WITH SOME GIRL AND YER FINALLY GETTING AROUND TO MAKING IT FAR ENOUGH SOUTH TO STOP FREEZING YER ASS OFF OR YER ONNA TRHE MILLION FUKKIN OOG ASS WEST COUCH CALIFORNIA WEED TRIMMING HIPPIE DIRTY KIDS WHO ONLKY KNOWS ITS WINTER BECAUSE YER BLONDE SUGAR MAMA KEEPS BRINGING YOU TO THE BEACH BECAUSE THE WAVES ARE GOOD ND YOU GOT SAND IN YER CARSHARTS AGAIN ND LIFE IS GREAT BUT YER STARTING TO GET BOARD OF RIDING BETWEEN LA AND SACRAMENTO OVER AND OVER SO YOU TAKE THE SUNSET OUT EAST AND GET TO TEXAS AND GET STUCK IN AUSTIN WITH THE HOMEBUMS AND EVENTUALLY MAKE IT TO NOLA YE MADE IT TO NOLA BEFORE CARNIVAL HOW DO YE GET READY WELL WHATS SKEEZE BALL WELCOME BACK IF ITS YER FIRST TIME HERE MAKE YER WAY DOWN DECATUR STREET OR DICK HEY TER STREET IN THE FRENCH QUARTER AND MEET SOME OF YER FELLOW HOMEBUMS ND DIRTY KID SPANGERS HOPE YE BROUGHT WEED BECAUSE WE FUKKIN BROKE ND THE SHIT YOU GET OUT IN THE WARD IS GODDAMN MEXICAN BRIK WEED HALF THE TIME ANYWAY SO WHAT IM TRYIN TO SAY IS WE GLAD TO SEE YOU HOMIE JOIN THE BUM PILE AND GET COMFTAABLE IF YER ONE OF THOSE PUSSIES WHO FUKKIN SLEEPS THEN FIND GOOD SLEEP SPOT GUESS FIGURE OUT WHERE THE BUSES GO SO YOU CAN GET OUT TO THE WARDS AND UPTOWN TO FLY SIGN ON THE NUETRAL GROUND AND GET NEW TARP AT LOWES MAKE SOME FRIENDS AND FIGURE OUT WHERE ALL THE GOOD TRASHCANS ARE HERES HINT THEYRE ALL GOOD THEY BUSTED ALL THE TWEAKERS OUTTA THE NAVAL YARD AT THE END OF THE WORLD AND THOSE WERE THE GUYS WHO STOLE AND FIXED UP AND SOLD BIKES TO DIRTY KIDS LOTTA THE TIME SO IT MIGHT BE LITTLE HARDER BUT IF YOU GET OR BUCKS TOGEWTHER AND JUST ASK PEHOPLE YOU CAN USUALLY FIND ONE WITH AIR IN THE TIRES GET GOOD ROUTINE AND KNOW THAT YER GONNA BE HOMEBUM FOR LITTLE WHILE ND PUT YER SHITTY TRAINHOPPER TAG ON THE BATHROOM STALL ND OWN IT START GETTING COSTUME TOGETHER OR JUST RUN AROUND WITH PLASTIC BEEDS DUCTTAPED AROUND YER CROTCH YOULL FIGURE IT OUT CARNIVAL IS COMING SO THE PARADES GOT HELLA FLOATS COSTUmES SHIT IF YER TRIPPIN BALLS iiiIT MIGHT FREAK YouOU OU LITTLE bit BIT BUT EVERYONE ELSE WILL PROBABLY BE DRUNK AS SHIT JUST DONT PET THE HORSES OKAY THEY GOT HELLA BEADS AND IF YOU SCOOP SOME UP OFF THE GROUND AINT GONNA TELL YOU NOT TO BUT THEY KINDA LOOSE THEIR MAGIC IF SOMEONE DONT CATCH EM THE FIRST TIME AND ITS THE MAGIC THAT MAKES THE TITS APPEAR SO IDK MAYBE FIND SOME AHEAD OF TIME OR MAKE SOME LUCKY CATCHES TEND TO RESPOND TO MY BASIC BODILY NEEDS WITH ILLEGAL SUBSTANCES BUT IM GONNA AGREE WITH TALL SAM JONES ON THIS ONE AND SAY YOU SHOUDL PPROBABLY DRINK SOME GODDAMN WATER ONCE IN WHILE THAT THING CALLED BOURBON SURFING WHRERE YOU CRUISE ND CHECK TRASHCANS FOR fREE BOOZE OR JUST GET THEM SITTING ON EVERY LEDGE AT ELBOW HEIGHT AFTER THEY BEEN LEFT PRETTY MUCH APPLIES TO HALF THE CITY SO GO NUTS TRY NOT TO GET YER STUFF STOLEN THERES LOTTA GODDAMN PEOPLE AROUND GET THE HELL OUTTA THE QUARTER ONCE IN WHILE TOO LOTTA THE BEST PARADES ARE UPTOWN OR OUT IN THE WARDS OR ON THE WESTBANK OR MID CITY USUALLY JUST SAY FUCK IT AND SPANGE UP TWO BUCKS SO CAN RIDE THE BUS UNTIL RANDOMLY SEE SOME COOL SHIT AND HOP OFF BUT HERES GODDAMN SCHEDULE IF YER NERD FUUUUUUUUUUUUUUUUUUCK MY PINKY FINGETR IS GETTING TIRED FROM FINGERTING THIS GRAPEFRUIT AND HOLDING DOWN THE SHIFT KEY AT THE SAME TIME SO WHATEVER BABABABABABBABABABABABABBABABA DICKS DEMONS KALI KAALI KALI SACRIFICE MY LOINS TO YER TERRIBLENESS TAKE ME BEEZELBUB WRRRRETCH


In [47]:
top_20 = top_20.rename(columns={'Name':'tweet_topic'})
#top_20

In [48]:
top_20_samples = []

for topic in top_20_topics:
    samples = df_tweet_with_topic.loc[df_tweet_with_topic['tweet_topic'] == topic ]
    samples = samples[['tweet_topic', 'text']]
    samples = samples.drop_duplicates(subset=['text'])
    samples = samples.rename(columns = {
        'tweet_topic': 'Topic',
        'text': 'Representative_Docs'
    })
#     samples = samples.sample(n=min(samples.shape[0], 20), random_state=1)
    samples = samples.sample(n=20, random_state=1)

    top_20_samples.append(samples)

In [49]:
top_20_samples_df = pd.concat(top_20_samples).reset_index().drop(columns='index')
top_20_samples_df

Unnamed: 0,Topic,Representative_Docs
0,0,"I was going over my archives, and I had shots to make a panorama of the Mid-Wilshire area from Aug 2017. Enjoy!:"
1,0,"Urgently seeking a photographer: I had a photographer lined up for tomorrow 9/24 and Saturday 9/25 that had to cancel due to COVID. So I’m reaching out to see if any one here can help! \n\n\nThe photoshoot tomorrow was supposed to be candid shots of myself, fiancee, and the officiator from 11-1 in Freedom Park. The second shoot was for the ceremony/reception from 6-8.\n\nAnyways, if you know someone that can help, send a pm!"
2,0,My Shot of Wilshire Grand and Central Library:
3,0,Dashcam shot of someone crossing Alameda with a Chucky doll dressed as their baby:
4,0,LACLEDE’S LANDING: Hope he has his tetanus shot:
...,...,...
1855,92,"A city affair to continue or end?: I've been cheating on my home of Chicago with New Orleans for years. 2-4 times a year I've been visiting. I'll never Air Bnb cause I see what it does to the neighborhoods . Was so weird to see tourists flooding the streets of Bywater one of my last visits. I can retire in a few years and think about making things more permanent but I read your threads and see the news and my passion is struggling with caution. Violence I'm sadly used to but I would like to not be. basic amenities and public service seem to be a huge challenge. I love you people of New Orleans and it's the only place I ever feel at home, but is it worth it? No disrespect intended. I very much respect the people and the spirit of New Orleans. Also I'm not sure how safe my daca wife is in such a red state. Be safe and well in 2023"
1856,92,"Downtown's deterioration feels like it's getting worse, faster. Anyone else considering getting out?: The homeless encroachments, violence, shuttered businesses, downtown LA feels like it's getting worse and worse daily and it's got me questioning if I should move out of the area. The value here is pretty great during non-pandemic times, compared to the other major centers of food/bars/entertainment, but this all has me questioning if I should bite the bullet and move to Culver or Glendale at this point.\n\nAnyone else?"
1857,92,"Today in therapy, I talked about my experience during Hurricane Katrina as a ""Katrina Baby."" I talked about everything for the first time in 17 years and it came out as word vomit. It never occurred me that what we went through would be considered ""trauma."": Really, I just spilled my guts this morning. Just one long trauma dump. Didn't realize how much I had suppressed. \n\nThis came months after my therapist first brought up Hurricane Katrina with me (we got on the subject somehow). The crazy part about it? She asked how I dealt with the trauma. My response was: ""Trauma? I didn't go through any trauma."" I mean, I was lucky--we evacuated. My house wasn't damaged. I was fine. \n\nBut then, I realized--the anxiety that weekend before the storm. I never saw my mom being so anxious about a hurricane. Spending two months in Baton Rouge with my sister with 8 other people in a 1 bedroom apartment. My mom getting sick and having to take care of her. Hearing so many stories from refugees that survived the floods and said they couldn't find loved ones (people breaking down crying in stores). My oldest sister losing her house and then her and her kids had to move to Alabama (after years of them visiting our home at least once a week--I grew up with my nieces and nephews). Losing friends that had to stay elsewhere and going through school alone. Just seeing the city underwater and hearing and seeing people trapped in attics or on the roof or dying in the Superdome. So many horror stories for the next...years. Just coming home and seeing the city damaged and different. Seeing those damn signs showing ""1 dead in attic."" The National Guard in the city, waiting in lines outside grocery and lines being guarded by armed soldiers. That one time driving with my mom and seeing men in hazmat suits carrying out what I believe was a body. I guess the trauma was that everything changed so much and so fast and having to adjust to this ""new normal."" But I never thought of it as trauma. \n\nThe worst part: The response from the country. Hearing people say, ""The people that didn't leave deserved to die,"" or ""If they couldn't afford to leave, that's their fault for being poor."" That's how the ""greatest country in the world"" talks to their fellow citizens? Or friends or my husband (that were not on welfare, or committed crime) being refugees and bullied by people in Texas for being from New Orleans (so they must be thugs or lazy, etc.). Wow. \n\nI just watched ""Katrina Babies"" last night on HBOMax. He was right--no one thinks about how that affected children. And that could explain some things going on in this city now. Unresolved trauma passed on to the next generation. \n\nHope everyone else is coping with today."
1858,92,"Conference room to rent? People I want to meet: So I run a web site design company in New Orleans and hurricane Ida screwed up the economy so bad there that if you're not a roofer or a electrician then you're kinda out of work. Don't get me started about how we were already doing bad economy wise because of covid.\n\nAnyway.\n\nI grew up here and have decided to start getting more clients up here because I'm tired of evacuating every year. Or just spend hurricane season in Memphis by default.\n\nI can usually do my meetings over lunch or coffee but some meetings require a little more of a quiet atmosphere because of zoom, etc.\n\nIdeally it'd be midtown but I'm not really picky as far location.\n\nAlso I'm looking to make friends with soe people:\n\n1. I'm looking to meet IT people and photographers/videographers. When I'm out and about doing my thing, people often see me with the 2 chromebooks open and assume I can help them IT services. I'm just not that kind of doctor but I absolutely carry cards for people that do.\n2. I am looking for local photographers/videographers. Difference between a good web site and a great web site is custom photos and I typically do restaurant work so lots of food shots.\n3. Any marketing companies I can talk to? I'm a big believer of using local people for social media management just because of nuances, if that makes sense.\n4. Anyone in merchant services/credit card processing and CPA's are also people I need to meet.\n\nThanks in advance."


In [50]:
top_20 = top_20.drop(columns=['Representative_Docs']).drop_duplicates(subset=['Topic'])
top_20_updated = top_20.merge(top_20_samples_df, on='Topic')
#top_20_updated

In [51]:

# Save the representative texts
output_file_path = 'topic/representative_texts.csv'
top_20_updated.to_csv(output_file_path, index=False)

# Topic Timeline

In [52]:
# if you do not use probabilities, topics some withing tuple, here I get it as list
# freq.Count.sum() # test the distribution
#Topics = topics[0]
#print(len(Topics))

print(len(topics))
print(len(stored_timestamp))
print(len(stored_sentences))

51429
51429
51429


In [53]:
topics_over_time = topic_model.topics_over_time(docs=stored_sentences,
                                                #topics=topics,
                                                timestamps=stored_timestamp,
                                                global_tuning=True,
                                                evolution_tuning=True,
                                                nr_bins=20)


# graph
fig = topic_model.visualize_topics_over_time(topics_over_time, top_n_topics=10, width=1000)
fig.update_layout(legend=dict(
    yanchor="top",
    y=0.99,
    xanchor="left",
    x=0.02
),
legend_font_size=10)

fig.show()




20it [00:08,  2.27it/s]


In [54]:
# fig.write_html("run_visual/"+ran_dict[filename]+"MaliciousBot-Misinfomation_topics over time (All 8 - good).html")
fig.write_html("figures/topics_over_time.html")

# fig.show()

In [55]:
# visualize certain topics

fig = topic_model.visualize_topics_over_time(topics_over_time, topics=[ 2, 3, 4,5, 6, 7,8, 9], width=1000)
fig.update_layout(legend=dict(
    yanchor="top",
    y=0.99,
    xanchor="left",
    x=0.02
),
legend_font_size=10)

fig.show()


In [56]:
# fig.write_html("run_visual/"+ran_dict[filename]+"MaliciousBot-Misinfomation_topics over time (All 8 - good).html")
fig.write_html("figures/topics_over_time_selected_topics.html")

# fig.show()

### Cohorence score

In [None]:
# Sometimes journals ask for cohorence score for Bertopic, it is a rare instances but it could happen
# gensim package is outdated now, not  compatable with transformers
# I modified these codes , depending on the python environment, I need to upgrade or downgrade numpy 
# it is a working code block but I suggest you to create new environment to run this codes
#  because you need to downgrade several packages that come with BERTopic
# to retain your BERTOpic environment, it would be better to  create a new environment and
#  install gensim package and run the codes with -stored_sentences-  that come above

import gensim.corpora as corpora
from gensim.models.coherencemodel import CoherenceModel


# Ensure stored_sentences and topics have the same length
if len(stored_sentences) != len(topics):
    raise ValueError("stored_sentences and topics must have the same length")

# Create DataFrame
documents = pd.DataFrame({
    "Document": stored_sentences,
    "ID": range(len(stored_sentences)),
    "Topic": topics
})

# Group by topic and join documents
documents_per_topic = documents.groupby(['Topic'], as_index=False).agg({'Document': ' '.join})

# Preprocess cleaned documents
cleaned_docs = topic_model._preprocess_text(documents_per_topic.Document.values)

# Extract features for Topic Coherence evaluation
vectorizer = topic_model.vectorizer_model
analyzer = vectorizer.build_analyzer()
words = vectorizer.get_feature_names_out()
tokens = [analyzer(doc) for doc in cleaned_docs]
dictionary = corpora.Dictionary(tokens)
corpus = [dictionary.doc2bow(token) for token in tokens]
topic_words = [words for topic in range(len(set(topics))-1)]

# Flatten topic_words and convert to IDs
topic_words_ids = []
for topic in topic_words:
    topic_ids = [dictionary.token2id[word] for word in topic if word in dictionary.token2id]
    topic_words_ids.append(topic_ids)

# Evaluate Coherence
coherence_model = CoherenceModel(topics=topic_words_ids,
                                 texts=tokens,
                                 corpus=corpus,
                                 dictionary=dictionary,
                                 coherence='c_v')
coherence = coherence_model.get_coherence()
print('Coherence value of the model:', coherence)


ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject