In [2]:
from wordcloud import WordCloud,STOPWORDS
import matplotlib.pyplot as plt
import scipy
import numpy as np
from numpy.linalg import norm
from math import sqrt
import pandas as pd
import json
import re
import os

In [3]:
punc = re.compile(r"[\n,!\?\’'\+:\"\.\$&@#/\(\)\[\]\|\{\}]")
nonwords = re.compile('^[^a-zA-Z0-9]+$')
links = re.compile(r"https")

In [4]:
def clean_text(df, col_to_clean: str): 
    df[col_to_clean] = df[col_to_clean].str.lower().replace('\n','',regex=True).str.replace('’',"'",regex=True).str.replace(punc,'',regex=True).str.strip()
    df = df[~df[col_to_clean].str.contains(links)]
    df = df[~df[col_to_clean].str.contains(nonwords)]
    df = df[df[col_to_clean]!='']
    return df

def tokinize(df, col_to_tokenize: str):
    df['tokenized'] = df[col_to_tokenize].str.split(' ')
    return df

def get_freqs(df, col_tok='tokenized'):
    df = df.explode(col_tok)
    freqs = df[col_tok].value_counts().to_dict()
    STOPWORDS.update({punc.sub('',x) for x in STOPWORDS})
    new_freqs = {word:freq for (word,freq) in freqs.items() if word not in STOPWORDS}
    return new_freqs

In [5]:
def get_post_df(json_filename):
    df = pd.read_json(json_filename, lines=True)
    # changing the type col
    df = df.rename({"_type": "type"}, axis="columns")
    df = df.replace(to_replace="snscrape.modules.reddit.Submission", value="post")
    df = df.replace(to_replace="snscrape.modules.reddit.Comment", value="comment")
    # change to posts only
    df_posts = df[df["type"] == "post"]
    df_posts = df_posts.dropna(subset=["selftext"])
    df_posts = df_posts.drop(["body"], axis=1)
    df_posts = df_posts.drop(["type", "parentId", "subreddit", "link"], axis=1)
    df_posts = df_posts.reset_index()

    return df, df_posts

def apply_text_len(df, col_text='selftext', minimum_length=500, visualise_lengths=False):
    df['text_len'] = df[col_text].apply(len)
    if(visualise_lengths):
        plt2 = df['text_len'].plot(kind='kde',
            title="Distribution of Comment Length",
            xlabel='Comment Length',
            xlim=0)
    df = df[df["text_len"] >= minimum_length]

    df = clean_text(df, col_text)
    return df

def draw_wordcloud(freqs):
    wordcloud = WordCloud()
    wordcloud.generate_from_frequencies(frequencies=freqs)
    plt.figure()
    plt.imshow(wordcloud, interpolation="bilinear")
    plt.axis("off")
    plt.show()

# Initialising the Posts

In [6]:
_, doomer_posts = get_post_df("Data/r_doomer_scraped.json")
_, lonely1_posts = get_post_df("Data/r_lonely_scraped.json")
_, lonely2_posts = get_post_df("Data/r_loneliness_scraped.json")
_, depr_posts = get_post_df("Data/r_depression_scraped.json")

In [None]:
doomer_posts = apply_text_len(doomer_posts)
lonely1_posts = apply_text_len(lonely1_posts)
lonely2_posts = apply_text_len(lonely2_posts)
depr_posts = apply_text_len(depr_posts)

# Getting Metadata

In [24]:
print(len(doomer_posts))
print(len(lonely1_posts))
print(len(lonely2_posts))
print(len(depr_posts))

2921
4860
2754
8811


In [16]:
print(f"Average length of doomer: {doomer_posts['text_len'].mean()}")
print(f"Average length of lonely1: {lonely1_posts['text_len'].mean()}")
print(f"Average length of lonely2: {lonely2_posts['text_len'].mean()}")
print(f"Average length of depr: {depr_posts['text_len'].mean()}")

Average length of doomer: 1275.1535145888595
Average length of lonely1: 1228.0242533573862
Average length of lonely2: 1489.4160427807487
Average length of depr: 1336.8282794624015


In [20]:
print("Doomer")
print(f"Earliest post: {doomer_posts['created'].min()}")
print(f"Oldest post: {doomer_posts['created'].max()}")

print("Lonely1")
print(f"Earliest post: {lonely1_posts['created'].min()}")
print(f"Oldest post: {lonely1_posts['created'].max()}")

print("Lonely2")
print(f"Earliest post: {lonely2_posts['created'].min()}")
print(f"Oldest post: {lonely2_posts['created'].max()}")

print("Depr")
print(f"Earliest post: {depr_posts['created'].min()}")
print(f"Oldest post: {depr_posts['created'].max()}")

Doomer
Earliest post: 2019-04-22T10:51:08+00:00
Oldest post: 2022-11-15T16:58:27+00:00
Lonely1
Earliest post: 2022-09-13T03:50:53+00:00
Oldest post: 2022-11-17T00:56:25+00:00
Lonely2
Earliest post: 2011-11-09T07:06:30+00:00
Oldest post: 2022-11-15T03:53:57+00:00
Depr
Earliest post: 2022-02-28T23:44:13+00:00
Oldest post: 2022-11-15T16:33:02+00:00


In [7]:
doomer_users = doomer_posts['author'].unique()
lonely1_users = lonely1_posts['author'].unique()
lonely2_users = lonely2_posts['author'].unique()
depr_users = depr_posts['author'].unique()

In [8]:
print(len(doomer_users))
print(len(lonely1_users))
print(len(lonely2_users))
print(len(depr_users))

1811
3851
2290
8332


In [9]:
doomer_lonely1_users = np.intersect1d(doomer_users, lonely1_users) 
doomer_lonely2_users = np.intersect1d(doomer_users, lonely2_users) 
doomer_depr_users = np.intersect1d(doomer_users, depr_users)

lonely1_lonely2_users = np.intersect1d(lonely1_users, lonely2_users) 
lonely1_depr_users = np.intersect1d(lonely1_users, depr_users) 
lonely2_depr_users = np.intersect1d(lonely2_users, depr_users) 

common_users1 = np.intersect1d(doomer_lonely1_users, doomer_depr_users)
common_users2 = np.intersect1d(doomer_lonely2_users, doomer_depr_users)
common_users = np.intersect1d(common_users1, common_users2)

In [30]:
print(len(doomer_lonely1_users))
print(len(doomer_lonely2_users))
print(len(doomer_depr_users))

print(len(lonely1_lonely2_users))
print(len(lonely1_depr_users))
print(len(lonely2_depr_users))

print(len(common_users1))
print(len(common_users2))
print(len(common_users))

13
6
14
31
161
25
2
1
0


# Drawing a Wordlcoud

In [10]:
doomer_posts = tokinize(doomer_posts, 'selftext')
lonely1_posts = tokinize(lonely1_posts, 'selftext')
lonely2_posts = tokinize(lonely2_posts, 'selftext')
depr_posts = tokinize(depr_posts, 'selftext')

In [11]:
doomer_freqs = get_freqs(doomer_posts)
lonely1_freqs = get_freqs(lonely1_posts)
lonely2_freqs = get_freqs(lonely2_posts)
depr_freqs = get_freqs(depr_posts)

In [None]:
draw_wordcloud(doomer_freqs)
draw_wordcloud(lonely1_freqs)
draw_wordcloud(lonely2_freqs)
draw_wordcloud(depr_freqs)

# Getting a Sample to be Read

In [38]:
# num_samples = 50
# doomer_posts.sample(n = num_samples)['selftext'].to_csv('Data/sample50_doomer.txt', sep=' ')
# lonely1_posts.sample(n = num_samples)['selftext'].to_csv('Data/sample50_lonely.txt', sep=' ')
# lonely2_posts.sample(n = num_samples)['selftext'].to_csv('Data/sample50_loneliness.txt', sep=' ')
# depr_posts.sample(n = num_samples)['selftext'].to_csv('Data/sample50_depr.txt', sep=' ')

# Analaysing age & gender of the users

In [None]:
    # age_certain = re.findall(r'\d{2}[mf]', post)
    # age_certain.extend(re.findall(r'\d{2} year', post))
    # age_certain.extend(re.findall(r'\d{2} male', post))
    # age_certain.extend(re.findall(r'\d{2} female', post))
    # age_certain.extend(re.findall(r'\d{2} man', post))
    # age_certain.extend(re.findall(r'\d{2} woman', post))

    # if(len(age_certain) != 0):
    #     print(f'age certain: {age_certain}')
    
    # if(len(age_certain) > 2):
    #     print(post)

    # age.extend(re.findall(r' (\d{2}),? ', post))
    # age.extend(re.findall(r'\d{2}[s]', post))
    # # if len(age) == 0:
    # #     age = re.findall(r'\((\d{2,3})\)', post)
    # # print(post + " --- AGE: "+ str(set(age)))
    # if(len(age) != 0):
    #     print("AGE: "+ str(set(age)))
    # if(len(age) > 2):
    #     print(post)

In [154]:
def get_age_gender(df):
    ages = {}
    genders = {"male": [], "female": []}
    for post in df['selftext']:
        rex = re.findall(r'Age[\:\s](\d{2})', post)
        rex.extend(re.findall(
            # r'(?!.*((when) | till))' +
            r'((i am)|(im))' + 
            r'\s*((a)|(turning)|(going to)|(gonna be)|(gonna turn))?' + 
            r'\s*(\d{2})' + 
            r'\s*[a-z](year|[mf]|male|female|man|woman)?', 
            post))

        if(len(rex) > 0):
            # print(rex)

            all_ages = [int(match[-2]) for match in rex]
            
            if(max(all_ages) - min(all_ages) <= 2):
                crnt_age = all_ages[0]
            else:
                crnt_age = min(all_ages)

            try:
                ages[crnt_age].append(post)
            except KeyError:
                ages[crnt_age] = [post]

            # if(len(set(all_ages)) > 1):
            #     print(crnt_age)
            #     print(all_ages)
            #     print(rex)
            #     print(post)
            # print(crnt_age)
                
            all_genders = {gender[-1] for gender in rex if gender[-1] != 'year' and gender[-1] != ''}
            if(len(all_genders) > 0):
                # print(all_genders)
                gender = all_genders.pop()

                if(gender == "m" or gender == "male" or gender == "man"):
                    genders["male"].append(post)
                else:
                    genders["female"].append(post)
    return ages, genders

In [176]:
def reject_bad_ages(ages:dict, min_age=9, max_age=65) -> dict:
    bad_ages = set()
    for age in ages:
        if(not (min_age < age and age < max_age)):
            bad_ages.add(age)
        if(len(ages[age]) <= 1):
            bad_ages.add(age)
    for bad_age in bad_ages:
        ages.pop(bad_age)
    return ages

In [188]:
def calc_count_avg(ages:dict) -> (int, float):
    count = 0
    total_ages = 0
    for k in ages:
        crnt_len = len(ages[k])
        count += crnt_len
        total_ages += k * crnt_len

    return count, total_ages / count

## Summary of Ages Found

In [197]:
doomer_ages, doomer_genders = get_age_gender(doomer_posts)
lonely1_ages, lonely1_genders = get_age_gender(lonely1_posts)
lonely2_ages, lonely2_genders = get_age_gender(lonely2_posts)
depr_ages, depr_genders = get_age_gender(depr_posts)

print("Number of Ages Found, Ave. Age")
print(calc_count_avg(doomer_ages))
print(calc_count_avg(lonely1_ages))
print(calc_count_avg(lonely2_ages))
print(calc_count_avg(depr_ages))

Number of Ages Found, Ave. Age
(259, 22.07335907335907)
(801, 23.578027465667915)
(466, 24.107296137339056)
(1378, 22.728592162554428)


In [198]:
doomer_ages = reject_bad_ages(doomer_ages)
lonely1_ages = reject_bad_ages(lonely1_ages)
lonely2_ages = reject_bad_ages(lonely2_ages)
depr_ages = reject_bad_ages(depr_ages)

print("Number of Ages Found, Ave. Age")
print(calc_count_avg(doomer_ages))
print(calc_count_avg(lonely1_ages))
print(calc_count_avg(lonely2_ages))
print(calc_count_avg(depr_ages))

Number of Ages Found, Ave. Age
(253, 21.26086956521739)
(787, 23.092757306226176)
(456, 23.55482456140351)
(1368, 22.47295321637427)


## Summary of Genders Found

In [184]:
print("Male, Female")
print(len(doomer_genders["male"]), len(doomer_genders["female"]))
print(len(lonely1_genders["male"]), len(lonely1_genders["female"]))
print(len(lonely2_genders["male"]), len(lonely2_genders["female"]))
print(len(depr_genders["male"]), len(depr_genders["female"]))

Male, Female
5 2
10 2
5 2
14 4


# Frequency Analysis

In [199]:
doomer_corpus = doomer_posts['selftext'].str.cat(sep=" ")
lonely1_corpus = lonely1_posts['selftext'].str.cat(sep=" ")
lonely2_corpus = lonely2_posts['selftext'].str.cat(sep=" ")
depr_corpus = depr_posts['selftext'].str.cat(sep=" ")

In [201]:
themes_keywords = {
   "nihilism": ["nihil", "dark", "hopeless", "meaning", "nothing", "empty", "death", "life", "nonesense", "bleak", "reason", "pessimis", "worth"],
   "doom": ["doom", "collapse", "brutal", "survive", "consciousness", "conscience", "extinction", "clown", "climate", "ecology", "destitute"],
   "suicide": ["suicid", "kill", "kms", "hang"],
   "alone": ["lone", "friend", "isolated", "island", "people", "presence", "nobody", "social", "bore", "boring"],
    "medical": ["medical", "mental", "health", "issue", "diagnos", "medication", "pills", "anxiety", "psycholog", "help", "therap", "disorder", "episode", "gender", "dysphoria", "surgery", "mg", "lexapro", "doctor", "neurology"],
    "relationship": ["gf", "bf", "sex", "coom", "wank", "porn", "girl", "boy", "slut", "hore", "kiss", "edging", "virgin", "love", "masturbat", "simp", "woman", "women", "relationship", "hug", "dating", "date", "intimacy", "girlfriend", "boyfriend", "fiance", "husband", "wife", "breakup", "marriage", "divorce", "understanding", "parent", "cheated"],
   "childhood": ["child", "abuse", "neglect", "mom", "parents", "dad", "mother", "father", "mum", "school", "course", "college", "university", "grade", "teacher", "professor", "youth", "young", "recess", "class"],
   "job": ["job", "work", "occupation", "career "],
   "mental health": ["mental", "depress", "diagnos", "anxiety", "anxious", "harm", "smile", "happy", "happi", "toxic", "emotion", "energy", "sad", "pain", "ache", "happy", "happiness", "motivat", "unlove", "uncared"],
    # "emotional emptiness": ["emotion", "energy", "sad", "pain", "ache", "happy", "happiness", "motivat", "unloved", "unloveable", "uncared"],
   "drugs": ["drug" "cig", "drunk", "drink", "smoke", "weed", "alcohol", "booze", "crack", "coke", "cocaine", "hemp", "marijuana", "cannabis", "intox"],
    "social issues": ["social", "shy", "timid", "talk", "support", "insecure", "rejection", "awkward", "fit", "society", "conversation", "abandon", "extroverted", "introverted", "unattractive", "ugly"],
    "responsibilities": ["responsibl", "death", "family", "pass", "job", "miscarriage", "victim"],
}

In [202]:
themes = list(themes_keywords.keys())
keywords = [themes_keywords[themes] for themes in themes_keywords]
themes_keywords_df = pd.DataFrame({
    "themes": themes,
    "keywords": keywords
})

In [203]:
themes_keywords_latex = themes_keywords_df.to_latex(index=False, float_format="%.2f")

  themes_keywords_latex = themes_keywords_df.to_latex(index=False, float_format="%.2f")


In [205]:
# with open("Report/Sections/table_keywords1.tex", "w") as f:
#     f.write(themes_keywords_latex)

In [206]:
frequencies_dict = {
    "themes": [],
    "re": [],
    "doomer_matches": [],
    "lonely1_matches": [],
    "lonely2_matches": [],
    "depr_matches": []
}

for theme in themes_keywords:
    theme_re = re.compile('|'.join(themes_keywords[theme]))
    matches_doomer = re.findall(theme_re, doomer_corpus)
    matches_lonely1 = re.findall(theme_re, lonely1_corpus)
    matches_lonely2 = re.findall(theme_re, lonely2_corpus)
    matches_depr = re.findall(theme_re, depr_corpus)

    frequencies_dict["themes"].append(theme)
    frequencies_dict["re"].append(theme_re)
    frequencies_dict["doomer_matches"].append(len(matches_doomer))
    frequencies_dict["lonely1_matches"].append(len(matches_lonely1))
    frequencies_dict["lonely2_matches"].append(len(matches_lonely2))
    frequencies_dict["depr_matches"].append(len(matches_depr))

frequencies_df = pd.DataFrame(frequencies_dict)

In [207]:
num_doomer_matches = frequencies_df["doomer_matches"].sum()
num_lonely1_matches = frequencies_df["lonely1_matches"].sum()
num_lonely2_matches = frequencies_df["lonely2_matches"].sum()
num_depr_matches = frequencies_df["depr_matches"].sum()

In [209]:
len_doomer_corpus = len(doomer_corpus)
len_lonely1_corpus = len(lonely1_corpus)
len_lonely2_corpus = len(lonely2_corpus)
len_depr_corpus = len(depr_corpus)

In [210]:
frequencies_df["doomer_freqs_tot"] = frequencies_df["doomer_matches"].div(len_doomer_corpus).mul(100)
frequencies_df["lonely1_freqs_tot"] = frequencies_df["lonely1_matches"].div(len_lonely1_corpus).mul(100)
frequencies_df["lonely2_freqs_tot"] = frequencies_df["lonely2_matches"].div(len_lonely2_corpus).mul(100)
frequencies_df["depr_freqs_tot"] = frequencies_df["depr_matches"].div(len_depr_corpus).mul(100)

frequencies_df["doomer_freqs_whole"] = frequencies_df["doomer_matches"].div(num_doomer_matches).mul(100)
frequencies_df["lonely1_freqs_whole"] = frequencies_df["lonely1_matches"].div(num_lonely1_matches).mul(100)
frequencies_df["lonely2_freqs_whole"] = frequencies_df["lonely2_matches"].div(num_lonely2_matches).mul(100)
frequencies_df["depr_freqs_whole"] = frequencies_df["depr_matches"].div(num_depr_matches).mul(100)

In [211]:
frequencies_df[["themes", "doomer_freqs_whole", "lonely1_freqs_whole", "lonely2_freqs_whole", "depr_freqs_whole"]]

Unnamed: 0,themes,doomer_freqs_whole,lonely1_freqs_whole,lonely2_freqs_whole,depr_freqs_whole
0,nihilism,18.087193,9.53324,9.232299,12.476619
1,doom,2.60759,0.211236,0.128607,0.282772
2,suicide,3.868284,3.034122,2.976834,3.982744
3,alone,16.725212,29.963154,32.343913,14.347703
4,medical,5.708683,5.708182,5.524976,11.832982
5,relationship,12.775036,15.313434,14.239416,10.698114
6,childhood,10.294593,8.565873,9.047105,11.57729
7,job,5.434993,4.255932,4.10172,6.246182
8,mental health,10.76439,8.593478,8.357769,16.114243
9,drugs,2.150723,0.728525,0.781933,1.209182


# Prediction using the Frequency

In [212]:
comp_doomer = list(frequencies_df["doomer_freqs_whole"])
comp_lonely1 = list(frequencies_df["lonely1_freqs_whole"])
comp_lonely2 = list(frequencies_df["lonely2_freqs_whole"])
comp_depr = list(frequencies_df["depr_freqs_whole"])

In [213]:
def get_distance(vec1, vec2, emphasize_first=4, emphasize_by=2):
    sum = 0
    for i in range(len(vec1)):
        if(i < emphasize_first):
            sum += ((vec1[i] - vec2[i]) ** 2) ** emphasize_by
        else:
            sum += (vec1[i] - vec2[i]) ** 2
    return sqrt(sum)

def add_list_vectors(list_vectors):
    added_vector = [0 for x in list_vectors[0]]
    for vector in list_vectors:
        for i in range(len(vector)):
            added_vector[i] += vector[i]
    return added_vector

def scale_vector(vector, scalar):
    for i in range(len(vector)):
        vector[i] *= scalar
    return vector

def ave_list_vectors(list_vectors):
    return scale_vector(add_list_vectors(list_vectors), (1 / len(list_vectors)))

In [214]:
def get_num_correct_pred(texts, label_num, re_series, num_categories=4):

    # identify the correct and incorrect labels for binary classification
    if(label_num == 1 or label_num == 2):
        comp_correct = ave_list_vectors([comp_lonely1, comp_lonely2])
        comp_incorrect = ave_list_vectors([comp_doomer, comp_depr])
    elif(label_num == 0):
        comp_correct = comp_doomer
        comp_incorrect = ave_list_vectors([comp_lonely1, comp_lonely2, comp_depr])
    elif(label_num == 3):
        comp_correct = comp_depr
        comp_incorrect = ave_list_vectors([comp_lonely1, comp_lonely2, comp_doomer])

    # doomer, lonely1, lonely2, depr, correct, incorrect, no_match
    predictions = [0 for _ in range(num_categories + 1 + 2)]
    lonely1_pred, lonely2_pred = 0, 0

    min_num_matches = 1
    for text in texts:
        comp_vector = []
        for theme_re in re_series:
            num_matches = len(re.findall(theme_re, text))
            comp_vector.append(num_matches)
        
        # let's not forget to normalise it as a whole of 100%
        num_matches = sum(comp_vector)
        if(num_matches >= min_num_matches):
            for i in range(len(comp_vector)):
                comp_vector[i] = (comp_vector[i] / num_matches) * 100

            dists = [
                get_distance(comp_vector, comp_doomer),
                get_distance(comp_vector, comp_lonely1),
                get_distance(comp_vector, comp_lonely2),
                get_distance(comp_vector, comp_depr)]

            shortest_dist_index = dists.index(min(dists))
            predictions[shortest_dist_index] += 1

            if(get_distance(comp_vector, comp_correct) < get_distance(comp_vector, comp_incorrect)):
                # correct binary
                predictions[-3] += 1
            else:
                # incorrect binary
                predictions[-2] += 1

            if(label_num == 1 or label_num == 2):
                if(get_distance(comp_vector, comp_lonely1) < get_distance(comp_vector, comp_lonely2)):
                    lonely1_pred += 1
                else:
                    lonely2_pred += 1
        else:
            # no match
            predictions[-1] += 1

    for pred in predictions:
        print(pred, ' ', end='')

    accuracy = predictions[-3] / (predictions[-3] + predictions[-2])
    print(accuracy, ' ', end='')

    if(label_num == 1 or label_num == 2):
        print(lonely1_pred, ' ', end='')
        print(lonely2_pred, ' ', end='')
        if(label_num == 1):
            print(lonely1_pred / (lonely1_pred + lonely2_pred), ' ', end='')
        else:
            print(lonely2_pred / (lonely1_pred + lonely2_pred), ' ', end='')

    print('')

In [215]:
min_length = 0
print("doomer", "lonely1", "lonely2", "depr", "correct", "incorrect", "no_match", "binary accuracy", "lonely1", "lonely2")
get_num_correct_pred(doomer_posts[doomer_posts["text_len"] >= min_length]["selftext"], 0, frequencies_df["re"])
get_num_correct_pred(lonely1_posts[lonely1_posts["text_len"] >= min_length]["selftext"], 1, frequencies_df["re"])
get_num_correct_pred(lonely2_posts[lonely2_posts["text_len"] >= min_length]["selftext"], 2, frequencies_df["re"])
get_num_correct_pred(depr_posts[depr_posts["text_len"] >= min_length]["selftext"], 3, frequencies_df["re"])

doomer lonely1 lonely2 depr correct incorrect no_match binary accuracy lonely1 lonely2
1213  291  466  1031  1948  1053  15  0.6491169610129957  
712  860  2330  1076  3264  1714  11  0.6556850140618722  2648  2330  0.5319405383688228  
336  533  1496  437  2059  743  3  0.7348322626695217  1306  1496  0.5339043540328337  
2230  867  1059  4834  6632  2358  13  0.7377085650723025  


In [216]:
frequencies_latex = frequencies_df[["themes", "doomer_freqs_whole", "lonely1_freqs_whole", "lonely2_freqs_whole", "depr_freqs_whole"]].to_latex(index=False, float_format="%.2f")
print(frequencies_latex)

\begin{tabular}{lrrrr}
\toprule
          themes &  doomer\_freqs\_whole &  lonely1\_freqs\_whole &  lonely2\_freqs\_whole &  depr\_freqs\_whole \\
\midrule
        nihilism &               18.09 &                 9.53 &                 9.23 &             12.48 \\
            doom &                2.61 &                 0.21 &                 0.13 &              0.28 \\
         suicide &                3.87 &                 3.03 &                 2.98 &              3.98 \\
           alone &               16.73 &                29.96 &                32.34 &             14.35 \\
         medical &                5.71 &                 5.71 &                 5.52 &             11.83 \\
    relationship &               12.78 &                15.31 &                14.24 &             10.70 \\
       childhood &               10.29 &                 8.57 &                 9.05 &             11.58 \\
             job &                5.43 &                 4.26 &                 4.10 & 

  frequencies_latex = frequencies_df[["themes", "doomer_freqs_whole", "lonely1_freqs_whole", "lonely2_freqs_whole", "depr_freqs_whole"]].to_latex(index=False, float_format="%.2f")
