In [1]:
# Ensure the accessibility between each varaiables

!pip install import-ipynb

Collecting import-ipynb
  Downloading import_ipynb-0.1.4-py3-none-any.whl (4.1 kB)
Installing collected packages: import-ipynb
Successfully installed import-ipynb-0.1.4
[0m

In [2]:
from pyspark.sql import SparkSession
from pyspark.sql.types import StringType, DateType, FloatType, IntegerType, TimestampType, ArrayType, StructType, StructField
from pyspark.sql.functions import from_unixtime, sum, rank,lag, explode, expr,spark_partition_id, to_date, coalesce, lit, to_timestamp, col, month, concat, count, max, when, dayofweek, datediff,dense_rank, desc, date_format
import pyspark.sql.functions as F
from pyspark.sql.window import Window
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import re
from pyspark.ml.feature import Tokenizer, StopWordsRemover, Word2Vec,HashingTF,IDF, CountVectorizer,VectorAssembler
from pyspark.sql.functions import udf
from pyspark.ml.feature import Tokenizer, CountVectorizer, IDF
from pyspark.ml import Pipeline,PipelineModel
from sparknlp.base import DocumentAssembler, Finisher
from sparknlp.annotator import LemmatizerModel
from pyspark.ml.classification import LinearSVC, LogisticRegression, LinearSVCModel
from pyspark.ml.evaluation import MulticlassClassificationEvaluator,BinaryClassificationEvaluator
from pyspark.ml.tuning import ParamGridBuilder, CrossValidator
from google.cloud import storage
import sparknlp
import os
import warnings
import import_ipynb


In [3]:
def train_test_val_split(df, train_prob = 0.7, test_prob=0.2, val_prob= 0.1):
    train_df, test_df, validation_df = df.randomSplit([train_prob, test_prob, val_prob],2023)
    return train_df, test_df, validation_df

In [4]:
spark = SparkSession.builder.appName("LoadModel").getOrCreate()

# Get the context of the Pyspark environment
spark.sparkContext.getConf().getAll()

# Store spark context as a variable
sc = spark.sparkContext

stoarge_client = storage.Client()

In [5]:
spark = SparkSession.builder.appName("AddLabelColumn").getOrCreate()

# Get the context of the Pyspark environment
spark.sparkContext.getConf().getAll()

# Store spark context as a variable
sc = spark.sparkContext

stoarge_client = storage.Client()

In [6]:
reddit_data_df = spark.read.parquet("gs://msca-bdp-student-gcs/Group2_Final_Project/reddit_data/",header=True, inferSchema=True)
reddit_data_df = reddit_data_df.dropna()

train_df, test_df, validation_df = train_test_val_split(reddit_data_df)

                                                                                

In [7]:
def list_all_files(bucket_name, folder_name):
    bucket = stoarge_client.bucket(bucket_name)
    file_lst = [(blob.name.split("/")[2].split("_")[0]) for blob in bucket.list_blobs(prefix = folder_name)]
    file_lst = list(set(file_lst))
    try:
        file_lst.remove("")
    except:
        return file_lst
  
    return file_lst



In [8]:
def test_preprocess_pipeline(train_df):
    tokenizer = Tokenizer(inputCol="body", outputCol="token")

    # remove stop words
    remover = StopWordsRemover(inputCol="token", outputCol="filtered_token")

    # vecotorize the words
    vectorizer = CountVectorizer(inputCol="filtered_token", outputCol="features")
    idf = IDF(inputCol="features", outputCol="tfidf_features")

    # assemble all features into 1 column
    assembler = VectorAssembler(inputCols=["features","tfidf_features"], outputCol="final_features")

    # Create the preprocessing piplines for the tweets
    pipeline = Pipeline().setStages([
        tokenizer,
        remover,
        vectorizer,
        idf,
        assembler
    ])
    
    model = pipeline.fit(train_df)
    return model

In [9]:
def load_model_from_path(bucket_name,folder_name):
    
   
    model_lst_path = list_all_files(bucket_name, folder_name)
    model_lst = {}
    
    for model_name in model_lst_path:
        prefix = 'gs://'
        suffix = '_model'
        model_path = os.path.join(prefix, bucket_name, folder_name, model_name+suffix)
        print("Attempting to load model from path:", model_path)
        try:
            loaded_model = PipelineModel.load(model_path)
            print("Model loaded successfully.")
        except:
            print("Model loading failed")
            continue
        model_lst["r/"+ model_name] = loaded_model
    
    return model_lst

In [10]:
# Tokenize and stop word removal
def clean_text(text):
    # Deal with component words
    re.sub(r'(?<=[a-z])(?=[A-Z])', ' ', text)
    # Convert to lowercase
    text = text.lower()
    # Remove Http / Https links in the text
    text = re.sub(r'http\S+', '', text)
    text = re.sub(r'https\S+', '', text)
    # Remove special characters and numbers
    text = re.sub(r'[^a-zA-Z\s]', '', text)
    # Handling repeated characters (more than 2)
    text = re.sub(r'(.)\1+', r'\1\1', text)
    # Remove extra spaces
    text = re.sub(r'\s+', ' ', text).strip()
    return text

In [11]:
def encode_by_tags(df,tags_lst,labelcol):
    
    df_res = df
    
    for tag in tags_lst:
        print(tag)
        df_res= df_res.withColumn(tag, when(col(labelcol) == tag, 1).otherwise(0))

    return df_res

In [12]:
def clean_df(df, inputcol):
    clean_text_udf = udf(clean_text, StringType())
    df_cleaned = df.withColumn(inputcol, clean_text_udf(df[inputcol]))
    return df_cleaned

In [13]:
def get_model_spec_from_loading(test_df,model_lst,inputcol,labelcol):
    
    model_spec = {}
    tags_lst = [tag for tag in model_lst.keys()]
    print(tags_lst)
    
    # encode df by tag_lst
    encoded_df = encode_by_tags(test_df,tags_lst,labelcol)
    
   # clean the df 
    df_cleaned = clean_df(encoded_df,inputcol)

    # preprocessed the test_df
    preprocess_model = test_preprocess_pipeline()
    preprocess_df= preprocess_model.transform(df_cleaned)
        
    # test through each model
    for tag, model in model_lst.items():
        f1_evaluator = MulticlassClassificationEvaluator(labelCol=tag, predictionCol="prediction", metricName="f1")
        accuracy_evaluator = MulticlassClassificationEvaluator(labelCol=tag, predictionCol="prediction", metricName="accuracy")
        
        # generate the predictions based on preprocess_df
        predictions = model.transform(preprocess_df)
        
        #evaluate the predictions
        f1_score = f1_evaluator.evaluate(predictions)
        accuracy = accuracy_evaluator.evaluate(predictions)
        
        model_spec[tag] = {"f1_score": f1_score, "accuracy": accuracy}
    return model_spec       

In [None]:
# test_data= [("If you look at the example in the article, they are wildly diverging after the first sentence, leading me to belive that the prompt has been write an admit note for a person with he history of left breast cancer. I would really not like an AI to fabricate information about what tests have been conducted and what their results were. Doctors have enough on their mind as it is. They don't need to also be tasked with babysitting text generators.","r/science")]
# test_df = spark.createDataFrame(test_data,["body","subreddit_name_prefixed"])

inputcol = "body"
labelcol = "subreddit_name_prefixed"

bucket_name = 'msca-bdp-student-gcs'
folder_name = 'Group2_Final_Project/modelr/'
model_lst = load_model_from_path(bucket_name,folder_name)

model_spec = get_model_spec_from_loading(test_df, model_lst, inputcol = "body",labelcol = "subreddit_name_prefixed")
print(model_spec)

Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/Fitness_model


                                                                                

Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/technology_model
Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/LifeProTips_model


                                                                                

Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/books_model


                                                                                

Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/Foodforthought_model


                                                                                

Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/sports_model
Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/femalefashionadvice_model
Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/lifehacks_model
Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/relationship_model
Model loading failed
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/space_model
Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/socialskills_model
Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/history_model
Model loaded successfully.
Attempting to load model from path: gs://

23/11/28 02:18:27 WARN org.apache.spark.scheduler.DAGScheduler: Broadcasting large task binary with size 2.9 MiB
23/11/28 02:21:34 WARN org.apache.spark.scheduler.DAGScheduler: Broadcasting large task binary with size 2.9 MiB
23/11/28 02:21:39 WARN org.apache.spark.scheduler.DAGScheduler: Broadcasting large task binary with size 41.7 MiB
23/11/28 02:23:52 WARN org.apache.spark.scheduler.DAGScheduler: Broadcasting large task binary with size 41.7 MiB
23/11/28 02:25:39 WARN org.apache.spark.scheduler.DAGScheduler: Broadcasting large task binary with size 41.7 MiB
23/11/28 02:27:20 WARN org.apache.spark.scheduler.DAGScheduler: Broadcasting large task binary with size 41.7 MiB
23/11/28 02:28:47 WARN org.apache.spark.scheduler.DAGScheduler: Broadcasting large task binary with size 41.7 MiB
23/11/28 02:30:09 WARN org.apache.spark.scheduler.DAGScheduler: Broadcasting large task binary with size 41.7 MiB
23/11/28 02:31:23 WARN org.apache.spark.scheduler.DAGScheduler: Broadcasting large task bi

{'r/Fitness': {'f1_score': 0.9793339331253917, 'accuracy': 0.9761830673744356}, 'r/technology': {'f1_score': 0.920323135964179, 'accuracy': 0.9418568259850725}, 'r/LifeProTips': {'f1_score': 0.9576000708924204, 'accuracy': 0.9699858479294928}, 'r/books': {'f1_score': 0.9680394004424682, 'accuracy': 0.9771737017510534}, 'r/Foodforthought': {'f1_score': 0.9991634348672491, 'accuracy': 0.9994189446840196}, 'r/sports': {'f1_score': 0.9878921098055788, 'accuracy': 0.9913362255520949}, 'r/femalefashionadvice': {'f1_score': 0.9944861047442708, 'accuracy': 0.9960730459282889}, 'r/lifehacks': {'f1_score': 0.9935000792175512, 'accuracy': 0.9956273554887554}, 'r/space': {'f1_score': 0.9786004824018987, 'accuracy': 0.9838969119386014}, 'r/socialskills': {'f1_score': 0.9912008039537795, 'accuracy': 0.9940303772720472}, 'r/history': {'f1_score': 0.9977390357815209, 'accuracy': 0.9983235133964241}, 'r/EatCheapAndHealthy': {'f1_score': 0.9923084164495303, 'accuracy': 0.9948153774077289}, 'r/UpliftingN

                                                                                

In [None]:
# test on Youtube data 

youtube_comments_df = spark.read\
            .option("quote", "\"") \
            .option("escape", "\"") \
            .option("multiLine", True) \
            .option("ignoreLeadingWhiteSpace", True) \
            .csv('gs://msca-bdp-student-gcs/Group2_Final_Project/scrapy_data_youtube/merged/youtube_comment_selected.csv', header=True, inferSchema=True)

youtube_comments_df = youtube_comments_df.dropna()


inputcol = "body"
labelcol = "subreddit_name_prefixed"


bucket_name = 'msca-bdp-student-gcs'
folder_name = 'Group2_Final_Project/modelr/'
model_lst = load_model_from_path(bucket_name,folder_name)

yotube_model_spec = get_model_spec_from_loading(youtube_comments_df, model_lst, inputcol = "body",labelcol = "subreddit_name_prefixed")


                                                                                

Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/Fitness_model
Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/technology_model


                                                                                

Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/LifeProTips_model


                                                                                

Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/books_model


                                                                                

Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/Foodforthought_model
Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/sports_model


                                                                                

Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/femalefashionadvice_model


                                                                                

Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/lifehacks_model


                                                                                

Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/relationship_model
Model loading failed
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/space_model


                                                                                

Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/socialskills_model


                                                                                

Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/history_model
Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/EatCheapAndHealthy_model


                                                                                

Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/UpliftingNews_model


                                                                                

Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/gardening_model


                                                                                

Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/changemyview_model
Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/IWantToLearn_model
Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/Documentaries_model
Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/mildlyinteresting_model


                                                                                

Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/buildapc_model
Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/AskHistorians_model
Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/todayilearned_model


                                                                                

Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/Fantasy_model


                                                                                

Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/explainlikeimfive_model
Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/programming_model


                                                                                

Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/malefashionadvice_model


                                                                                

Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/suggestmeabook_model
Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/gadgets_model


                                                                                

Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/IAmA_model
Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/DIY_model


                                                                                

Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/gaming_model
Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/WritingPrompts_model


                                                                                

Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/bodyweightfitness_model


                                                                                

Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/personalfinance_model
Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/GetMotivated_model
Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/Damnthatsinteresting_model
Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/philosophy_model
Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/travel_model


                                                                                

Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/askscience_model
Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/YouShouldKnow_model
Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/podcasts_model
Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/tifu_model


                                                                                

Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/science_model


                                                                                

Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/scifi_model


                                                                                

Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/Showerthoughts_model


                                                                                

Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/boardgames_model


                                                                                

Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/SkincareAddiction_model
Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/bestof_model
Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/ifyoulikeblank_model
Model loaded successfully.
Attempting to load model from path: gs://msca-bdp-student-gcs/Group2_Final_Project/modelr/Games_model
Model loaded successfully.
['r/Fitness', 'r/technology', 'r/LifeProTips', 'r/books', 'r/Foodforthought', 'r/sports', 'r/femalefashionadvice', 'r/lifehacks', 'r/space', 'r/socialskills', 'r/history', 'r/EatCheapAndHealthy', 'r/UpliftingNews', 'r/gardening', 'r/changemyview', 'r/IWantToLearn', 'r/Documentaries', 'r/mildlyinteresting', 'r/buildapc', 'r/AskHistorians', 'r/todayilearned', 'r/Fantasy', 'r/explainlikeimfive', 'r/programming', 'r/malefashionadvice',

23/11/28 03:44:12 WARN org.apache.spark.scheduler.DAGScheduler: Broadcasting large task binary with size 2.9 MiB
23/11/28 03:45:13 WARN org.apache.spark.scheduler.DAGScheduler: Broadcasting large task binary with size 2.9 MiB
23/11/28 03:45:16 WARN org.apache.spark.scheduler.DAGScheduler: Broadcasting large task binary with size 41.7 MiB
23/11/28 03:45:57 WARN org.apache.spark.scheduler.DAGScheduler: Broadcasting large task binary with size 41.7 MiB
23/11/28 03:46:37 WARN org.apache.spark.scheduler.DAGScheduler: Broadcasting large task binary with size 41.7 MiB
23/11/28 03:47:17 WARN org.apache.spark.scheduler.DAGScheduler: Broadcasting large task binary with size 41.7 MiB
23/11/28 03:47:57 WARN org.apache.spark.scheduler.DAGScheduler: Broadcasting large task binary with size 41.7 MiB
23/11/28 03:48:37 WARN org.apache.spark.scheduler.DAGScheduler: Broadcasting large task binary with size 41.7 MiB
23/11/28 03:49:16 WARN org.apache.spark.scheduler.DAGScheduler: Broadcasting large task bi

In [None]:
# convert the model into dataframe for better visualization
model_data = [(tag, specs["f1_score"], specs["accuracy"]) for tag, specs in model_spec.items()]


schema = StructType([
    StructField("model", StringType(), True),
    StructField("f1_score", FloatType(), True),
    StructField("test_accuracy", FloatType(), True)
])

# Create DataFrame
model_df = spark.createDataFrame(model_data, schema)
model_df.show(100)

# save to GCS
model_pd = model_df.toPandas()
model_pd.to_csv("gs://msca-bdp-student-gcs/Group2_Final_Project/model_metric/model_metric.csv")


+--------------------+----------+-------------+
|               model|  f1_score|test_accuracy|
+--------------------+----------+-------------+
|           r/Fitness|0.97933394|   0.97618306|
|        r/technology|0.92032313|    0.9418568|
|       r/LifeProTips|0.95760006|   0.96998584|
|             r/books| 0.9680394|    0.9771737|
|    r/Foodforthought|0.99916345|     0.999419|
|            r/sports| 0.9878921|    0.9913362|
|r/femalefashionad...| 0.9944861|   0.99607307|
|         r/lifehacks|0.99350005|   0.99562734|
|             r/space| 0.9786005|    0.9838969|
|      r/socialskills| 0.9912008|   0.99403036|
|           r/history|  0.997739|    0.9983235|
|r/EatCheapAndHealthy|0.99230844|   0.99481535|
|     r/UpliftingNews|0.98633164|   0.99046564|
|         r/gardening|  0.979303|   0.98509455|
|      r/changemyview| 0.9621093|    0.9739003|
|      r/IWantToLearn| 0.9986431|    0.9990365|
|     r/Documentaries| 0.9931315|    0.9952012|
| r/mildlyinteresting|0.90749973|     0.

In [19]:
youtube_model_data = [(tag, specs["f1_score"], specs["accuracy"]) for tag, specs in yotube_model_spec.items()]

youtube_model_df = spark.createDataFrame(youtube_model_data, schema)
youtube_model_df.show(100)

schema = StructType([
    StructField("model", StringType(), True),
    StructField("f1_score", FloatType(), True),
    StructField("test_accuracy", FloatType(), True)
])

youtube_model_pd = youtube_model_df.toPandas()
youtube_model_pd.to_csv("gs://msca-bdp-student-gcs/Group2_Final_Project/model_metric/youtube_model_metric.csv")

+--------------------+----------+-------------+
|               model|  f1_score|test_accuracy|
+--------------------+----------+-------------+
|           r/Fitness| 0.9996584|   0.99931705|
|        r/technology|0.99841624|    0.9968375|
|       r/LifeProTips| 0.7607092|   0.83309275|
|             r/books| 0.9994133|    0.9988273|
|    r/Foodforthought| 0.9999987|   0.99999744|
|            r/sports| 0.9994839|   0.99896836|
|r/femalefashionad...|   0.99985|       0.9997|
|         r/lifehacks|0.99999017|   0.99998033|
|             r/space|0.99920106|    0.9984034|
|      r/socialskills| 0.9999372|   0.99987435|
|           r/history|0.99992603|    0.9998521|
|r/EatCheapAndHealthy| 0.9998769|   0.99975383|
|     r/UpliftingNews|0.99984956|    0.9996991|
|         r/gardening| 0.9991895|    0.9983803|
|      r/changemyview|0.99956346|    0.9991273|
|      r/IWantToLearn|  0.999997|   0.99999404|
|     r/Documentaries| 0.9999393|   0.99987864|
| r/mildlyinteresting| 0.3783133|   0.53