# importing Data and Loading Submission File

In [1]:
import pandas as pd
import zstandard
import os
import json

def read_zst_folder(folder_path):
    # Initialize an empty list to store DataFrame objects
    dfs = []

    # Iterate through each file in the folder
    for file_name in os.listdir(folder_path):
        if file_name.endswith(".zst"):
            file_path = os.path.join(folder_path, file_name)
            
            # Read and decompress the zst file
            with open(file_path, 'rb') as f:
                decompressor = zstandard.ZstdDecompressor()
                with decompressor.stream_reader(f) as reader:
                    decompressed_data = reader.read()

            # Decode the decompressed data as JSON
            json_data = decompressed_data.decode('utf-8')
            
            # Parse the JSON data into a Python object (list of dictionaries)
            data = [json.loads(line) for line in json_data.split('\n') if line.strip()]

            # Convert the data to a DataFrame
            df = pd.DataFrame(data)
            
            # Append the DataFrame to the list
            dfs.append(df)

    # Concatenate all DataFrames in the list into a single DataFrame
    combined_df = pd.concat(dfs, ignore_index=True)
    
    return combined_df

# Specify the path to the folder containing zst files
folder_path = 'C:\\Users\\HP\Desktop\\Middlesex Course Content\\Giovanni Proposed Projects\\US Political Subreddits Selection (19-07-2024)\\Selected Subreddits\\Whole Subreddits\\Liberal_submissions'

# Call the function to read the zst folder into a DataFra
df_submissions = read_zst_folder(folder_path)
# Set display option to show all columns
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)


# Now you can work with the DataFrame (df)
df_submissions = df_submissions[['author', 'id','name', 'created_utc', 'subreddit_id', 'permalink', 'title', 'selftext', 'url', 'retrieved_on']]
df_submissions.head()

  from pandas.core import (


Unnamed: 0,author,id,name,created_utc,subreddit_id,permalink,title,selftext,url,retrieved_on
0,elshizzo,8hcf1,,1241293173,t5_2qxt5,/r/Liberal/comments/8hcf1/how_an_american_in_holland_learned_to_love_the/,How an American in Holland learned to love the European welfare state,,http://www.nytimes.com/2009/05/03/magazine/03european-t.html?_r=1,1522793000.0
1,elshizzo,8hcfa,,1241293240,t5_2qxt5,/r/Liberal/comments/8hcfa/obama_and_roosevelt_a_comparison_of_the_first_100/,Obama and Roosevelt: A Comparison of the First 100 Days,,http://www.politicalaffairs.net/article/articleview/8463/,1522793000.0
2,ambroseburns,8hchm,,1241294002,t5_2qxt5,/r/Liberal/comments/8hchm/al_gore_for_supreme_court_justice/,Al Gore for Supreme Court Justice?,,http://www.americamagazine.org/blog/entry.cfm?blog_id=2&amp;id=33894980-3048-741E-3428248481116938,1522793000.0
3,elshizzo,8hcih,,1241294312,t5_2qxt5,/r/Liberal/comments/8hcih/republicans_eating_their_own/,Republicans eating their own,,http://allspinzone.com/wp/2009/05/02/republicans-eating-their-own-michigan-edition/,1522793000.0
4,dangph,8he0a,,1241313287,t5_2qxt5,/r/Liberal/comments/8he0a/what_to_do_about_the_huffington_posts_support_for/,What to do about the Huffington Post's support for anti-vaccine nonsense and quackery?,,http://scienceblogs.com/insolence/2009/05/what_to_do_about_huffpo.php,1522793000.0


In [2]:
df_submissions.tail()

Unnamed: 0,author,id,name,created_utc,subreddit_id,permalink,title,selftext,url,retrieved_on
67441,Hello832,zzsynm,t3_zzsynm,1672489276,t5_2qxt5,/r/Liberal/comments/zzsynm/on_the_functions_of_liberalism_in_a_modern_context/,On the Functions of Liberalism in a Modern Context,[removed],https://www.reddit.com/r/Liberal/comments/zzsynm/on_the_functions_of_liberalism_in_a_modern_context/,1673168000.0
67442,eimilegippleemq,zzuesx,t3_zzuesx,1672494192,t5_2qxt5,/r/Liberal/comments/zzuesx/get_elementor_pro_free_100_legal_auto_updates/,Get Elementor Pro Free | 100% Legal + Auto Updates - YouTube,,https://www.youtube.com/watch?v=WsiDDuB6Jdo,1673168000.0
67443,[deleted],zzwdev,t3_zzwdev,1672500182,t5_2qxt5,/r/Liberal/comments/zzwdev/brazils_bolsonaro_makes_tearful_final_broadcast/,Brazil’s Bolsonaro makes tearful final broadcast and leaves country,[removed],,1673168000.0
67444,[deleted],1002hj2,t3_1002hj2,1672517326,t5_2qxt5,/r/Liberal/comments/1002hj2/join_a_gym/,Join a Gym.,[removed],,1673168000.0
67445,x777-333x,1005lpj,t3_1005lpj,1672526477,t5_2qxt5,/r/Liberal/comments/1005lpj/hello_everyone_i_identify_as_an_entitled_baby/,"Hello everyone, I identify as an entitled baby.",[removed],https://www.reddit.com/r/Liberal/comments/1005lpj/hello_everyone_i_identify_as_an_entitled_baby/,1673168000.0


In [3]:
df_submissions.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 67446 entries, 0 to 67445
Data columns (total 10 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   author        67446 non-null  object 
 1   id            67446 non-null  object 
 2   name          42525 non-null  object 
 3   created_utc   67446 non-null  object 
 4   subreddit_id  67446 non-null  object 
 5   permalink     67446 non-null  object 
 6   title         67446 non-null  object 
 7   selftext      67446 non-null  object 
 8   url           67362 non-null  object 
 9   retrieved_on  52238 non-null  float64
dtypes: float64(1), object(9)
memory usage: 5.1+ MB


# Importing Data and Loading Comment File

In [4]:
import pandas as pd
import zstandard
import os
import json

def read_zst_folder(folder_path):
    # Initialize an empty list to store DataFrame objects
    dfs = []

    # Iterate through each file in the folder
    for file_name in os.listdir(folder_path):
        if file_name.endswith(".zst"):
            file_path = os.path.join(folder_path, file_name)
            
            # Read and decompress the zst file
            with open(file_path, 'rb') as f:
                decompressor = zstandard.ZstdDecompressor()
                with decompressor.stream_reader(f) as reader:
                    decompressed_data = reader.read()

            # Decode the decompressed data as JSON
            json_data = decompressed_data.decode('utf-8')
            
            # Parse the JSON data into a Python object (list of dictionaries)
            data = [json.loads(line) for line in json_data.split('\n') if line.strip()]

            # Convert the data to a DataFrame
            df = pd.DataFrame(data)
            
            # Append the DataFrame to the list
            dfs.append(df)

    # Concatenate all DataFrames in the list into a single DataFrame
    combined_df = pd.concat(dfs, ignore_index=True)
    
    return combined_df

# Specify the path to the folder containing zst files
folder_path = 'C:\\Users\\HP\Desktop\\Middlesex Course Content\\Giovanni Proposed Projects\\US Political Subreddits Selection (19-07-2024)\\Selected Subreddits\\Whole Subreddits\\Liberal_comments'
# Call the function to read the zst folder into a DataFrame
df_comments = read_zst_folder(folder_path)
# Set display option to show all columns
pd.set_option('display.max_columns', None)

# Now you can work with the DataFrame (df)
# df_comments.head()
df_comments = df_comments[['id', 'created_utc', 'subreddit', 'link_id', 'name', 'subreddit_id',
       'parent_id', 'retrieved_on', 'author', 'body', 'score']]
df_comments.head()


Unnamed: 0,id,created_utc,subreddit,link_id,name,subreddit_id,parent_id,retrieved_on,author,body,score
0,c09ae8x,1241319864,Liberal,t3_8hefx,t1_c09ae8x,t5_2qxt5,t3_8hefx,1425964000.0,elshizzo,It's a picture of Obama as FDR taken from the frontpage of Time Magazine in case anyone is wonder.\n,1
1,c09aej3,1241320659,Liberal,t3_8heid,t1_c09aej3,t5_2qxt5,t3_8heid,1425964000.0,KableKiB,I would like to say hello to all /r/Liberals. This is my first post here.\n\n*\*wave\**,2
2,c09aen4,1241320979,Liberal,t3_8heid,t1_c09aen4,t5_2qxt5,t3_8heid,1425964000.0,elshizzo,Colbert interviewed this guy for the Better Know a Lobby series.\n\nOne of my favorite colbert bits of all time...\n\nhttp://www.colbertnation.com/the-colbert-report-videos/163835/march-12-2008/better-know-a-lobby---drug-lobby,2
3,c09aeoa,1241321067,Liberal,t3_8heid,t1_c09aeoa,t5_2qxt5,t1_c09aen4,1425964000.0,KableKiB,"Thanks for relevant video but it's not available outside US. Anyone have another link?\n\n**EDIT:** Watched with proxy, not bad but Colbert didn't let him talk enough like in this video.",1
4,c09at5r,1241377574,Liberal,t3_8hefx,t1_c09at5r,t5_2qxt5,t1_c09ae8x,1425964000.0,axord,"Somewhat related question, what's the intended difference between here and [/r/Progressive](http://www.reddit.com/r/progressive/)?",1


In [5]:
df_comments.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 368207 entries, 0 to 368206
Data columns (total 11 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   id            368207 non-null  object 
 1   created_utc   368207 non-null  object 
 2   subreddit     368207 non-null  object 
 3   link_id       368207 non-null  object 
 4   name          173031 non-null  object 
 5   subreddit_id  368207 non-null  object 
 6   parent_id     368207 non-null  object 
 7   retrieved_on  345766 non-null  float64
 8   author        368207 non-null  object 
 9   body          368207 non-null  object 
 10  score         368207 non-null  int64  
dtypes: float64(1), int64(1), object(9)
memory usage: 30.9+ MB


In [6]:
# Merge final_subs_df and final_comments_df on "name" and "link_id"
df = df_submissions.merge(df_comments, how='inner', left_on='name', right_on='link_id')
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 224902 entries, 0 to 224901
Data columns (total 21 columns):
 #   Column          Non-Null Count   Dtype  
---  ------          --------------   -----  
 0   author_x        224902 non-null  object 
 1   id_x            224902 non-null  object 
 2   name_x          224902 non-null  object 
 3   created_utc_x   224902 non-null  object 
 4   subreddit_id_x  224902 non-null  object 
 5   permalink       224902 non-null  object 
 6   title           224902 non-null  object 
 7   selftext        224902 non-null  object 
 8   url             224383 non-null  object 
 9   retrieved_on_x  153010 non-null  float64
 10  id_y            224902 non-null  object 
 11  created_utc_y   224902 non-null  object 
 12  subreddit       224902 non-null  object 
 13  link_id         224902 non-null  object 
 14  name_y          137487 non-null  object 
 15  subreddit_id_y  224902 non-null  object 
 16  parent_id       224902 non-null  object 
 17  retrieved_

In [7]:
final_df = df.copy()

# Convert timestamp columns to datetime format
timestamp_columns = ['created_utc_x', 'created_utc_y']
for col in timestamp_columns:
    final_df[col] = pd.to_datetime(final_df[col], unit='s')
    
final_df.head()    

  final_df[col] = pd.to_datetime(final_df[col], unit='s')
  final_df[col] = pd.to_datetime(final_df[col], unit='s')


Unnamed: 0,author_x,id_x,name_x,created_utc_x,subreddit_id_x,permalink,title,selftext,url,retrieved_on_x,id_y,created_utc_y,subreddit,link_id,name_y,subreddit_id_y,parent_id,retrieved_on_y,author_y,body,score
0,[deleted],f5ywr,t3_f5ywr,2011-01-20 19:05:47,t5_2qxt5,/r/Liberal/comments/f5ywr/farewell_great_leader_rethinking_marx_liberty_the/,"Farewell, Great Leader!: Rethinking Marx, Liberty, the Individual and the State: Some comments",,http://www.gonzotimes.com/2011/01/farewell-great-leader-rethinking-marx-liberty-the-individual-and-the-state-some-comments/,,c1dnn6t,2011-01-21 18:01:28,Liberal,t3_f5ywr,t1_c1dnn6t,t5_2qxt5,t3_f5ywr,1426710000.0,[deleted],"This is a discussion of marxism or at least socialism, certainly not liberalism. There are some big differences there.",2
1,[deleted],f9eh9,t3_f9eh9,2011-01-26 15:46:55,t5_2qxt5,/r/Liberal/comments/f9eh9/feminism_for_men_cultural_sexism_we_tend_to_ignore/,Feminism For Men Cultural Sexism We Tend to Ignore,,http://www.gonzotimes.com/2011/01/feminism-for-men-and-boys/,,c1eideu,2011-01-28 05:46:05,Liberal,t3_f9eh9,t1_c1eideu,t5_2qxt5,t3_f9eh9,1426724000.0,[deleted],[deleted],1
2,shihtzuman,fbtax,t3_fbtax,2011-01-30 14:29:43,t5_2qxt5,/r/Liberal/comments/fbtax/ayn_rand_railed_against_government_benefits_but/,"Ayn Rand Railed Against Government Benefits, But Grabbed Social Security and Medicare When She Needed Them | | AlterNet",,"http://www.alternet.org/story/149721/ayn_rand_railed_against_government_benefits,_but_grabbed_social_security_and_medicare_when_she_needed_them",,c1eyxal,2011-02-01 02:43:04,Liberal,t3_fbtax,t1_c1eyxal,t5_2qxt5,t3_fbtax,1426733000.0,[deleted],"""It is obvious, in such cases, that a man receives his own money which was taken from him by force, directly and specifically, without his consent, against his own choice. Those who advocated such laws are morally guilty, since they assumed the “right” to force employers and unwilling co-workers. But **the victims, who opposed such laws, have a clear right to any refund of their own money—and they would not advance the cause of freedom if they left their money, unclaimed, for the benefit of the welfare-state administration.""**\r\n\r\nShe said this in 1966, and was diagnosed in 1974.\r\n\r\nhttp://www.noblesoul.com/orc/bio/biofaq.html http://aynrandlexicon.com/lexicon/government_grants_and_scholarships.html\r\n",1
3,shihtzuman,fdaaq,t3_fdaaq,2011-02-01 19:48:20,t5_2qxt5,/r/Liberal/comments/fdaaq/the_reagan_ruins/,The Reagan Ruins,,http://www.huffingtonpost.com/robert-l-borosage/the-reagan-ruins_b_816820.html,,c1f4b12,2011-02-02 02:41:46,Liberal,t3_fdaaq,t1_c1f4b12,t5_2qxt5,t3_fdaaq,1426735000.0,Rixar13,""" On all of these, the Gipper and conservati­ves got it wrong.""\r\nYes they got it wrong and won't stop until they destroy America... sigh \r\n",0
4,shihtzuman,fd2bi,t3_fd2bi,2011-02-01 13:41:02,t5_2qxt5,/r/Liberal/comments/fd2bi/alternet_more_craziness_from_arizona_rightwingers/,AlterNet: More Craziness From Arizona: Right-Wingers Aim to Sabotage Obama's Run for Re-Election,,http://www.alternet.org/module/printversion/149737,,c1f0pde,2011-02-01 13:44:08,Liberal,t3_fd2bi,t1_c1f0pde,t5_2qxt5,t3_fd2bi,1426734000.0,diggemigre,"More craziness from Martin Peretz:\n\n “Wouldn't it be better that, rather than have a Republican candidate trounce him in the general elections, a Democrat try to unseat him in the party primaries and at the convention. Surely, there are many sensible Democrats who realize that the ‘yes, we can’ dream is, in fact, Obama's own hallucination.”\n\n",1


In [8]:
final_df = final_df.drop(['retrieved_on_x', 'retrieved_on_y'], axis=1)

final_df.head()

Unnamed: 0,author_x,id_x,name_x,created_utc_x,subreddit_id_x,permalink,title,selftext,url,id_y,created_utc_y,subreddit,link_id,name_y,subreddit_id_y,parent_id,author_y,body,score
0,[deleted],f5ywr,t3_f5ywr,2011-01-20 19:05:47,t5_2qxt5,/r/Liberal/comments/f5ywr/farewell_great_leader_rethinking_marx_liberty_the/,"Farewell, Great Leader!: Rethinking Marx, Liberty, the Individual and the State: Some comments",,http://www.gonzotimes.com/2011/01/farewell-great-leader-rethinking-marx-liberty-the-individual-and-the-state-some-comments/,c1dnn6t,2011-01-21 18:01:28,Liberal,t3_f5ywr,t1_c1dnn6t,t5_2qxt5,t3_f5ywr,[deleted],"This is a discussion of marxism or at least socialism, certainly not liberalism. There are some big differences there.",2
1,[deleted],f9eh9,t3_f9eh9,2011-01-26 15:46:55,t5_2qxt5,/r/Liberal/comments/f9eh9/feminism_for_men_cultural_sexism_we_tend_to_ignore/,Feminism For Men Cultural Sexism We Tend to Ignore,,http://www.gonzotimes.com/2011/01/feminism-for-men-and-boys/,c1eideu,2011-01-28 05:46:05,Liberal,t3_f9eh9,t1_c1eideu,t5_2qxt5,t3_f9eh9,[deleted],[deleted],1
2,shihtzuman,fbtax,t3_fbtax,2011-01-30 14:29:43,t5_2qxt5,/r/Liberal/comments/fbtax/ayn_rand_railed_against_government_benefits_but/,"Ayn Rand Railed Against Government Benefits, But Grabbed Social Security and Medicare When She Needed Them | | AlterNet",,"http://www.alternet.org/story/149721/ayn_rand_railed_against_government_benefits,_but_grabbed_social_security_and_medicare_when_she_needed_them",c1eyxal,2011-02-01 02:43:04,Liberal,t3_fbtax,t1_c1eyxal,t5_2qxt5,t3_fbtax,[deleted],"""It is obvious, in such cases, that a man receives his own money which was taken from him by force, directly and specifically, without his consent, against his own choice. Those who advocated such laws are morally guilty, since they assumed the “right” to force employers and unwilling co-workers. But **the victims, who opposed such laws, have a clear right to any refund of their own money—and they would not advance the cause of freedom if they left their money, unclaimed, for the benefit of the welfare-state administration.""**\r\n\r\nShe said this in 1966, and was diagnosed in 1974.\r\n\r\nhttp://www.noblesoul.com/orc/bio/biofaq.html http://aynrandlexicon.com/lexicon/government_grants_and_scholarships.html\r\n",1
3,shihtzuman,fdaaq,t3_fdaaq,2011-02-01 19:48:20,t5_2qxt5,/r/Liberal/comments/fdaaq/the_reagan_ruins/,The Reagan Ruins,,http://www.huffingtonpost.com/robert-l-borosage/the-reagan-ruins_b_816820.html,c1f4b12,2011-02-02 02:41:46,Liberal,t3_fdaaq,t1_c1f4b12,t5_2qxt5,t3_fdaaq,Rixar13,""" On all of these, the Gipper and conservati­ves got it wrong.""\r\nYes they got it wrong and won't stop until they destroy America... sigh \r\n",0
4,shihtzuman,fd2bi,t3_fd2bi,2011-02-01 13:41:02,t5_2qxt5,/r/Liberal/comments/fd2bi/alternet_more_craziness_from_arizona_rightwingers/,AlterNet: More Craziness From Arizona: Right-Wingers Aim to Sabotage Obama's Run for Re-Election,,http://www.alternet.org/module/printversion/149737,c1f0pde,2011-02-01 13:44:08,Liberal,t3_fd2bi,t1_c1f0pde,t5_2qxt5,t3_fd2bi,diggemigre,"More craziness from Martin Peretz:\n\n “Wouldn't it be better that, rather than have a Republican candidate trounce him in the general elections, a Democrat try to unseat him in the party primaries and at the convention. Surely, there are many sensible Democrats who realize that the ‘yes, we can’ dream is, in fact, Obama's own hallucination.”\n\n",1


In [9]:
final_df = final_df.drop(['author_x', 'id_x', 'name_x', 'subreddit_id_x',
                          'permalink', 'url', 'id_y', 'name_y', 'subreddit_id_y',
                          'parent_id', 'author_y'], axis=1)


In [10]:
final_df.columns

Index(['created_utc_x', 'title', 'selftext', 'created_utc_y', 'subreddit',
       'link_id', 'body', 'score'],
      dtype='object')

In [11]:
import re
import pandas as pd

# Define the dictionary mapping variations to the standard form
name_variations = {
  r'\b(?:george\s+w\.\s+bush|bush|george(?:\s+w\.\s+bush)?)\b': 'George W. Bush',
  r'\b(?:barack\s+obama|obama|barack(?:\s+obama)?)\b': 'Barack Obama',
  r'\b(?:bill\s+clinton|clinton|bill(?:\s+clinton)?)\b': 'Bill Clinton',
  r'\b(?:donald\s+trump|trump|donald(?:\s+trump)?)\b': 'Donald Trump',
  r'\b(?:joe\s+biden|biden|joe(?:\s+biden)?)\b': 'Joe Biden',  
}


# Function to replace variations with standard names
def standardize_names(text):
    for pattern, standard_name in name_variations.items():
        text = re.sub(pattern, standard_name, text, flags=re.IGNORECASE)
    return text


In [12]:
# Import necessary libraries
import spacy
from spacy import displacy
import pandas as pd
import re
from textblob import TextBlob

# Download the small English model for spaCy
spacy.cli.download("en_core_web_sm")

# Load the spaCy model
nlp = spacy.load("en_core_web_sm")

# Function to extract persons (already standardized)
def extract_persons(text):
    # Combine all standardized names into one pattern
    pattern = r'\b(?:' + '|'.join([re.escape(name) for name in name_variations.values()]) + r')\b'
    # Find all matches of persons' names in the text
    persons_found = re.findall(pattern, text, flags=re.IGNORECASE)
    # Return unique persons' names
    return list(set(persons_found))



# Apply the extract_persons function to 'title', 'selftext', and 'body' columns
final_df['persons_title'] = final_df['title'].apply(extract_persons)
final_df['persons_selftext'] = final_df['selftext'].apply(extract_persons)
final_df['persons_body'] = final_df['body'].apply(extract_persons)

# Filter the DataFrame to include only rows where either 'persons_title' or 'persons_selftext' or 'persons_body' is not empty
filtered_df = final_df[(final_df['persons_title'].apply(len) > 0) | 
                       (final_df['persons_selftext'].apply(len) > 0) |
                       (final_df['persons_body'].apply(len) > 0)]

# # Compute sentiments for 'title', 'selftext', and 'body'
# filtered_df['sentiment_title'] = filtered_df['title'].apply(get_sentiment)
# filtered_df['sentiment_selftext'] = filtered_df['selftext'].apply(get_sentiment)
# filtered_df['sentiment_body'] = filtered_df['body'].apply(get_sentiment)

# Display the filtered DataFrame with sentiments
filtered_df.head()


[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


Unnamed: 0,created_utc_x,title,selftext,created_utc_y,subreddit,link_id,body,score,persons_title,persons_selftext,persons_body
221,2011-04-25 02:27:30,Donald Trump denies poor voting record: Tycoon reacts with fury to revelation by news station in his home town that he has not voted in primaries for 21 years\n,,2011-04-25 05:02:54,Liberal,t3_gwoua,"Trump is a catastrophe as a candidate. He's far to hot headed!\n\nHe goes for the jugular against any prominent person that says anything negative about him in an extremely unflattering way. Be it man or woman, it doesn't matter, he attacks ferociously, and in an ugly way. The way he attacked Rosie was disgusting.\n\nAnd that Tycoon tag, what are the origins of that? I mean the man just had one of his casinos file for bankruptcy....What the hell kind of extreme mismanagement causes a casino to go bankrupt? \n\nStories abound about how he misrepresents his wealth.\n\nThe man is shady period, and that's not going to go over well with the majority of Americans.\n\nI just can not see that man being a legitimate candidate. \n\nThe speculation seems to be that a presidential run was floated simply to increase the ratings of his latest TV venture with other people's money.\n\nTo the point though; sure I believe the story. I'm sure he has a poor voting record.",1,[Donald Trump],[],[]
222,2011-04-25 02:27:30,Donald Trump denies poor voting record: Tycoon reacts with fury to revelation by news station in his home town that he has not voted in primaries for 21 years\n,,2011-04-25 07:57:38,Liberal,t3_gwoua,"&gt; Trump's lawyer Michael Cohen told Associated Press on Saturday that ""for one of the greatest international businessmen who travels all over the country and the world, his voting record is very, very good.""\n\nWith vote by mail available, this is really not an excuse.",1,[Donald Trump],[],[]
238,2011-04-27 01:10:59,The Legacy of Malcolm X: Why his vision lives on in Barack Obama,,2011-04-27 02:07:43,Liberal,t3_gy8f3,Brother Malcolm would have slapped the teeth right out of the corporate tool's face.,0,[Barack Obama],[],[]
288,2011-05-07 12:52:06,"Barack Obama to release up to 2,000 photographs of prisoner abuse",,2011-05-07 13:07:20,Liberal,t3_h61un,"So let me get this straight, pictures of dead Osama are inflammatory and inappropriate, but pictures of prisoner abuse is fine? If it makes us look bad, publish it. If it causes us to swell with love of country, suppress it.",1,[Barack Obama],[],[]
289,2011-05-07 12:52:06,"Barack Obama to release up to 2,000 photographs of prisoner abuse",,2011-05-07 15:21:20,Liberal,t3_h61un,"One does not equal the other.\r\n\r\nAnd, did you bother even *skimming* the article?",1,[Barack Obama],[],[]


In [13]:
filtered_df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 13930 entries, 221 to 224784
Data columns (total 11 columns):
 #   Column            Non-Null Count  Dtype         
---  ------            --------------  -----         
 0   created_utc_x     13930 non-null  datetime64[ns]
 1   title             13930 non-null  object        
 2   selftext          13930 non-null  object        
 3   created_utc_y     13930 non-null  datetime64[ns]
 4   subreddit         13930 non-null  object        
 5   link_id           13930 non-null  object        
 6   body              13930 non-null  object        
 7   score             13930 non-null  int64         
 8   persons_title     13930 non-null  object        
 9   persons_selftext  13930 non-null  object        
 10  persons_body      13930 non-null  object        
dtypes: datetime64[ns](2), int64(1), object(8)
memory usage: 1.3+ MB


In [14]:
filtered_df.to_csv('C:\\Users\\HP\Desktop\\Middlesex Course Content\\Giovanni Proposed Projects\\US Political Subreddits Selection (19-07-2024)\\Selected Subreddits\\Whole Subreddits\\Liberal_ner.csv')