# Analyzing Reddit Posts Around Ann Arbor

Imports

In [1]:
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import numpy as np
import matplotlib.pyplot as plt

Loading Data

In [2]:
# Read the combined Reddit export into a DataFrame, report how many rows
# were read, and preview the first few of them.
df = pd.read_csv("reddit_combined.csv")
print(f"Successfully loaded {df.shape[0]} rows from reddit_combined.csv")
df.head()

Successfully loaded 630 rows from reddit_combined.csv


Unnamed: 0,type,id,parent_id,author,subreddit,title,body,score,num_comments,created_utc,url,source_file
0,post,1h6brv5,,LaxJackson,AnnArbor,“Some want pedestrian malls in downtown Ann Ar...,,65,55.0,1733303000.0,https://www.mlive.com/news/ann-arbor/2024/11/s...,post4.json
1,comment,m0jlqmn,t1_m0evkw5,kittywheezes,AnnArbor,,I think Church Street works because its centra...,1,,1733410000.0,https://reddit.com/r/AnnArbor/comments/1h6brv5...,post4.json
2,comment,m0evkw5,t1_m0et848,Mezmorki,AnnArbor,,Yes. Kalamzoo Mall north of Michigan Ave is s...,7,,1733339000.0,https://reddit.com/r/AnnArbor/comments/1h6brv5...,post4.json
3,comment,m0fqbb8,t1_m0et848,HeimrArnadalr,AnnArbor,,&gt; an architect friend saying pedestrian mal...,6,,1733348000.0,https://reddit.com/r/AnnArbor/comments/1h6brv5...,post4.json
4,comment,m0mmd1d,t1_m0mhh7h,prosocialbehavior,AnnArbor,,I am sure it is a factor but just saying weath...,5,,1733444000.0,https://reddit.com/r/AnnArbor/comments/1h6brv5...,post4.json


Preparing Data

In [3]:
# Build one text field per row from the title and the body, then drop rows
# that end up with no text at all.

# Missing titles/bodies become empty strings so that the string
# concatenation below never produces NaN.
df['title'] = df['title'].fillna('')
df['body'] = df['body'].fillna('')

# Join title and body, trim surrounding whitespace, and turn rows that are
# now completely empty into NaN so they can be dropped.
#
# The result is assigned back to the column on purpose: calling
# `.replace(..., inplace=True)` on `df['text_to_analyze']` is a chained
# in-place update. pandas 2.x warns about it, and with Copy-on-Write
# (the default from pandas 3.0) it modifies a temporary object and leaves
# `df` unchanged, so empty rows would silently survive.
df['text_to_analyze'] = (
    (df['title'] + ' ' + df['body'])
    .str.strip()
    .replace('', np.nan)
)

# Drop rows where there is no text to analyze, remembering the previous
# row count so the number of removed rows can be reported.
original_count = len(df)
df.dropna(subset=['text_to_analyze'], inplace=True)

if original_count > len(df):
    print(f"Dropped {original_count - len(df)} rows that had no text content.")

print(f"We have {len(df)} rows with text.")
df.head()

We have 630 rows with text.


Unnamed: 0,type,id,parent_id,author,subreddit,title,body,score,num_comments,created_utc,url,source_file,text_to_analyze
0,post,1h6brv5,,LaxJackson,AnnArbor,“Some want pedestrian malls in downtown Ann Ar...,,65,55.0,1733303000.0,https://www.mlive.com/news/ann-arbor/2024/11/s...,post4.json,“Some want pedestrian malls in downtown Ann Ar...
1,comment,m0jlqmn,t1_m0evkw5,kittywheezes,AnnArbor,,I think Church Street works because its centra...,1,,1733410000.0,https://reddit.com/r/AnnArbor/comments/1h6brv5...,post4.json,I think Church Street works because its centra...
2,comment,m0evkw5,t1_m0et848,Mezmorki,AnnArbor,,Yes. Kalamzoo Mall north of Michigan Ave is s...,7,,1733339000.0,https://reddit.com/r/AnnArbor/comments/1h6brv5...,post4.json,Yes. Kalamzoo Mall north of Michigan Ave is s...
3,comment,m0fqbb8,t1_m0et848,HeimrArnadalr,AnnArbor,,&gt; an architect friend saying pedestrian mal...,6,,1733348000.0,https://reddit.com/r/AnnArbor/comments/1h6brv5...,post4.json,&gt; an architect friend saying pedestrian mal...
4,comment,m0mmd1d,t1_m0mhh7h,prosocialbehavior,AnnArbor,,I am sure it is a factor but just saying weath...,5,,1733444000.0,https://reddit.com/r/AnnArbor/comments/1h6brv5...,post4.json,I am sure it is a factor but just saying weath...


Sentiment Analysis

In [4]:
# One analyzer instance is created up front and reused for every row.
analyzer = SentimentIntensityAnalyzer()


def _compound_score(text):
    """Return the 'compound' entry of the analyzer's polarity scores for `text`.

    The compound score is a single normalized score from -1 to 1.
    """
    return analyzer.polarity_scores(text)['compound']


# Score each row's combined text and store the result in a new column.
df['sentiment_score'] = df['text_to_analyze'].apply(_compound_score)

print("Sentiment analysis complete.")
df.head()

Sentiment analysis complete.


Unnamed: 0,type,id,parent_id,author,subreddit,title,body,score,num_comments,created_utc,url,source_file,text_to_analyze,sentiment_score
0,post,1h6brv5,,LaxJackson,AnnArbor,“Some want pedestrian malls in downtown Ann Ar...,,65,55.0,1733303000.0,https://www.mlive.com/news/ann-arbor/2024/11/s...,post4.json,“Some want pedestrian malls in downtown Ann Ar...,0.5267
1,comment,m0jlqmn,t1_m0evkw5,kittywheezes,AnnArbor,,I think Church Street works because its centra...,1,,1733410000.0,https://reddit.com/r/AnnArbor/comments/1h6brv5...,post4.json,I think Church Street works because its centra...,0.6621
2,comment,m0evkw5,t1_m0et848,Mezmorki,AnnArbor,,Yes. Kalamzoo Mall north of Michigan Ave is s...,7,,1733339000.0,https://reddit.com/r/AnnArbor/comments/1h6brv5...,post4.json,Yes. Kalamzoo Mall north of Michigan Ave is s...,0.964
3,comment,m0fqbb8,t1_m0et848,HeimrArnadalr,AnnArbor,,&gt; an architect friend saying pedestrian mal...,6,,1733348000.0,https://reddit.com/r/AnnArbor/comments/1h6brv5...,post4.json,&gt; an architect friend saying pedestrian mal...,0.6908
4,comment,m0mmd1d,t1_m0mhh7h,prosocialbehavior,AnnArbor,,I am sure it is a factor but just saying weath...,5,,1733444000.0,https://reddit.com/r/AnnArbor/comments/1h6brv5...,post4.json,I am sure it is a factor but just saying weath...,-0.765


Our results!

In [9]:
# Write the scored rows to disk (without the index column) before any
# reordering happens.
df.to_csv('reddit_with_sentiment.csv', index=False)
print("Successfully saved results to reddit_with_sentiment.csv")

# Show a sample of the results.
# Note: `df` itself is rebound to the frame sorted from highest to lowest
# sentiment score, so the top rows are the most positive and the bottom
# rows the most negative.
print("\n--- Sample of Results ---")
df = df.sort_values(by='sentiment_score', ascending=False)
preview = df[['type', 'text_to_analyze', 'sentiment_score']]
print("Most positive:")
print(preview.head(5))
print("Most negative:")
print(preview.tail(5))

# Summary statistics of the score column.
print("\n--- Sentiment Score Statistics ---")
print(df['sentiment_score'].describe())

Successfully saved results to reddit_with_sentiment.csv

--- Sample of Results ---
Most positive:
        type                                    text_to_analyze  \
9    comment  This is a complex topic, and there are a few t...   
407  comment  okay man, but all i’m saying is, as someone wh...   
234  comment  Oh, I thought you meant this weekend.\n\nI gue...   
345     post  Where to Live IN A2: Walkable, Community, Mill...   
166  comment  Id say it’s pretty par for a college town; you...   

     sentiment_score  
9             0.9977  
407           0.9939  
234           0.9927  
345           0.9924  
166           0.9882  
Most negative:
        type                                    text_to_analyze  \
176  comment  That Kroger has horrible stock, bad meat, no e...   
486  comment  Agree - people are infuriated that there are "...   
572  comment  My spouse started using a wheelchair 2 years a...   
257  comment  Totally walkable. From the stadium it’s only a...   
459  commen