In [16]:
import pandas as pd
import nltk
from nltk.sentiment import SentimentIntensityAnalyzer
from nltk.tokenize import word_tokenize
import re
import os

# Show where the kernel started, then switch into the folder that holds the
# team comment files so later cells can open them by bare file name.
# The folder defaults to the original location but can be overridden with the
# COMMENTS_DATA_DIR environment variable, so the notebook can run on another
# machine without editing this cell.
print(os.getcwd())
downloads_path = os.environ.get('COMMENTS_DATA_DIR', r'C:\Users\brtm2')
os.chdir(downloads_path)
print("Current working directory:", os.getcwd())

C:\Users\brtm2
Current working directory: C:\Users\brtm2


In [17]:
# Read in team csv files.
# All three are opened relative to the current working directory (the setup
# cell already chdir'd into the data folder), so the folder is named in one
# place only instead of being repeated as an absolute path here.
rangers_comments_df = pd.read_csv('rangers_comments_sentiment_analysis')
cowboys_comments_df = pd.read_csv('cowboys_comments_sentiment_analysis')
mavericks_comments_df = pd.read_csv('mavericks_comments_sentiment_analysis')

# Preview one frame as the cell output
cowboys_comments_df.head()

Unnamed: 0,comment_body
0,Which is kind of a weird thing to say when Jer...
1,https://preview.redd.it/ocknt98prghc1.jpeg?wid...
2,Phenomenal hire. This is the type of person th...
3,I miss him.
4,"Great defensive mind, he took over in Minnesot..."


In [22]:
# Function to preprocess comments
def preprocess_text(text):
    """Normalise one raw comment into lowercase, letters-only, space-joined tokens.

    Parameters
    ----------
    text : str or missing value
        Raw comment body. Anything that is not a ``str`` (e.g. ``None`` or the
        float ``NaN`` that can appear for an empty cell) is treated as a
        missing comment.

    Returns
    -------
    str
        The cleaned comment. ``''`` is returned for missing input and for
        comments that contain nothing but URLs, digits or punctuation.
    """
    # Missing cells are not strings; calling .lower() on them would raise.
    if not isinstance(text, str):
        return ''
    # Lowercase text
    text = text.lower()
    # Use regex to remove URLs and special characters/numbers
    text = re.sub(r'http\S+', '', text)  # Remove URLs
    text = re.sub(r'[^a-zA-Z\s]', '', text)  # Remove special characters and numbers
    # Nothing but whitespace left: the tokenizer would yield no tokens anyway
    if not text.strip():
        return ''
    # Tokenize text
    tokens = word_tokenize(text)
    # Join tokens back to a single string
    return ' '.join(tokens)

# Give every team DataFrame a 'processed_comment' column holding the cleaned
# version of its 'comment_body' text.
for team_df in (rangers_comments_df, cowboys_comments_df, mavericks_comments_df):
    team_df['processed_comment'] = team_df['comment_body'].apply(preprocess_text)

# Preview one frame to check the new column
cowboys_comments_df.head()

Unnamed: 0,comment_body,processed_comment
0,Which is kind of a weird thing to say when Jer...,which is kind of a weird thing to say when jer...
1,https://preview.redd.it/ocknt98prghc1.jpeg?wid...,
2,Phenomenal hire. This is the type of person th...,phenomenal hire this is the type of person thi...
3,I miss him.,i miss him
4,"Great defensive mind, he took over in Minnesot...",great defensive mind he took over in minnesota...


In [24]:
# (rows, columns) of the Cowboys frame; in notebook order this comes before
# the empty-comment filter, so it serves as the "before" size.
cowboys_comments_df.shape

(29818, 2)

In [42]:
# Filter out rows where 'processed_comment' is empty for each DataFrame.
# .copy() makes each result an independent frame, so adding columns to it in a
# later cell does not trigger pandas' SettingWithCopyWarning.
rangers_comments_df = rangers_comments_df[rangers_comments_df['processed_comment'] != ''].copy()
cowboys_comments_df = cowboys_comments_df[cowboys_comments_df['processed_comment'] != ''].copy()
mavericks_comments_df = mavericks_comments_df[mavericks_comments_df['processed_comment'] != ''].copy()

In [45]:
# Print the (rows, columns) tuple of the Cowboys frame, then show its first
# rows as the cell output. In notebook order this follows the empty-comment
# filter, so it shows the size after that step.
print(cowboys_comments_df.shape)
cowboys_comments_df.head()

(29551, 2)


Unnamed: 0,comment_body,processed_comment
0,Which is kind of a weird thing to say when Jer...,which is kind of a weird thing to say when jer...
2,Phenomenal hire. This is the type of person th...,phenomenal hire this is the type of person thi...
3,I miss him.,i miss him
4,"Great defensive mind, he took over in Minnesot...",great defensive mind he took over in minnesota...
5,"Good with this, especially since it seems unli...",good with this especially since it seems unlik...


In [None]:
# Initialize VADER SentimentIntensityAnalyzer.
# The analyzer needs NLTK's 'vader_lexicon' resource; fetch it once if it is
# not installed so the cell also works on a fresh environment.
try:
    nltk.data.find('sentiment/vader_lexicon.zip')
except LookupError:
    nltk.download('vader_lexicon')
sia = SentimentIntensityAnalyzer()

# Function to calculate sentiment scores
def calc_sa_scores(comment):
    """Return the 'compound' polarity score for one comment.

    The comment is scored with the module-level ``sia`` analyzer and only the
    'compound' entry of its result is returned.
    """
    polarity = sia.polarity_scores(comment)
    return polarity['compound']

# Function to categorize sentiment based on the compound score
def categorize(score, threshold=0.05):
    """Map a compound sentiment score to 'Positive', 'Negative' or 'Neutral'.

    Parameters
    ----------
    score : float
        Compound sentiment score to label.
    threshold : float, optional
        Cutoff for leaving the neutral band (default 0.05).

    Returns
    -------
    str
        'Positive' when ``score >= threshold``, 'Negative' when
        ``score <= -threshold``, otherwise 'Neutral'.
    """
    # Boundaries are inclusive, following VADER's documented convention
    # (>= 0.05 positive, <= -0.05 negative); a score of exactly +/-0.05 is
    # therefore not neutral.
    if score >= threshold:
        return 'Positive'
    if score <= -threshold:
        return 'Negative'
    return 'Neutral'

# Function to apply sentiment analysis on a DataFrame
def apply_sa(car_df):
    """Return a copy of ``car_df`` with sentiment score and category columns added.

    Parameters
    ----------
    car_df : pandas.DataFrame
        Frame with a 'processed_comment' column of cleaned comment strings.
        (The parameter name is kept as-is for compatibility with callers.)

    Returns
    -------
    pandas.DataFrame
        A new frame with two extra columns: 'sentiment_score' (result of
        ``calc_sa_scores`` per comment) and 'sentiment_category' (result of
        ``categorize`` per score). The input frame is left unmodified.
    """
    # Work on a copy: the input may be a filtered slice of another frame, and
    # assigning columns to it directly raises SettingWithCopyWarning and
    # silently changes the caller's object.
    scored_df = car_df.copy()
    # Calculate sentiment scores
    scored_df['sentiment_score'] = scored_df['processed_comment'].apply(calc_sa_scores)
    # Categorize sentiment
    scored_df['sentiment_category'] = scored_df['sentiment_score'].apply(categorize)
    return scored_df

# Run the sentiment pipeline over all three team frames and rebind each name
# to the frame returned by apply_sa.
rangers_comments_df, cowboys_comments_df, mavericks_comments_df = [
    apply_sa(team_df)
    for team_df in (rangers_comments_df, cowboys_comments_df, mavericks_comments_df)
]

In [47]:
# Display the head of the Cowboys DataFrame to spot-check the results
# (only this one frame is shown; the other two are not displayed here).
cowboys_comments_df.head()

Unnamed: 0,comment_body,processed_comment,sentiment_score,sentiment_category
0,Which is kind of a weird thing to say when Jer...,which is kind of a weird thing to say when jer...,-0.2484,Negative
2,Phenomenal hire. This is the type of person th...,phenomenal hire this is the type of person thi...,-0.5994,Negative
3,I miss him.,i miss him,-0.1531,Negative
4,"Great defensive mind, he took over in Minnesot...",great defensive mind he took over in minnesota...,0.7906,Positive
5,"Good with this, especially since it seems unli...",good with this especially since it seems unlik...,0.6997,Positive


In [48]:
# Write each team's DataFrame to its own CSV file in the current working
# directory, leaving the row index out of the file.
for scores_file, team_df in (
    ('rangers_scores.csv', rangers_comments_df),
    ('cowboys_scores.csv', cowboys_comments_df),
    ('mavericks_scores.csv', mavericks_comments_df),
):
    team_df.to_csv(scores_file, index=False)