In [1]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

In [2]:
# Load the tweet dataset; the relative path is resolved against the kernel's working directory
df = pd.read_csv('stock_tweets.csv')
# Preview the first rows as this cell's output
df.head()

Unnamed: 0,Date,Tweet,Stock Name,Company Name
0,2022-09-29 23:41:16+00:00,Mainstream media has done an amazing job at br...,TSLA,"Tesla, Inc."
1,2022-09-29 23:24:43+00:00,Tesla delivery estimates are at around 364k fr...,TSLA,"Tesla, Inc."
2,2022-09-29 23:18:08+00:00,3/ Even if I include 63.0M unvested RSUs as of...,TSLA,"Tesla, Inc."
3,2022-09-29 22:40:07+00:00,@RealDanODowd @WholeMarsBlog @Tesla Hahaha why...,TSLA,"Tesla, Inc."
4,2022-09-29 22:27:05+00:00,"@RealDanODowd @Tesla Stop trying to kill kids,...",TSLA,"Tesla, Inc."


In [3]:
# Report the frame's (rows, columns) shape followed by the dtype of each column
print(df.shape, df.dtypes, sep="\n")

(80793, 4)
Date            object
Tweet           object
Stock Name      object
Company Name    object
dtype: object


In [4]:
# Split the raw timestamp in 'Date' into a calendar-date column and a time-of-day column.
# The guard keeps this cell safe to re-run: once 'Date' holds plain dates, parsing it
# again would produce midnight timestamps and overwrite every 'time' value with 00:00:00.
if 'time' not in df.columns:
    # Parse once and derive both columns from the same parsed series
    timestamps = pd.to_datetime(df['Date'])
    df['time'] = timestamps.dt.time
    df['Date'] = timestamps.dt.date

df

Unnamed: 0,Date,Tweet,Stock Name,Company Name,time
0,2022-09-29,Mainstream media has done an amazing job at br...,TSLA,"Tesla, Inc.",23:41:16
1,2022-09-29,Tesla delivery estimates are at around 364k fr...,TSLA,"Tesla, Inc.",23:24:43
2,2022-09-29,3/ Even if I include 63.0M unvested RSUs as of...,TSLA,"Tesla, Inc.",23:18:08
3,2022-09-29,@RealDanODowd @WholeMarsBlog @Tesla Hahaha why...,TSLA,"Tesla, Inc.",22:40:07
4,2022-09-29,"@RealDanODowd @Tesla Stop trying to kill kids,...",TSLA,"Tesla, Inc.",22:27:05
...,...,...,...,...,...
80788,2021-10-07,Some of the fastest growing tech stocks on the...,XPEV,XPeng Inc.,17:11:57
80789,2021-10-04,"With earnings on the horizon, here is a quick ...",XPEV,XPeng Inc.,17:05:59
80790,2021-10-01,Our record delivery results are a testimony of...,XPEV,XPeng Inc.,04:43:41
80791,2021-10-01,"We delivered 10,412 Smart EVs in Sep 2021, rea...",XPEV,XPeng Inc.,00:03:32


In [5]:
from textblob import TextBlob
import re
import string
import nltk
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
# Fetch the NLTK data this notebook relies on: the stop-word list, the Punkt
# tokenizer models and WordNet. Recent NLTK releases load 'punkt_tab' inside
# word_tokenize, while older releases use 'punkt', so both are requested.
for resource in ('stopwords', 'punkt', 'punkt_tab', 'wordnet'):
    nltk.download(resource)

# Define function for preprocessing tweets
# NLTK resources shared by every preprocess_tweet call; filled on first use so the
# stop-word list is read and the lemmatizer is constructed once, not once per tweet.
_TWEET_NLP_CACHE = {}
# Translation table that deletes every character listed in string.punctuation.
_PUNCTUATION_TABLE = str.maketrans("", "", string.punctuation)


def _tweet_nlp_resources():
    """Return ``(stop_words, lemmatizer)``, building both on the first call."""
    if not _TWEET_NLP_CACHE:
        # Build both before storing either, so a failure never leaves a half-filled cache
        stop_words = frozenset(stopwords.words('english'))
        lemmatizer = WordNetLemmatizer()
        _TWEET_NLP_CACHE.update(stop_words=stop_words, lemmatizer=lemmatizer)
    return _TWEET_NLP_CACHE['stop_words'], _TWEET_NLP_CACHE['lemmatizer']


def preprocess_tweet(tweet):
    """Normalise a raw tweet into a space-separated string of lemmatized tokens.

    Steps, in order: strip URLs, @mentions and #hashtags; delete punctuation;
    lowercase; tokenize with NLTK; drop English stop words; lemmatize each
    remaining token with WordNet.

    Args:
        tweet: The raw tweet text. Any non-string value (for example the NaN
            that pandas uses for a missing cell) is treated as an empty tweet.

    Returns:
        The cleaned tweet as a single string, or ``""`` when the input is not
        a string or nothing is left after stripping.
    """
    # Missing cells arrive as float NaN / None; re.sub would raise TypeError on them
    if not isinstance(tweet, str):
        return ""

    # Remove URLs, user mentions, and hashtags (the hashtag word itself is dropped too)
    tweet = re.sub(r"http\S+", "", tweet)
    tweet = re.sub(r"@[^\s]+", "", tweet)
    tweet = re.sub(r"#([^\s]+)", "", tweet)
    # Remove punctuation, then convert to lowercase
    # NOTE(review): punctuation is stripped before stop-word filtering, so a
    # contraction such as "don't" becomes "dont" and presumably no longer matches
    # its stop-word entry - confirm whether that is intended.
    tweet = tweet.translate(_PUNCTUATION_TABLE).lower()

    # Nothing but whitespace left: the result is empty, so skip the NLTK work
    if not tweet.strip():
        return ""

    tokens = nltk.word_tokenize(tweet)
    stop_words, lemmatizer = _tweet_nlp_resources()
    # Drop stop words, lemmatize what remains, and join back into one string
    return " ".join(
        lemmatizer.lemmatize(token) for token in tokens if token not in stop_words
    )

# Clean every raw tweet and store the result next to the original text
df['preprocessed_tweet'] = df['Tweet'].map(preprocess_tweet)

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/ramakanthnamuduri/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/ramakanthnamuduri/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/ramakanthnamuduri/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [6]:
# Define function for sentiment analysis using TextBlob
def get_sentiment(tweet):
    """Label text as 'Positive', 'Negative' or 'Neutral' from its TextBlob polarity.

    Args:
        tweet: The text handed to ``TextBlob``.

    Returns:
        'Positive' when the polarity is above zero, 'Negative' when it is
        below zero, and 'Neutral' otherwise.
    """
    polarity = TextBlob(tweet).sentiment.polarity
    if polarity > 0:
        return 'Positive'
    # Anything that is neither above nor below zero falls through to 'Neutral'
    return 'Negative' if polarity < 0 else 'Neutral'

# Label every cleaned tweet with its sentiment class
df['sentiment'] = df['preprocessed_tweet'].map(get_sentiment)

# Preview the first rows, now including the sentiment column
df.head()

Unnamed: 0,Date,Tweet,Stock Name,Company Name,time,preprocessed_tweet,sentiment
0,2022-09-29,Mainstream media has done an amazing job at br...,TSLA,"Tesla, Inc.",23:41:16,mainstream medium done amazing job brainwashin...,Positive
1,2022-09-29,Tesla delivery estimates are at around 364k fr...,TSLA,"Tesla, Inc.",23:24:43,tesla delivery estimate around 364k analyst tsla,Neutral
2,2022-09-29,3/ Even if I include 63.0M unvested RSUs as of...,TSLA,"Tesla, Inc.",23:18:08,3 even include 630m unvested rsus 630 addition...,Positive
3,2022-09-29,@RealDanODowd @WholeMarsBlog @Tesla Hahaha why...,TSLA,"Tesla, Inc.",22:40:07,hahaha still trying stop tesla fsd bro get shi...,Positive
4,2022-09-29,"@RealDanODowd @Tesla Stop trying to kill kids,...",TSLA,"Tesla, Inc.",22:27:05,stop trying kill kid sad deranged old man,Negative


In [7]:
# Persist the labelled frame; the relative path is resolved against the kernel's working directory.
# NOTE(review): no index= argument is passed, so pandas' default applies - presumably the
# row index is written as an extra unnamed leading column; confirm downstream readers expect it.
df.to_csv("sentiment-textblob.csv")