In [1]:
import pandas as pd
from langdetect import detect
from pandarallel import pandarallel

In [2]:
# Load the tweet export from the CSV file into a DataFrame.
tesla_df = pd.read_csv(filepath_or_buffer="tesla-tweets.csv")

In [3]:
# Preview the first five rows.
tesla_df.head(5)

Unnamed: 0,Date & Time,Profile Picture Link,Twitter ID,Tweet Text,Tweet Link
0,"April 10, 2022 at 07:44PM",http://pbs.twimg.com/profile_images/15120745...,@Jessica1988kk,"RT @invest_answers: Crypto news, #Bitcoin Whal...",https://twitter.com/Jessica1988kk/status/15131...
1,"April 10, 2022 at 07:45PM",http://pbs.twimg.com/profile_ s/87878355348773...,@JotaGe2014,#Tesla tiene récord de autos vendidos. Es impr...,https://twitter.com/JotaGe2014/status/15131737...
2,"April 10, 2022 at 07:45PM",http://pbs.twimg.com/profile_images/936422368...,@MmeCallas,RT @CottonCodes: 🐒 #love in my #MariaCallas I ...,https://twitter.com/MmeCallas/status/151317374...
3,"April 10, 2022 at 07:45PM",http://pbs.twimg.com/profile_images/146366591...,@BotSecx,RT @CottonCodes: 🐒 #love in my #MariaCallas I ...,https://twitter.com/BotSecx/status/15131737626...
4,"April 10, 2022 at 07:45PM",http://pbs.twimg.com/profile_images/111675859...,@agseh,RT @RupiReportero_: 🙆‍♂️🚘 Al que le robaron la...,https://twitter.com/agseh/status/1513173864829...


In [4]:
# Preview the last five rows.
tesla_df.tail(5)

Unnamed: 0,Date & Time,Profile Picture Link,Twitter ID,Tweet Text,Tweet Link
151995,"November 12, 2022 at 02:20PM",http://pbs.twimg.com/profile_images/153320879...,@FolgendenFolgen,"RT @unheilbargut: Erinnert ihr euch, als @elon...",https://twitter.com/FolgendenFolgen/status/159...
151996,"November 12, 2022 at 03:10PM",http://abs.twimg.com/sticky/default_profile_i...,@kerntdennis,"RT @unheilbargut: Erinnert ihr euch, als @elon...",https://twitter.com/kerntdennis/status/1591395...
151997,"November 12, 2022 at 03:10PM",http://pbs.twimg.com/profile_images/133838713...,@kadiff651,"RT @unheilbargut: Erinnert ihr euch, als @elon...",https://twitter.com/kadiff651/status/159139556...
151998,"November 12, 2022 at 03:10PM",http://pbs.twimg.com/profile_images/121896452...,@TeslaradarB,Nice! skatebambi 🇸🇪 just spotted a 2021 Tesla ...,https://twitter.com/TeslaradarB/status/1591395...
151999,"November 12, 2022 at 03:11PM",http://pbs.twimg.com/profile_images/157116418...,@JandTContent,Crash and burn EVERYWHERE... \n\nAnother one b...,https://twitter.com/JandTContent/status/159139...


In [5]:
def detect_en(text):
    """Return True when langdetect classifies ``text`` as English.

    Args:
        text: The tweet text to classify. Non-string values (for example the
            NaN pandas uses for a missing cell) and blank strings are treated
            as "not English".

    Returns:
        bool: True if ``detect(text)`` returns ``'en'``; False otherwise,
        including when detection fails.
    """
    # Reject values that cannot be classified up front instead of relying on
    # the exception handler below to filter them out.
    if not isinstance(text, str) or not text.strip():
        return False
    try:
        return detect(text) == 'en'
    except Exception:
        # Best-effort filter: any detection failure means "not English".
        # Unlike a bare ``except:``, this lets KeyboardInterrupt and SystemExit
        # propagate, so a long-running apply can still be interrupted.
        return False

In [6]:
# Keep only the rows whose tweet text detect_en classifies as English.
pandarallel.initialize(progress_bar=False)
english_mask = tesla_df['Tweet Text'].parallel_apply(detect_en)
tesla_df = tesla_df[english_mask]

INFO: Pandarallel will run on 4 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.


In [15]:
# Drop the columns the analysis does not use. errors="ignore" keeps the cell
# safe to re-run: without it, a second execution raises KeyError because the
# columns are already gone.
tesla_df = tesla_df.drop(columns=['Profile Picture Link', 'Tweet Link'], errors="ignore")

In [10]:
# Count the distinct Twitter IDs; fewer IDs than rows means some accounts
# (probably bots or repeat users) appear more than once.
tesla_df["Twitter ID"].nunique(dropna=True)

56720

In [11]:
from textblob import TextBlob

In [12]:
# Sentiment helper applied to every tweet.
def get_sentiment(text):
    """Return the TextBlob polarity score for ``text``.

    Args:
        text: The tweet text to score; it is passed straight to ``TextBlob``.

    Returns:
        The ``sentiment.polarity`` value TextBlob computes for the text.
    """
    blob = TextBlob(text)
    return blob.sentiment.polarity

In [13]:
# Score every tweet in parallel and store the result as a new column.
pandarallel.initialize(progress_bar=False)
sentiment_scores = tesla_df['Tweet Text'].parallel_apply(get_sentiment)
tesla_df['Sentiment Rating'] = sentiment_scores

INFO: Pandarallel will run on 4 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.


In [16]:
# Show the frame with the new 'Sentiment Rating' column (pandas truncates the
# rendering to the first and last rows).
tesla_df

Unnamed: 0,Date & Time,Twitter ID,Tweet Text,Sentiment Rating
0,"April 10, 2022 at 07:44PM",@Jessica1988kk,"RT @invest_answers: Crypto news, #Bitcoin Whal...",0.318182
2,"April 10, 2022 at 07:45PM",@MmeCallas,RT @CottonCodes: 🐒 #love in my #MariaCallas I ...,0.425000
3,"April 10, 2022 at 07:45PM",@BotSecx,RT @CottonCodes: 🐒 #love in my #MariaCallas I ...,0.425000
5,"April 10, 2022 at 07:45PM",@ElTendies,RT @cb_doge: Tesla - A Trillion Dollar Company...,0.083333
6,"April 10, 2022 at 07:45PM",@LauraCory2013,"@elonmusk, few #chargingstations in my area. I...",0.000000
...,...,...,...,...
151986,"November 12, 2022 at 02:18PM",@NamaloomInsan,@stratosathens @alfonslopeztena @elonmusk @tes...,0.333333
151991,"November 12, 2022 at 02:19PM",@DemApples00,#DOGE #DogelonMars ……🚀🌗\n\nThe PEOPLES AND OFF...,0.000000
151992,"November 12, 2022 at 02:19PM",@Mrtnl79,"RT @HakanHoca22: Elon, my friend, come to Edir...",0.550000
151993,"November 12, 2022 at 02:20PM",@DemApples00,#DOGE #DogelonMars …🚀🌗\n\nThe PEOPLES AND OFFI...,0.000000


In [19]:
# Sanity check: read the tweet with the lowest sentiment score.
# idxmin gives the index label of the first row holding the minimum.
min_value_index = tesla_df['Sentiment Rating'].idxmin()
min_value_tweet = tesla_df['Tweet Text'].loc[min_value_index]
min_value_tweet

'RT @nisargmehta2112: #EVs running on  #BloodBatteries and #ChildLabor being the part of it - A shocking revelation by @WIONews by @palkisu \n\n@elonmusk , please share your strategy to rectify this. \n\n#Tesla #Volvo #MercedesBenz #Volkswagen #China #DRCongo https://t.co/ccLKPIFVsE'

In [21]:
# Sanity check: read the tweet with the highest sentiment score.
# idxmax gives the index label of the first row holding the maximum.
max_value_index = tesla_df['Sentiment Rating'].idxmax()
max_value_tweet = tesla_df['Tweet Text'].loc[max_value_index]
max_value_tweet

"RT @cryptoziizii: What time is it?\nTime to load up santafloki!!\nWisdom is when you have a bag of santafloki and still bag up more!!\nLET'S GOOOO!!\n#btc #doge #crypto #santafloki #bsc #santaflokiarmy #bsc #binance #tesla #elon\n#savethechildren #coingecko #Cmc https://t.co/DFGSwMuo6Q"

In [26]:
# Remove exact repeats: rows where the same account posted identical text.
# Near-duplicates survive, e.g. DemApples00 tweeted almost the same text one
# minute apart but with a different spam link at the end. The same retweet
# posted by different accounts is also kept, because 'Twitter ID' is part of
# the key.
# Reassign instead of using inplace=True so the cell does not mutate a frame
# that earlier cells have already displayed.
tesla_df = tesla_df.drop_duplicates(subset=['Twitter ID', 'Tweet Text'])

In [27]:
# Show the frame after duplicate removal (pandas truncates the rendering to
# the first and last rows).
tesla_df

Unnamed: 0,Date & Time,Twitter ID,Tweet Text,Sentiment Rating
0,"April 10, 2022 at 07:44PM",@Jessica1988kk,"RT @invest_answers: Crypto news, #Bitcoin Whal...",0.318182
2,"April 10, 2022 at 07:45PM",@MmeCallas,RT @CottonCodes: 🐒 #love in my #MariaCallas I ...,0.425000
3,"April 10, 2022 at 07:45PM",@BotSecx,RT @CottonCodes: 🐒 #love in my #MariaCallas I ...,0.425000
5,"April 10, 2022 at 07:45PM",@ElTendies,RT @cb_doge: Tesla - A Trillion Dollar Company...,0.083333
6,"April 10, 2022 at 07:45PM",@LauraCory2013,"@elonmusk, few #chargingstations in my area. I...",0.000000
...,...,...,...,...
151986,"November 12, 2022 at 02:18PM",@NamaloomInsan,@stratosathens @alfonslopeztena @elonmusk @tes...,0.333333
151991,"November 12, 2022 at 02:19PM",@DemApples00,#DOGE #DogelonMars ……🚀🌗\n\nThe PEOPLES AND OFF...,0.000000
151992,"November 12, 2022 at 02:19PM",@Mrtnl79,"RT @HakanHoca22: Elon, my friend, come to Edir...",0.550000
151993,"November 12, 2022 at 02:20PM",@DemApples00,#DOGE #DogelonMars …🚀🌗\n\nThe PEOPLES AND OFFI...,0.000000
