## Gathering OTLY Twitter Mention Data
In this section we will be using SNScrape to gather twitter data surrounding OTLY's IPO date.

In [26]:
# Import libraries.
import os
import pandas as pd
import matplotlib

In [2]:
# Setting variables to be used in format string command below
tweet_count = 100000
text_query = "OTLY"
since_date = "2021-05-01"
until_date = "2021-06-30"

# Using OS library to call CLI commands in Python
os.system('snscrape --jsonl --max-results {} --since {} twitter-search "{} until:{}"> text-query-tweets.json'.format(tweet_count, since_date, text_query, until_date))

0

In [5]:
# Reads the json generated from the CLI command above and creates a pandas dataframe
otly_tweets_df = pd.read_json('text-query-tweets.json', lines=True)

# Displays first 5 entries from dataframe
display(otly_tweets_df.describe())
otly_tweets_df.head()


Unnamed: 0,id,replyCount,retweetCount,likeCount,quoteCount,conversationId,retweetedTweet,inReplyToTweetId
count,3330.0,3330.0,3330.0,3330.0,3330.0,3330.0,0.0,849.0
mean,1.400502e+18,0.848048,0.426126,5.957958,0.115315,1.400353e+18,,1.401435e+18
std,4763442000000000.0,3.575017,2.651611,21.491377,0.723738,5334267000000000.0,,5192383000000000.0
min,1.388835e+18,0.0,0.0,0.0,0.0,1.321816e+18,,1.358102e+18
25%,1.395748e+18,0.0,0.0,0.0,0.0,1.395735e+18,,1.396961e+18
50%,1.399872e+18,0.0,0.0,1.0,0.0,1.399849e+18,,1.402294e+18
75%,1.404412e+18,1.0,0.0,3.0,0.0,1.404405e+18,,1.405139e+18
max,1.409969e+18,128.0,119.0,516.0,25.0,1.409969e+18,,1.40995e+18


Unnamed: 0,_type,url,date,content,renderedContent,id,user,replyCount,retweetCount,likeCount,...,media,retweetedTweet,quotedTweet,inReplyToTweetId,inReplyToUser,mentionedUsers,coordinates,place,hashtags,cashtags
0,snscrape.modules.twitter.Tweet,https://twitter.com/PlantFinance/status/140996...,2021-06-29 20:17:10+00:00,Plant Based Companies and Public Markets Today...,Plant Based Companies and Public Markets Today...,1409969210193707008,"{'_type': 'snscrape.modules.twitter.User', 'us...",0,0,10,...,,,,,,,,,,"[HULK, MILK, OG, MEAT, OTLY, EATS, BABY]"
1,snscrape.modules.twitter.Tweet,https://twitter.com/signal_options/status/1409...,2021-06-29 19:53:56+00:00,$OTLY as predicted! Entered right at bottom! h...,$OTLY as predicted! Entered right at bottom! h...,1409963362444795906,"{'_type': 'snscrape.modules.twitter.User', 'us...",0,0,0,...,"[{'_type': 'snscrape.modules.twitter.Photo', '...",,,,,,,,,[OTLY]
2,snscrape.modules.twitter.Tweet,https://twitter.com/PeterPh19603125/status/140...,2021-06-29 19:51:30+00:00,@honkystonks I’m down 9%. Could be worse. Made...,@honkystonks I’m down 9%. Could be worse. Made...,1409962750898442240,"{'_type': 'snscrape.modules.twitter.User', 'us...",0,0,3,...,,,,1.409947e+18,"{'_type': 'snscrape.modules.twitter.User', 'us...","[{'_type': 'snscrape.modules.twitter.User', 'u...",,,,"[bbig, otly, ibio]"
3,snscrape.modules.twitter.Tweet,https://twitter.com/Arrow_Trades/status/140996...,2021-06-29 19:42:43+00:00,@quantmanz holding $otly over night?,@quantmanz holding $otly over night?,1409960539992715264,"{'_type': 'snscrape.modules.twitter.User', 'us...",0,0,0,...,,,,,,"[{'_type': 'snscrape.modules.twitter.User', 'u...",,,,[otly]
4,snscrape.modules.twitter.Tweet,https://twitter.com/BreauxGgg/status/140995273...,2021-06-29 19:11:41+00:00,@RampCapitalLLC What you think abt Otly?,@RampCapitalLLC What you think abt Otly?,1409952730584653824,"{'_type': 'snscrape.modules.twitter.User', 'us...",0,0,0,...,,,,1.40995e+18,"{'_type': 'snscrape.modules.twitter.User', 'us...","[{'_type': 'snscrape.modules.twitter.User', 'u...",,,,


In [7]:
# Export dataframe into a CSV
otly_tweets_df.to_csv('otly_tweets.csv', sep=',', index=False)

In [15]:
# Normalize the date for easier reading.
otly_tweets_df["date"] = pd.DatetimeIndex(otly_tweets_df.date).normalize()

# Preview the dataframe.
otly_tweets_df.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  otly_tweets_df["date"] = pd.DatetimeIndex(otly_tweets_df.date).normalize()


Unnamed: 0,date,id,replyCount,retweetCount,likeCount,quoteCount
0,2021-06-29 00:00:00+00:00,1409969210193707008,0,0,10,0
1,2021-06-29 00:00:00+00:00,1409963362444795906,0,0,0,0
2,2021-06-29 00:00:00+00:00,1409962750898442240,0,0,3,0
3,2021-06-29 00:00:00+00:00,1409960539992715264,0,0,0,0
4,2021-06-29 00:00:00+00:00,1409952730584653824,0,0,0,0


In [19]:
# Grouping the twitter mentions by date to get a count of mentions per day and the sum of the tweet details. 
otly_tweets_per_day_df = otly_tweets_df[["date","id"]].groupby("date").count()
otly_tweets_details = otly_tweets_df[["date","replyCount","retweetCount","likeCount","quoteCount"]].groupby("date").sum()

In [24]:
# Preview tweets per day DF.
otly_tweets_per_day_df.tail(10)


Unnamed: 0_level_0,id
date,Unnamed: 1_level_1
2021-06-20 00:00:00+00:00,23
2021-06-21 00:00:00+00:00,37
2021-06-22 00:00:00+00:00,30
2021-06-23 00:00:00+00:00,50
2021-06-24 00:00:00+00:00,82
2021-06-25 00:00:00+00:00,57
2021-06-26 00:00:00+00:00,23
2021-06-27 00:00:00+00:00,43
2021-06-28 00:00:00+00:00,42
2021-06-29 00:00:00+00:00,40


In [25]:
# Preview tweets details DF.
otly_tweets_details.tail(10)

Unnamed: 0_level_0,replyCount,retweetCount,likeCount,quoteCount
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2021-06-20 00:00:00+00:00,52,6,604,3
2021-06-21 00:00:00+00:00,25,25,287,5
2021-06-22 00:00:00+00:00,13,7,142,6
2021-06-23 00:00:00+00:00,45,6,377,1
2021-06-24 00:00:00+00:00,95,24,689,5
2021-06-25 00:00:00+00:00,44,17,322,5
2021-06-26 00:00:00+00:00,26,22,172,1
2021-06-27 00:00:00+00:00,29,19,273,7
2021-06-28 00:00:00+00:00,17,7,190,4
2021-06-29 00:00:00+00:00,27,18,191,1
