In [1]:
# TODO: add spikes analysis to code

In [2]:
import pandas as pd
import re
import string

# progress bar
from tqdm import tqdm
tqdm.pandas()

TOPIC_MODELLING = 0
SENTIMENT_ANALYSIS = 1

# file paths
TWEET_CORPUS_DATA_IN = "../datain/clean/largest_community_tweets.jsonl"
MAY_1_DATA_OUT = "../dataout/general/may_1.csv"
MAY_5_DATA_OUT = "../dataout/general/may_5.csv"
MAY_22_DATA_OUT = "../dataout/general/may_22.csv"
MAY_25_DATA_OUT = "../dataout/general/may_25.csv"
MAY_31_DATA_OUT = "../dataout/general/may_31.csv"


In [3]:
def load_data():
    '''
        Import corpus data in json format.
        Filter to have only english tweets and remove retweets.

        Returns:
            imported english, non-retweeted data
    '''
    #import the data
    filename = TWEET_CORPUS_DATA_IN
    print("\tLoading json data...")
    print("\t\tThis can take a while (about ~10 minutes on current largest community data)")
    print("\t\tGo make yourself a cup of hot thing ;)")
    data = pd.read_json(filename, lines=True)

    # clean data: remove retweets and select only english tweets
    print("\tRemoving reweets and non-english tweets...")
    data = data[~data["text"].progress_apply(lambda x: x.startswith("RT"))]
    data = data[data["lang"].progress_apply(lambda x: x == "en")]
    data = data.rename(columns={'text': 'corpus'})
    print()

    return data



In [4]:
def clean_tweet(tweet, remove_stop):
    '''
        Cleans tweet from hashtags, mentions, special characters, html entities, numbers,
        links, and stop words. Converts text to lower case.

        Args:
            tweet: a single tweet (String)
            remove_stop: True if stopwords should be removed and False if they should not be removed.
        Returns:
            tweet: cleaned tweet (String)
    '''
    tweet = str.lower(tweet)
    tweet = ' '.join(re.sub("(@[A-Za-z0-9_]+)|(#[A-Za-z0-9_]+)", " ", tweet).split()) # remove mentions and hashtags
    tweet = re.sub("(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \.-]*)", "", tweet, flags=re.MULTILINE) # remove links
    tweet = re.sub("0x([\da-z\.-]+)", "", tweet, flags=re.MULTILINE) # remove addresses/pointers
    tweet = re.sub('\&\w+', "", tweet) # remove html entities (example &amp)

    return tweet

In [5]:
df = load_data()
df

	Loading json data...
		This can take a while (about ~10 minutes on current largest community data)
		Go make yourself a cup of hot thing ;)
	Removing reweets and non-english tweets...


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 459479/459479 [00:12<00:00, 37562.83it/s]
100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 459370/459370 [00:21<00:00, 21249.53it/s]





Unnamed: 0,lang,reply_settings,public_metrics,corpus,possibly_sensitive,entities,created_at,author_id,conversation_id,referenced_tweets,id,context_annotations,source,in_reply_to_user_id,attachments,geo,withheld
0,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",Good project \n#BSC @DinoSour #NFT\n@yamin_rah...,False,"{'hashtags': [{'start': 14, 'end': 18, 'tag': ...",2021-05-31 23:59:42+00:00,1397168952908779520,1399515966774530048,"[{'type': 'quoted', 'id': '1398339274953564163'}]",1399515966774530048,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter Web App,,,,
1,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",@AromaFinance Great project!\n\n$reset #bsc #B...,False,"{'cashtags': [{'start': 30, 'end': 36, 'tag': ...",2021-05-31 23:59:40+00:00,1395244202808680448,1399313027065810944,"[{'type': 'replied_to', 'id': '139931302706581...",1399515957362450432,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter Web App,1.390241e+18,,,
2,en,everyone,"{'retweet_count': 1, 'reply_count': 0, 'like_c...",Such a beautiful project and congratulations t...,False,"{'hashtags': [{'start': 198, 'end': 213, 'tag'...",2021-05-31 23:59:35+00:00,1397350208489463808,1399515936093204480,"[{'type': 'quoted', 'id': '1399372631267287041'}]",1399515936093204480,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter for Android,,,,
4,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",I am participating in the CryptoUltraman NFT a...,False,"{'hashtags': [{'start': 60, 'end': 78, 'tag': ...",2021-05-31 23:58:47+00:00,2718560166,1399515734007447552,"[{'type': 'quoted', 'id': '1398277372651081732'}]",1399515734007447552,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter for Android,,,,
5,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",@apenftorg @CoinMarketCap Nice to find this pr...,False,"{'cashtags': [{'start': 216, 'end': 220, 'tag'...",2021-05-31 23:58:44+00:00,1383794353760391168,1397848170739077120,"[{'type': 'replied_to', 'id': '139784817073907...",1399515723274280960,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter Web App,1.392094e+18,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
459473,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",$DENA #NFT #Defi #YieldFarming\nGreat \n@lensa...,False,"{'hashtags': [{'start': 6, 'end': 10, 'tag': '...",2021-02-01 11:31:45+00:00,1322618452108931072,1356203583193063424,"[{'type': 'quoted', 'id': '1349729014944972800'}]",1356203583193063424,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter for Android,,,,
459474,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",@SMATOOS_now @bagasadys @airdrophunter78 @jher...,False,"{'mentions': [{'start': 0, 'end': 12, 'usernam...",2021-02-01 11:05:55+00:00,343817344,1349729014944972800,"[{'type': 'replied_to', 'id': '134972901494497...",1356197080272752640,,Twitter for Android,3.339523e+08,,,
459475,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",@SMATOOS_now @bagasadys @airdrophunter78 @jher...,False,"{'mentions': [{'start': 0, 'end': 12, 'usernam...",2021-02-01 10:49:53+00:00,1099564686788374528,1349729014944972800,"[{'type': 'replied_to', 'id': '134972901494497...",1356193045817872384,,Twitter for Android,3.339523e+08,,,
459476,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",@ZthCrypto @AlienWorlds How are NFT rates dete...,False,"{'mentions': [{'start': 0, 'end': 10, 'usernam...",2021-02-01 09:29:19+00:00,1164164048046514176,1356157119909642240,"[{'type': 'replied_to', 'id': '135615711990964...",1356172769424244736,,Twitter Web App,1.138926e+18,,,


In [6]:
'''
    Clean corpus for sentiment and topic modelling code.
'''
print("Cleaning corpus...")
# df = load_data()

# cleaning for sentiment analysis (keep stop words)
# remove_stop = False
# df['cleaned_tweet_sentiment'] = df['corpus'].progress_apply(clean_tweet, remove_stop=remove_stop)
remove_stop = True
df['cleaned_tweet_btm'] = df['corpus'].progress_apply(clean_tweet, remove_stop=remove_stop)

print("Finished cleaning corpus. The next steps will start in a few moments...")

Cleaning corpus...


100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 407489/407489 [00:26<00:00, 15198.77it/s]


Finished cleaning corpus. The next steps will start in a few moments...


In [7]:
df['date'] = df['created_at'].dt.date
df['time'] = df['created_at'].dt.time

In [8]:
# group tweets by date and count number of entries per day
dates = df.groupby('date').count()

In [9]:
dates[dates['created_at']>15000]

Unnamed: 0_level_0,lang,reply_settings,public_metrics,corpus,possibly_sensitive,entities,created_at,author_id,conversation_id,referenced_tweets,id,context_annotations,source,in_reply_to_user_id,attachments,geo,withheld,cleaned_tweet_btm,time
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2021-05-01,17097,17097,17097,17097,17097,17097,17097,17097,17097,3319,17097,16712,17097,2666,53,71,0,17097,17097
2021-05-05,25580,25580,25580,25580,25580,25580,25580,25580,25580,1486,25580,25506,25580,18271,100,120,0,25580,25580
2021-05-22,16535,16535,16535,16535,16535,16535,16535,16535,16535,15700,16535,16314,16535,7960,52,70,0,16535,16535
2021-05-25,17292,17292,17292,17292,17292,17292,17292,17292,17292,11187,17292,15274,17292,8533,92,85,0,17292,17292
2021-05-31,17893,17893,17893,17893,17893,17893,17893,17893,17893,17091,17893,17355,17893,7939,64,60,0,17893,17893


In [17]:
dates

Unnamed: 0_level_0,lang,reply_settings,public_metrics,corpus,possibly_sensitive,entities,created_at,author_id,conversation_id,referenced_tweets,id,context_annotations,source,in_reply_to_user_id,attachments,geo,withheld,cleaned_tweet_btm,time
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2021-02-01,8,8,8,8,8,8,8,8,8,8,8,1,8,7,0,0,0,8,8
2021-02-02,9,9,9,9,9,9,9,9,9,6,9,7,9,4,0,0,0,9,9
2021-02-03,13,13,13,13,13,13,13,13,13,12,13,5,13,8,2,0,0,13,13
2021-02-04,3,3,3,3,3,3,3,3,3,3,3,1,3,1,0,0,0,3,3
2021-02-05,7,7,7,7,7,7,7,7,7,6,7,1,7,5,2,0,0,7,7
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-05-27,6417,6417,6417,6417,6417,6417,6417,6417,6417,5538,6417,5559,6417,3311,59,25,0,6417,6417
2021-05-28,9037,9037,9037,9037,9037,9037,9037,9037,9037,8470,9037,8497,9037,3683,54,36,0,9037,9037
2021-05-29,7751,7751,7751,7751,7751,7751,7751,7751,7751,6346,7751,7199,7751,3019,92,17,0,7751,7751
2021-05-30,9150,9150,9150,9150,9150,9150,9150,9150,9150,7871,9150,8459,9150,4352,83,31,0,9150,9150


In [10]:
may_1 = df[df['date'] == pd.to_datetime("2021-05-01")]
may_1

  result = libops.scalar_compare(x.ravel(), y, op)


Unnamed: 0,lang,reply_settings,public_metrics,corpus,possibly_sensitive,entities,created_at,author_id,conversation_id,referenced_tweets,id,context_annotations,source,in_reply_to_user_id,attachments,geo,withheld,cleaned_tweet_btm,date,time
297362,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...","💪Wow, $72~$1000 Airdrop, don't miss the @Crypt...",False,"{'hashtags': [{'start': 240, 'end': 245, 'tag'...",2021-05-01 23:59:48+00:00,1068724741,1388644354600816640,,1388644354600816640,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter for Android,,,,,"💪wow, $72~$1000 airdrop, don't miss the airdro...",2021-05-01,23:59:48
297363,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...","@fanadisenft Thanks for the opportunity, it wi...",False,"{'hashtags': [{'start': 102, 'end': 106, 'tag'...",2021-05-01 23:59:43+00:00,1134924652466204672,1388573491113480192,"[{'type': 'replied_to', 'id': '138857349111348...",1388644332576464896,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter for Android,1.377342e+18,,,,"thanks for the opportunity, it will be a great...",2021-05-01,23:59:43
297364,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",@cybermiles I think this project will be great...,False,"{'hashtags': [{'start': 69, 'end': 80, 'tag': ...",2021-05-01 23:59:29+00:00,1388062653449441280,1387750074608865280,"[{'type': 'replied_to', 'id': '138775007460886...",1388644275823550464,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter for Android,9.002680e+17,,,,i think this project will be great and very su...,2021-05-01,23:59:29
297365,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",A lit world of free creation that will soon be...,False,"{'hashtags': [{'start': 59, 'end': 70, 'tag': ...",2021-05-01 23:59:12+00:00,1377248305571192832,1388644201273827328,,1388644201273827328,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter Web App,,,,,a lit world of free creation that will soon be...,2021-05-01,23:59:12
297366,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",I think this project will be great and very su...,False,"{'hashtags': [{'start': 70, 'end': 81, 'tag': ...",2021-05-01 23:58:31+00:00,1388062653449441280,1388644032243552256,,1388644032243552256,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter for Android,,,,,i think this project will be great and very su...,2021-05-01,23:58:31
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
314861,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...","🚩Wow, $72~$1000 Airdrop, don't miss the @Crypt...",False,"{'annotations': [{'start': 132, 'end': 150, 'p...",2021-05-01 00:01:18+00:00,1352202408961589248,1388282342981050368,,1388282342981050368,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter for Android,,,,,"🚩wow, $72~$1000 airdrop, don't miss the airdro...",2021-05-01,00:01:18
314862,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",A lit world of free creation that will soon be...,False,"{'urls': [{'start': 139, 'end': 162, 'url': 'h...",2021-05-01 00:01:06+00:00,1387813847088599040,1388282295316979712,,1388282295316979712,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter for Android,,,,,a lit world of free creation that will soon be...,2021-05-01,00:01:06
314863,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",A lit world of free creation that will soon be...,False,"{'urls': [{'start': 139, 'end': 162, 'url': 'h...",2021-05-01 00:00:58+00:00,1377277543561060352,1388282259749306368,,1388282259749306368,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter for Android,,,,,a lit world of free creation that will soon be...,2021-05-01,00:00:58
314864,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",A lit world of free creation that will soon be...,False,"{'urls': [{'start': 139, 'end': 162, 'url': 'h...",2021-05-01 00:00:54+00:00,1117477244769718272,1388282241822859264,,1388282241822859264,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter for Android,,,,,a lit world of free creation that will soon be...,2021-05-01,00:00:54


In [16]:
len(may_1)

17097

In [11]:
may_5 = df[df['date'] == pd.to_datetime("2021-05-05")]
may_5

  result = libops.scalar_compare(x.ravel(), y, op)


Unnamed: 0,lang,reply_settings,public_metrics,corpus,possibly_sensitive,entities,created_at,author_id,conversation_id,referenced_tweets,id,context_annotations,source,in_reply_to_user_id,attachments,geo,withheld,cleaned_tweet_btm,date,time
246864,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",Get one of the 100 Gen0 #KryptomonEggs\nBe the...,False,"{'hashtags': [{'start': 24, 'end': 38, 'tag': ...",2021-05-05 23:59:51+00:00,1302330326069329920,1390093917102563328,,1390093917102563328,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter Web App,,,,,get one of the 100 gen0 be the first to own a ...,2021-05-05,23:59:51
246865,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",@renft_protocol is a multi-chain liquidity so...,False,"{'hashtags': [{'start': 122, 'end': 126, 'tag'...",2021-05-05 23:58:58+00:00,617259907,1390093694640947200,,1390093694640947200,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter Web App,1.370185e+18,,,,is a multi-chain liquidity solution platform t...,2021-05-05,23:58:58
246866,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",@renft_protocol is a multi-chain liquidity so...,False,"{'hashtags': [{'start': 122, 'end': 126, 'tag'...",2021-05-05 23:58:47+00:00,1012237887796940800,1390093649170468864,,1390093649170468864,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter Web App,1.370185e+18,,,,is a multi-chain liquidity solution platform t...,2021-05-05,23:58:47
246867,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",@renft_protocol is a multi-chain liquidity so...,False,"{'hashtags': [{'start': 122, 'end': 126, 'tag'...",2021-05-05 23:58:35+00:00,1370685806105358336,1390093600692727808,,1390093600692727808,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter Web App,1.370185e+18,,,,is a multi-chain liquidity solution platform t...,2021-05-05,23:58:35
246868,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",@renft_protocol is a multi-chain liquidity so...,False,"{'hashtags': [{'start': 122, 'end': 126, 'tag'...",2021-05-05 23:58:35+00:00,1366619298911772672,1390093599061135360,,1390093599061135360,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter for Android,1.370185e+18,,,,is a multi-chain liquidity solution platform t...,2021-05-05,23:58:35
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
273086,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",Get one of the 100 Gen0 #KryptomonEggs\nBe the...,False,"{'hashtags': [{'start': 24, 'end': 38, 'tag': ...",2021-05-05 00:00:32+00:00,265252697,1389731703325794304,,1389731703325794304,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter Web App,,,,,get one of the 100 gen0 be the first to own a ...,2021-05-05,00:00:32
273087,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",A lit world of free creation that will soon be...,False,"{'hashtags': [{'start': 59, 'end': 70, 'tag': ...",2021-05-05 00:00:21+00:00,1205832778358644736,1389731656643366912,,1389731656643366912,"[{'domain': {'id': '3', 'name': 'TV Shows', 'd...",Twitter Web App,,,,,a lit world of free creation that will soon be...,2021-05-05,00:00:21
273088,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",Get one of the 100 Gen0 #KryptomonEggs\nBe the...,False,"{'hashtags': [{'start': 24, 'end': 38, 'tag': ...",2021-05-05 00:00:15+00:00,1369054047655911424,1389731630542114816,,1389731630542114816,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter for Android,,,,,get one of the 100 gen0 be the first to own a ...,2021-05-05,00:00:15
273089,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",@renft_protocol is a multi-chain liquidity so...,False,"{'hashtags': [{'start': 122, 'end': 126, 'tag'...",2021-05-05 00:00:14+00:00,957346219902169088,1389731626918178816,,1389731626918178816,"[{'domain': {'id': '3', 'name': 'TV Shows', 'd...",Twitter for iPhone,1.370185e+18,,,,is a multi-chain liquidity solution platform t...,2021-05-05,00:00:14


In [12]:
may_22 = df[df['date'] == pd.to_datetime("2021-05-22")]
may_22

  result = libops.scalar_compare(x.ravel(), y, op)


Unnamed: 0,lang,reply_settings,public_metrics,corpus,possibly_sensitive,entities,created_at,author_id,conversation_id,referenced_tweets,id,context_annotations,source,in_reply_to_user_id,attachments,geo,withheld,cleaned_tweet_btm,date,time
108392,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",I am participating in @asvalabofficial airdrop...,False,"{'hashtags': [{'start': 86, 'end': 95, 'tag': ...",2021-05-22 23:59:56+00:00,1006299619851100160,1396254534586961920,"[{'type': 'quoted', 'id': '1395658467071840262'}]",1396254534586961920,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter Web App,,,,,i am participating in airdrop,2021-05-22,23:59:56
108393,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",@asvalabofficial I am participating in @asvala...,False,"{'hashtags': [{'start': 103, 'end': 112, 'tag'...",2021-05-22 23:59:46+00:00,1006299619851100160,1395658467071840256,"[{'type': 'replied_to', 'id': '139565846707184...",1396254492476141568,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter Web App,1.389162e+18,,,,i am participating in airdrop,2021-05-22,23:59:46
108394,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",I am participating in @asvalabofficial airdrop...,False,"{'hashtags': [{'start': 93, 'end': 102, 'tag':...",2021-05-22 23:58:56+00:00,1394074739019116544,1396254283167801344,"[{'type': 'quoted', 'id': '1395658467071840262'}]",1396254283167801344,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter for Android,,,,,i am participating in airdrop,2021-05-22,23:58:56
108395,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",BSClaunch DEFI platform has many new solutions...,False,"{'hashtags': [{'start': 153, 'end': 163, 'tag'...",2021-05-22 23:58:14+00:00,68267022,1396254103819358208,,1396254103819358208,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter Web App,,{'media_keys': ['3_1396252413061853191']},,,bsclaunch defi platform has many new solutions...,2021-05-22,23:58:14
108396,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",@asvalabofficial “I am participating in @asval...,False,"{'hashtags': [{'start': 68, 'end': 77, 'tag': ...",2021-05-22 23:57:54+00:00,1281038430663618560,1395658467071840256,"[{'type': 'replied_to', 'id': '139565846707184...",1396254023083171840,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter for Android,1.389162e+18,,,,"“i am participating in airdrop""",2021-05-22,23:57:54
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
126272,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",I am participating in @asvalabofficial airdrop...,False,"{'hashtags': [{'start': 83, 'end': 92, 'tag': ...",2021-05-22 00:07:28+00:00,820681376752160768,1395894039623442432,"[{'type': 'quoted', 'id': '1395658467071840262'}]",1395894039623442432,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter for Android,,,,,i am participating in airdrop,2021-05-22,00:07:28
126274,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",@MODISOL_NFT Good project @Arrume2 @rumistk @R...,False,"{'mentions': [{'start': 0, 'end': 12, 'usernam...",2021-05-22 00:05:52+00:00,1344955860624101376,1395108472094527488,"[{'type': 'replied_to', 'id': '139510847209452...",1395893640069718016,,Twitter Web App,1.395091e+18,,,,good project,2021-05-22,00:05:52
126279,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",Great project loaded with lots of prospect the...,False,"{'mentions': [{'start': 143, 'end': 157, 'user...",2021-05-22 00:01:20+00:00,1388861456603512832,1395892496698273792,"[{'type': 'quoted', 'id': '1394987725913219072'}]",1395892496698273792,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter for Android,,,,,great project loaded with lots of prospect the...,2021-05-22,00:01:20
126280,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",@ethernaal Great project loaded with lots of p...,False,"{'mentions': [{'start': 0, 'end': 10, 'usernam...",2021-05-22 00:01:05+00:00,1388861456603512832,1394987725913219072,"[{'type': 'replied_to', 'id': '139498772591321...",1395892433095839744,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter for Android,1.373252e+18,,,,great project loaded with lots of prospect the...,2021-05-22,00:01:05


In [13]:
may_25 = df[df['date'] == pd.to_datetime("2021-05-25")]
may_25

  result = libops.scalar_compare(x.ravel(), y, op)


Unnamed: 0,lang,reply_settings,public_metrics,corpus,possibly_sensitive,entities,created_at,author_id,conversation_id,referenced_tweets,id,context_annotations,source,in_reply_to_user_id,attachments,geo,withheld,cleaned_tweet_btm,date,time
74453,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",@nftgateio Iam participating in @nftgate.io ai...,False,"{'hashtags': [{'start': 100, 'end': 108, 'tag'...",2021-05-25 23:59:52+00:00,1388124557463019520,1396718736388722688,"[{'type': 'replied_to', 'id': '139671873638872...",1397341681750474752,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter for iPhone,9.657424e+17,,,,iam participating in .io airdrop # airdrops,2021-05-25,23:59:52
74454,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...","I'm Joining HSC Mainnet to Win NFT Airdrops, t...",False,"{'hashtags': [{'start': 70, 'end': 74, 'tag': ...",2021-05-25 23:59:39+00:00,2507239160,1397341623588032512,,1397341623588032512,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter for Android,,,,,"i'm joining hsc mainnet to win nft airdrops, t...",2021-05-25,23:59:39
74455,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",@dagifinance The project is implemented very p...,False,"{'hashtags': [{'start': 215, 'end': 226, 'tag'...",2021-05-25 23:59:38+00:00,1353373937665675264,1394659845588979712,"[{'type': 'replied_to', 'id': '139465984558897...",1397341622371713024,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter for Android,1.389575e+18,,,,the project is implemented very professionally...,2021-05-25,23:59:38
74456,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",I am participating in @https://twitter.com/nft...,False,"{'hashtags': [{'start': 102, 'end': 111, 'tag'...",2021-05-25 23:59:31+00:00,1382214302778617856,1397341590096543744,"[{'type': 'quoted', 'id': '1396718736388722689'}]",1397341590096543744,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter Web App,,,,,i am participating in ://://,2021-05-25,23:59:31
74458,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",@nftgateio #NFTGATE #NFT I am participating in...,False,"{'hashtags': [{'start': 11, 'end': 19, 'tag': ...",2021-05-25 23:58:52+00:00,1366189891256684544,1396718736388722688,"[{'type': 'replied_to', 'id': '139671873638872...",1397341428087332864,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter for Android,9.657424e+17,,,,i am participating in airdrop it's great so bi...,2021-05-25,23:58:52
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
92735,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",Nice project\nThis project looks very interest...,False,"{'mentions': [{'start': 162, 'end': 173, 'user...",2021-05-25 00:03:59+00:00,1370095944004370432,1396980328296681472,"[{'type': 'quoted', 'id': '1396812892696027141'}]",1396980328296681472,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter for Android,,,,,nice project this project looks very interesti...,2021-05-25,00:03:59
92736,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...","Wow Airdroo is live, This project looks very i...",False,"{'mentions': [{'start': 173, 'end': 182, 'user...",2021-05-25 00:01:40+00:00,1322230341868814336,1396979743031906304,"[{'type': 'quoted', 'id': '1396812892696027141'}]",1396979743031906304,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter for Android,,,,,"wow airdroo is live, this project looks very i...",2021-05-25,00:01:40
92737,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",@nft_qr_code This project is very Good. I Love...,False,"{'mentions': [{'start': 0, 'end': 12, 'usernam...",2021-05-25 00:01:28+00:00,1386517838974787584,1391511576264458240,"[{'type': 'replied_to', 'id': '139151157626445...",1396979695153934336,,Twitter for Android,1.388226e+18,,,,this project is very good. i love the project....,2021-05-25,00:01:28
92738,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",I got in!!!! I'm registered for the upcoming @...,False,"{'mentions': [{'start': 45, 'end': 58, 'userna...",2021-05-25 00:01:00+00:00,1374012051933253632,1396979575301844992,,1396979575301844992,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter Web App,,,,,i got in!!!! i'm registered for the upcoming s...,2021-05-25,00:01:00


In [14]:
may_31 = df[df['date'] == pd.to_datetime("2021-05-31")]
may_31

  result = libops.scalar_compare(x.ravel(), y, op)


Unnamed: 0,lang,reply_settings,public_metrics,corpus,possibly_sensitive,entities,created_at,author_id,conversation_id,referenced_tweets,id,context_annotations,source,in_reply_to_user_id,attachments,geo,withheld,cleaned_tweet_btm,date,time
0,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",Good project \n#BSC @DinoSour #NFT\n@yamin_rah...,False,"{'hashtags': [{'start': 14, 'end': 18, 'tag': ...",2021-05-31 23:59:42+00:00,1397168952908779520,1399515966774530048,"[{'type': 'quoted', 'id': '1398339274953564163'}]",1399515966774530048,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter Web App,,,,,good project,2021-05-31,23:59:42
1,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",@AromaFinance Great project!\n\n$reset #bsc #B...,False,"{'cashtags': [{'start': 30, 'end': 36, 'tag': ...",2021-05-31 23:59:40+00:00,1395244202808680448,1399313027065810944,"[{'type': 'replied_to', 'id': '139931302706581...",1399515957362450432,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter Web App,1.390241e+18,,,,great project! $reset,2021-05-31,23:59:40
2,en,everyone,"{'retweet_count': 1, 'reply_count': 0, 'like_c...",Such a beautiful project and congratulations t...,False,"{'hashtags': [{'start': 198, 'end': 213, 'tag'...",2021-05-31 23:59:35+00:00,1397350208489463808,1399515936093204480,"[{'type': 'quoted', 'id': '1399372631267287041'}]",1399515936093204480,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter for Android,,,,,such a beautiful project and congratulations t...,2021-05-31,23:59:35
4,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",I am participating in the CryptoUltraman NFT a...,False,"{'hashtags': [{'start': 60, 'end': 78, 'tag': ...",2021-05-31 23:58:47+00:00,2718560166,1399515734007447552,"[{'type': 'quoted', 'id': '1398277372651081732'}]",1399515734007447552,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter for Android,,,,,i am participating in the cryptoultraman nft a...,2021-05-31,23:58:47
5,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",@apenftorg @CoinMarketCap Nice to find this pr...,False,"{'cashtags': [{'start': 216, 'end': 220, 'tag'...",2021-05-31 23:58:44+00:00,1383794353760391168,1397848170739077120,"[{'type': 'replied_to', 'id': '139784817073907...",1399515723274280960,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter Web App,1.392094e+18,,,,"nice to find this project, this project will b...",2021-05-31,23:58:44
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22751,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",Thanks for shared this wonderful opportunity. ...,False,"{'urls': [{'start': 196, 'end': 219, 'url': 'h...",2021-05-31 00:01:54+00:00,1022339239839641600,1399154129973387264,"[{'type': 'quoted', 'id': '1398339274953564163'}]",1399154129973387264,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter for Android,,,,,thanks for shared this wonderful opportunity. ...,2021-05-31,00:01:54
22752,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",@Enzo__NZO Click the link to receive 1213 $NZO...,False,"{'urls': [{'start': 153, 'end': 176, 'url': 'h...",2021-05-31 00:01:30+00:00,1264387223329345536,1399154029071147008,,1399154029071147008,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter for Android,1.386201e+18,,,,click the link to receive 1213 $nzo ($500.00),2021-05-31,00:01:30
22755,en,everyone,"{'retweet_count': 1, 'reply_count': 0, 'like_c...",@DinoSourFamily @BinanceChain Good and strong ...,False,"{'hashtags': [{'start': 87, 'end': 91, 'tag': ...",2021-05-31 00:01:13+00:00,791618831378681856,1398339274953564160,"[{'type': 'replied_to', 'id': '139833927495356...",1399153960083066880,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter Web App,1.369325e+18,,,,good and strong 💪 project guys join this airdr...,2021-05-31,00:01:13
22758,en,everyone,"{'retweet_count': 0, 'reply_count': 0, 'like_c...",@asifaslam0 \n@MDALAMI16 \n@saiful04420060 \n\...,False,"{'urls': [{'start': 77, 'end': 100, 'url': 'ht...",2021-05-31 00:00:38+00:00,1245731791333289984,1399153812024086528,"[{'type': 'quoted', 'id': '1398339274953564163'}]",1399153812024086528,"[{'domain': {'id': '45', 'name': 'Brand Vertic...",Twitter Web App,1.379746e+18,,,,great project,2021-05-31,00:00:38


In [15]:
selected_columns = ["date", "id", "corpus", "public_metrics", "entities", "author_id", "conversation_id"] # output created_at, id, and cleaned_tweets to csv
may_1.to_csv(MAY_1_DATA_OUT, columns = selected_columns)
may_5.to_csv(MAY_5_DATA_OUT, columns = selected_columns)
may_22.to_csv(MAY_22_DATA_OUT, columns = selected_columns)
may_25.to_csv(MAY_25_DATA_OUT, columns = selected_columns)
may_31.to_csv(MAY_31_DATA_OUT, columns = selected_columns)