In [1]:
import pandas as pd
import numpy as np
import os

In [3]:
# PATHs
# Directory locations used by the notebook; each is a relative path string
# that later cells splice into file names with str.format.

## The directory where the dune data is stored
ORI_DATA_PATH = '../ori_data'

## The directory where the database CSVs are stored
## (tx_db.csv, punk_db.csv, trader_db.csv and tweets_db.csv are read from here)
DATABASE_PATH = '../database'

## The directory where the visualization data for each visualization is stored
## (the vis5_* files produced by this notebook are written here)
VIS_DATA_PATH = '../../data'

## The directory where the scraped tweets data is stored
TWEET_PATH = '../ori_data/tweets'

In [3]:
# read transaction database (first CSV column becomes the index)
tx_db = pd.read_csv('{}/tx_db.csv'.format(DATABASE_PATH), index_col=0)

# read cryptopunk database (first CSV column becomes the index)
punk_db = pd.read_csv('{}/punk_db.csv'.format(DATABASE_PATH), index_col=0)
# The 'attributes' cells are stored as text and turned back into Python objects here.
# NOTE(review): `eval` executes whatever expression each cell contains. If
# punk_db.csv can ever come from an untrusted source, use `ast.literal_eval`
# instead — TODO confirm the column only holds plain literals before swapping.
punk_db['attributes'] = punk_db['attributes'].apply(eval)

# read trader database (first CSV column becomes the index)
trader_db = pd.read_csv('{}/trader_db.csv'.format(DATABASE_PATH), index_col=0)

# read tweet database
# Unlike the other three, no index_col is given, so a default integer index is used.
# Rows are split on '\n' only — presumably so other line-break characters inside
# tweet text do not start a new row; verify against how tweets_db.csv was written.
tweets_db = pd.read_csv('{}/tweets_db.csv'.format(DATABASE_PATH), lineterminator='\n')


### 5. Ridgeline Chart

<img src="../imgs/vis5_example.png" alt="drawing" width="500">

Data format

```
Index
0   date

Column
1   positive_count
2   neutral_count
3   negative_count
4   punk_volume: avg punk tx volume on the date
```

In [27]:

def get_dataset_vis5(tweets_db):
    """Return a cleaned, date-sorted copy of the tweet table.

    Steps, in order:
      1. deep-copy the input and drop every row containing a missing value;
      2. cut the last 6 characters off each 'date' string and parse the rest
         as a datetime (assumes the suffix is a UTC offset such as '+00:00'
         — TODO confirm against tweets_db.csv);
      3. keep only rows whose date is later than 2017-06-23 and print how
         many rows that filter removed;
      4. sort by date, keeping the original row labels;
      5. lower-case 'content' and replace newlines with spaces.

    Parameters
    ----------
    tweets_db : pandas.DataFrame
        Must provide string columns 'date' and 'content'. It is not modified.

    Returns
    -------
    pandas.DataFrame
        The filtered and sorted copy.
    """
    tweets = tweets_db.copy(deep=True).dropna()

    # keep only the tweets after 2017-06-23
    trimmed_stamps = tweets['date'].map(lambda stamp: stamp[:-6])
    tweets['date'] = pd.to_datetime(trimmed_stamps)

    n_before = len(tweets)
    tweets = tweets[tweets['date'] > '2017-06-23']
    print('Number of tweets dropped: {}'.format(n_before - len(tweets)))

    tweets = tweets.sort_values(by='date')

    def _flatten(body):
        # single-line, lower-case version of the tweet text
        return body.lower().replace('\n', ' ')

    tweets['content'] = tweets['content'].map(_flatten)

    return tweets


# Build the cleaned tweet table and show it as the cell output.
vis5_df = get_dataset_vis5(tweets_db)
vis5_df


Number of tweets dropped: 323


Unnamed: 0,id,date,content,url,username
323,880138889511436289,2017-06-28 19:00:38,trust me. i’m a designer. @jamesvictore https://t.co/8vjllw3tyo #dangerousideas,https://twitter.com/purity_nft/status/880138889511436289,purity_nft
324,881049753169010688,2017-07-01 07:20:04,once you're through those gym doors the only thing that separates us is work ethic 💪,https://twitter.com/Gavin_John_NFT/status/881049753169010688,Gavin_John_NFT
325,882240852017467393,2017-07-04 14:13:05,nft ohio &amp; nexgen athlete @xandero66 continues to impress to earn the next level. proud of the work ethic and commitment https://t.co/9o3nnwknb6,https://twitter.com/Coach_UQB/status/882240852017467393,Coach_UQB
326,882776637422448640,2017-07-06 01:42:06,i don't trust people who age without wrinkles 😂,https://twitter.com/Gavin_John_NFT/status/882776637422448640,Gavin_John_NFT
327,884644278294544384,2017-07-11 05:23:26,"dear me, i guess its hard to trust anyone but never give up on them. you just met the wrong ones. #throwback #life",https://twitter.com/thebuttlord_NFT/status/884644278294544384,thebuttlord_NFT
...,...,...,...,...,...
83563,1523280063529365504,2022-05-08 12:34:02,@mediavenir @elonjet metamask wallet trust nft crypto lionel messi cr7 goat,https://twitter.com/spacecrevette/status/1523280063529365504,spacecrevette
83564,1523282476382052353,2022-05-08 12:43:38,"the first responsible for the mess the #nft space is in are buyers! constantly seeking their dopamine fix, more announcements, more sneak peek, more hype more more more… to finally get rekted. building strong foundations takes time, trust the founders not the hype!",https://twitter.com/izildurrr/status/1523282476382052353,izildurrr
83565,1523283530456834049,2022-05-08 12:47:49,@dunkindonuts i lost my metamask trust wallet sugar daddy nft ethereum #nfts,https://twitter.com/mel_the_super/status/1523283530456834049,mel_the_super
83566,1523283971261743106,2022-05-08 12:49:34,requesting $sol funds from the #stakely faucet on the solana blockchain. request id: 6ub1suej #privacy https://t.co/pglpvmruwm,https://twitter.com/ZAHA_NFT/status/1523283971261743106,ZAHA_NFT


In [8]:
# Dump every cleaned tweet, one per line, into a single text file under VIS_DATA_PATH.
all_tweet_content = '\n'.join(vis5_df["content"])
# The tweets contain emoji and typographic quotes, so the encoding is pinned to
# UTF-8 instead of relying on the platform default, which may be unable to
# encode them and differs between machines.
with open('{}/vis5_all_tweet_content.txt'.format(VIS_DATA_PATH), 'w', encoding='utf-8') as f:
    f.write(all_tweet_content)


### Sentiment Analysis

#### TextBlob

In [33]:
# Rebuild the cleaned tweet table and renumber its rows 0..n-1, throwing the
# old row labels away instead of keeping them as a column.
vis5_df = get_dataset_vis5(tweets_db).reset_index(drop=True)
vis5_df

Number of tweets dropped: 323


Unnamed: 0,id,date,content,url,username
0,880138889511436289,2017-06-28 19:00:38,trust me. i’m a designer. @jamesvictore https://t.co/8vjllw3tyo #dangerousideas,https://twitter.com/purity_nft/status/880138889511436289,purity_nft
1,881049753169010688,2017-07-01 07:20:04,once you're through those gym doors the only thing that separates us is work ethic 💪,https://twitter.com/Gavin_John_NFT/status/881049753169010688,Gavin_John_NFT
2,882240852017467393,2017-07-04 14:13:05,nft ohio &amp; nexgen athlete @xandero66 continues to impress to earn the next level. proud of the work ethic and commitment https://t.co/9o3nnwknb6,https://twitter.com/Coach_UQB/status/882240852017467393,Coach_UQB
3,882776637422448640,2017-07-06 01:42:06,i don't trust people who age without wrinkles 😂,https://twitter.com/Gavin_John_NFT/status/882776637422448640,Gavin_John_NFT
4,884644278294544384,2017-07-11 05:23:26,"dear me, i guess its hard to trust anyone but never give up on them. you just met the wrong ones. #throwback #life",https://twitter.com/thebuttlord_NFT/status/884644278294544384,thebuttlord_NFT
...,...,...,...,...,...
83240,1523280063529365504,2022-05-08 12:34:02,@mediavenir @elonjet metamask wallet trust nft crypto lionel messi cr7 goat,https://twitter.com/spacecrevette/status/1523280063529365504,spacecrevette
83241,1523282476382052353,2022-05-08 12:43:38,"the first responsible for the mess the #nft space is in are buyers! constantly seeking their dopamine fix, more announcements, more sneak peek, more hype more more more… to finally get rekted. building strong foundations takes time, trust the founders not the hype!",https://twitter.com/izildurrr/status/1523282476382052353,izildurrr
83242,1523283530456834049,2022-05-08 12:47:49,@dunkindonuts i lost my metamask trust wallet sugar daddy nft ethereum #nfts,https://twitter.com/mel_the_super/status/1523283530456834049,mel_the_super
83243,1523283971261743106,2022-05-08 12:49:34,requesting $sol funds from the #stakely faucet on the solana blockchain. request id: 6ub1suej #privacy https://t.co/pglpvmruwm,https://twitter.com/ZAHA_NFT/status/1523283971261743106,ZAHA_NFT


In [9]:
from wordcloud import WordCloud,STOPWORDS
# Copy wordcloud's STOPWORDS into a set; it is used below to filter words out of the cleaned tweets.
stopwords = set(STOPWORDS)
from textblob import TextBlob
from collections import Counter
import re

In [34]:
# Raw string: the original non-raw literal relied on the invalid escapes
# `\w`, `\/` and `\S` being passed through unchanged, which Python reports as a
# DeprecationWarning (SyntaxWarning from 3.12). The pattern the regex engine
# sees is the same as before.
_TWEET_NOISE_RE = re.compile(
    r"(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)|(#[A-Za-z0-9]+)")


def preprocess(text):
    """Normalise one tweet for sentiment scoring.

    Every match of the noise pattern is replaced by a space, runs of
    whitespace are collapsed to single spaces, and the result is lower-cased.
    The pattern's alternatives are tried in order at each position:

    * ``@`` followed by ASCII letters/digits (a mention; an underscore ends
      the match, so the part of a handle after ``_`` is kept as a word);
    * any single character that is not an ASCII letter, digit, space or tab;
    * ``scheme://...`` up to the next whitespace (a URL);
    * ``#`` followed by letters/digits. This alternative never wins, because
      ``#`` is already consumed by the second alternative, so the hashtag
      word itself is kept without the ``#``.

    Parameters
    ----------
    text : str
        Raw tweet text.

    Returns
    -------
    str
        The cleaned, lower-case text (possibly empty).
    """
    return ' '.join(_TWEET_NOISE_RE.sub(" ", text).split()).lower()

def _strip_stopwords(cleaned):
    """Return `cleaned` without the whitespace-separated tokens found in `stopwords`."""
    kept_tokens = (token for token in cleaned.split() if token not in stopwords)
    return ' '.join(kept_tokens)


# 'text' = tweet content run through preprocess(), then with stop words removed
vis5_df['text'] = vis5_df['content'].apply(preprocess).apply(_strip_stopwords)

vis5_df

Unnamed: 0,id,date,content,url,username,text
0,880138889511436289,2017-06-28 19:00:38,trust me. i’m a designer. @jamesvictore https://t.co/8vjllw3tyo #dangerousideas,https://twitter.com/purity_nft/status/880138889511436289,purity_nft,trust m designer dangerousideas
1,881049753169010688,2017-07-01 07:20:04,once you're through those gym doors the only thing that separates us is work ethic 💪,https://twitter.com/Gavin_John_NFT/status/881049753169010688,Gavin_John_NFT,re gym doors thing separates us work ethic
2,882240852017467393,2017-07-04 14:13:05,nft ohio &amp; nexgen athlete @xandero66 continues to impress to earn the next level. proud of the work ethic and commitment https://t.co/9o3nnwknb6,https://twitter.com/Coach_UQB/status/882240852017467393,Coach_UQB,nft ohio amp nexgen athlete continues impress earn next level proud work ethic commitment
3,882776637422448640,2017-07-06 01:42:06,i don't trust people who age without wrinkles 😂,https://twitter.com/Gavin_John_NFT/status/882776637422448640,Gavin_John_NFT,don t trust people age without wrinkles
4,884644278294544384,2017-07-11 05:23:26,"dear me, i guess its hard to trust anyone but never give up on them. you just met the wrong ones. #throwback #life",https://twitter.com/thebuttlord_NFT/status/884644278294544384,thebuttlord_NFT,dear guess hard trust anyone never give met wrong ones throwback life
...,...,...,...,...,...,...
83240,1523280063529365504,2022-05-08 12:34:02,@mediavenir @elonjet metamask wallet trust nft crypto lionel messi cr7 goat,https://twitter.com/spacecrevette/status/1523280063529365504,spacecrevette,metamask wallet trust nft crypto lionel messi cr7 goat
83241,1523282476382052353,2022-05-08 12:43:38,"the first responsible for the mess the #nft space is in are buyers! constantly seeking their dopamine fix, more announcements, more sneak peek, more hype more more more… to finally get rekted. building strong foundations takes time, trust the founders not the hype!",https://twitter.com/izildurrr/status/1523282476382052353,izildurrr,first responsible mess nft space buyers constantly seeking dopamine fix announcements sneak peek hype finally rekted building strong foundations takes time trust founders hype
83242,1523283530456834049,2022-05-08 12:47:49,@dunkindonuts i lost my metamask trust wallet sugar daddy nft ethereum #nfts,https://twitter.com/mel_the_super/status/1523283530456834049,mel_the_super,lost metamask trust wallet sugar daddy nft ethereum nfts
83243,1523283971261743106,2022-05-08 12:49:34,requesting $sol funds from the #stakely faucet on the solana blockchain. request id: 6ub1suej #privacy https://t.co/pglpvmruwm,https://twitter.com/ZAHA_NFT/status/1523283971261743106,ZAHA_NFT,requesting sol funds stakely faucet solana blockchain request id 6ub1suej privacy


In [35]:
# Using TextBlob to analyze tweets to predict text sentiment and categorize as 'Positive', 'Negative' or 'Neutral'.
%time
vis5_df['sentiment'] = ' '
vis5_df['polarity'] = None

for i, tweet in enumerate(vis5_df.text):
    blob = TextBlob(tweet)
    vis5_df['polarity'][i] = blob.sentiment.polarity
    if blob.sentiment.polarity > 0:
        vis5_df['sentiment'][i] = 'pos'
    elif blob.sentiment.polarity < 0:
        vis5_df['sentiment'][i] = 'neg'
    else:
        vis5_df['sentiment'][i] = 'neu'
        
pd.set_option('display.max_colwidth', 400)

vis5_df.to_csv('{}/vis5_df_sa.csv'.format(VIS_DATA_PATH), index=False)

vis5_df


CPU times: user 3 µs, sys: 1 µs, total: 4 µs
Wall time: 6.91 µs


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  vis5_df['polarity'][i] = blob.sentiment.polarity
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  vis5_df['sentiment'][i] = 'neu'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  vis5_df['sentiment'][i] = 'pos'
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  vis5_df['sentiment'][i] = 'neg'


Unnamed: 0,id,date,content,url,username,text,sentiment,polarity
0,880138889511436289,2017-06-28 19:00:38,trust me. i’m a designer. @jamesvictore https://t.co/8vjllw3tyo #dangerousideas,https://twitter.com/purity_nft/status/880138889511436289,purity_nft,trust m designer dangerousideas,neu,0.0
1,881049753169010688,2017-07-01 07:20:04,once you're through those gym doors the only thing that separates us is work ethic 💪,https://twitter.com/Gavin_John_NFT/status/881049753169010688,Gavin_John_NFT,re gym doors thing separates us work ethic,neu,0.0
2,882240852017467393,2017-07-04 14:13:05,nft ohio &amp; nexgen athlete @xandero66 continues to impress to earn the next level. proud of the work ethic and commitment https://t.co/9o3nnwknb6,https://twitter.com/Coach_UQB/status/882240852017467393,Coach_UQB,nft ohio amp nexgen athlete continues impress earn next level proud work ethic commitment,pos,0.4
3,882776637422448640,2017-07-06 01:42:06,i don't trust people who age without wrinkles 😂,https://twitter.com/Gavin_John_NFT/status/882776637422448640,Gavin_John_NFT,don t trust people age without wrinkles,neu,0.0
4,884644278294544384,2017-07-11 05:23:26,"dear me, i guess its hard to trust anyone but never give up on them. you just met the wrong ones. #throwback #life",https://twitter.com/thebuttlord_NFT/status/884644278294544384,thebuttlord_NFT,dear guess hard trust anyone never give met wrong ones throwback life,neg,-0.395833
...,...,...,...,...,...,...,...,...
83240,1523280063529365504,2022-05-08 12:34:02,@mediavenir @elonjet metamask wallet trust nft crypto lionel messi cr7 goat,https://twitter.com/spacecrevette/status/1523280063529365504,spacecrevette,metamask wallet trust nft crypto lionel messi cr7 goat,neu,0.0
83241,1523282476382052353,2022-05-08 12:43:38,"the first responsible for the mess the #nft space is in are buyers! constantly seeking their dopamine fix, more announcements, more sneak peek, more hype more more more… to finally get rekted. building strong foundations takes time, trust the founders not the hype!",https://twitter.com/izildurrr/status/1523282476382052353,izildurrr,first responsible mess nft space buyers constantly seeking dopamine fix announcements sneak peek hype finally rekted building strong foundations takes time trust founders hype,pos,0.118056
83242,1523283530456834049,2022-05-08 12:47:49,@dunkindonuts i lost my metamask trust wallet sugar daddy nft ethereum #nfts,https://twitter.com/mel_the_super/status/1523283530456834049,mel_the_super,lost metamask trust wallet sugar daddy nft ethereum nfts,neu,0.0
83243,1523283971261743106,2022-05-08 12:49:34,requesting $sol funds from the #stakely faucet on the solana blockchain. request id: 6ub1suej #privacy https://t.co/pglpvmruwm,https://twitter.com/ZAHA_NFT/status/1523283971261743106,ZAHA_NFT,requesting sol funds stakely faucet solana blockchain request id 6ub1suej privacy,neu,0.0


#### flair

In [6]:
# !pip install flair

In [14]:
from flair.models import TextClassifier
from flair.data import Sentence
# Load flair's 'en-sentiment' text classifier; the log output below shows the
# model file being downloaded and cached when it is not already present.
sia = TextClassifier.load('en-sentiment')


def flair_prediction(x):
    """Classify one piece of text with the module-level flair model `sia`.

    The text is wrapped in a flair `Sentence`, `sia.predict` annotates it in
    place, and the string form of the sentence's first label is inspected.

    Parameters
    ----------
    x : str
        Text to classify.

    Returns
    -------
    str
        'pos' if the label text contains "POSITIVE", otherwise 'neg' if it
        contains "NEGATIVE", otherwise 'neu'.
    """
    tweet = Sentence(x)
    sia.predict(tweet)
    top_label = str(tweet.labels[0])

    # Checked in this order: POSITIVE takes precedence over NEGATIVE.
    for marker, tag in (("POSITIVE", "pos"), ("NEGATIVE", "neg")):
        if marker in top_label:
            return tag
    return "neu"


# Label every tweet with flair, feeding it the 'content' column (not the cleaned 'text' column).
# NOTE(review): this assignment replaces any 'sentiment' column already present
# in vis5_df (e.g. labels from the TextBlob cell) — confirm that is intended.
vis5_df["sentiment"] = vis5_df["content"].apply(flair_prediction)
vis5_df

2022-05-07 17:39:24,643 https://nlp.informatik.hu-berlin.de/resources/models/sentiment-curated-distilbert/sentiment-en-mix-distillbert_4.pt not found in cache, downloading to /var/folders/vk/qtf7k0110x7bb94g6sd3655r0000gn/T/tmp7sobgnpj


100%|██████████| 265512723/265512723 [05:08<00:00, 861255.37B/s] 

2022-05-07 17:44:34,318 copying /var/folders/vk/qtf7k0110x7bb94g6sd3655r0000gn/T/tmp7sobgnpj to cache at /Users/brucez/.flair/models/sentiment-en-mix-distillbert_4.pt





2022-05-07 17:44:34,737 removing temp file /var/folders/vk/qtf7k0110x7bb94g6sd3655r0000gn/T/tmp7sobgnpj
2022-05-07 17:44:34,775 loading file /Users/brucez/.flair/models/sentiment-en-mix-distillbert_4.pt


Downloading: 100%|██████████| 28.0/28.0 [00:00<00:00, 12.0kB/s]
Downloading: 100%|██████████| 483/483 [00:00<00:00, 172kB/s]
Downloading: 100%|██████████| 226k/226k [00:01<00:00, 163kB/s]  
Downloading: 100%|██████████| 455k/455k [00:02<00:00, 162kB/s]  


Unnamed: 0,id,date,content,url,username,sentiment
41,881049753169010688,2017-07-01 07:20:04,once you're through those gym doors the only t...,https://twitter.com/Gavin_John_NFT/status/8810...,Gavin_John_NFT,pos
42,882776637422448640,2017-07-06 01:42:06,i don't trust people who age without wrinkles 😂,https://twitter.com/Gavin_John_NFT/status/8827...,Gavin_John_NFT,neg
43,886422232393687040,2017-07-16 03:08:23,i give trust almost instantly but once it's br...,https://twitter.com/Gavin_John_NFT/status/8864...,Gavin_John_NFT,neg
44,889380045567008768,2017-07-24 07:01:41,young justice art reveals new designs for seas...,https://twitter.com/MichaelM_NFT/status/889380...,MichaelM_NFT,pos
45,890659950430638080,2017-07-27 19:47:34,"work and creativity aside, i pride myself most...",https://twitter.com/ALEXI_NFT/status/890659950...,ALEXI_NFT,neg
...,...,...,...,...,...,...
26056,1522723418252193799,2022-05-06 23:42:08,congratulations to our previous winners time f...,https://twitter.com/BOREDAPEKENN3l/status/1522...,BOREDAPEKENN3l,pos
26057,1522726977618137088,2022-05-06 23:56:17,"i'll sell my opensea username for 5eth, haha, ...",https://twitter.com/Brennen308/status/15227269...,Brennen308,neg
26058,1522733542509367298,2022-05-07 00:22:22,congratulations to our previous winners time f...,https://twitter.com/BOREDAPEKENN3l/status/1522...,BOREDAPEKENN3l,pos
26059,1522738109556817920,2022-05-07 00:40:31,filmed our first podcast last night. product...,https://twitter.com/Wardogs_NFT/status/1522738...,Wardogs_NFT,pos


In [15]:
# Save the current vis5_df as vis5_df.csv under VIS_DATA_PATH, without the row index.
vis5_df.to_csv('{}/vis5_df.csv'.format(VIS_DATA_PATH), index=False)

### Data Clean

In [4]:
# Reload vis5_df_sa.csv (the file written by the TextBlob cell).
# index_col=0 turns the first CSV column into the index; rows are split on '\n' only.
vis5_df = pd.read_csv('{}/vis5_df_sa.csv'.format(VIS_DATA_PATH), index_col=0, lineterminator='\n')
vis5_df

Unnamed: 0_level_0,date,content,url,username,text,sentiment,polarity
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
880138889511436289,2017-06-28 19:00:38,trust me. i’m a designer. @jamesvictore https:...,https://twitter.com/purity_nft/status/88013888...,purity_nft,trust m designer dangerousideas,neu,0.000000
881049753169010688,2017-07-01 07:20:04,once you're through those gym doors the only t...,https://twitter.com/Gavin_John_NFT/status/8810...,Gavin_John_NFT,re gym doors thing separates us work ethic,neu,0.000000
882240852017467393,2017-07-04 14:13:05,nft ohio &amp; nexgen athlete @xandero66 conti...,https://twitter.com/Coach_UQB/status/882240852...,Coach_UQB,nft ohio amp nexgen athlete continues impress ...,pos,0.400000
882776637422448640,2017-07-06 01:42:06,i don't trust people who age without wrinkles 😂,https://twitter.com/Gavin_John_NFT/status/8827...,Gavin_John_NFT,don t trust people age without wrinkles,neu,0.000000
884644278294544384,2017-07-11 05:23:26,"dear me, i guess its hard to trust anyone but...",https://twitter.com/thebuttlord_NFT/status/884...,thebuttlord_NFT,dear guess hard trust anyone never give met wr...,neg,-0.395833
...,...,...,...,...,...,...,...
1523280063529365504,2022-05-08 12:34:02,@mediavenir @elonjet metamask wallet trust nft...,https://twitter.com/spacecrevette/status/15232...,spacecrevette,metamask wallet trust nft crypto lionel messi ...,neu,0.000000
1523282476382052353,2022-05-08 12:43:38,the first responsible for the mess the #nft sp...,https://twitter.com/izildurrr/status/152328247...,izildurrr,first responsible mess nft space buyers consta...,pos,0.118056
1523283530456834049,2022-05-08 12:47:49,@dunkindonuts i lost my metamask trust wallet ...,https://twitter.com/mel_the_super/status/15232...,mel_the_super,lost metamask trust wallet sugar daddy nft eth...,neu,0.000000
1523283971261743106,2022-05-08 12:49:34,requesting $sol funds from the #stakely faucet...,https://twitter.com/ZAHA_NFT/status/1523283971...,ZAHA_NFT,requesting sol funds stakely faucet solana blo...,neu,0.000000


In [15]:
# Print, for each sentiment label, the number of tweets and their share of all
# tweets as "<label>: <count> | <percent>%".
n_all = len(vis5_df)

sentiment_counts = {}
for label in ('pos', 'neu', 'neg'):
    sentiment_counts[label] = len(vis5_df[vis5_df['sentiment'] == label])
    print(f'{label}: {sentiment_counts[label]} | {100.0*sentiment_counts[label]/n_all}%')

# Expose the individual totals under their usual names.
n_pos = sentiment_counts['pos']
n_neu = sentiment_counts['neu']
n_neg = sentiment_counts['neg']

pos: 47831 | 57.45810559192744%
neu: 23567 | 28.31040903357559%
neg: 11847 | 14.231485374496966%


23567

In [8]:
# Count the rows whose sentiment label is 'neg'.
n_neg = vis5_df[vis5_df.sentiment == 'neg'].shape[0]

11847

In [9]:
# Synthetic sample, 12 rows x 200 columns: row i holds standard-normal draws
# scaled by a factor that runs linearly from 1 (first row) to 2 (last row) and
# shifted by i plus a uniform random offset in [0, 2).
# NOTE(review): no random seed is set, so the values change on every run.
data = (np.linspace(1, 2, 12)[:, np.newaxis] * np.random.randn(12, 200) +
            (np.arange(12) + 2 * np.random.random(12))[:, np.newaxis])

In [11]:
# Show the dimensions of the synthetic sample array.
data.shape

(12, 200)

In [25]:
# Years to keep: plotting 50+ lines would be unreadable, so take one per decade.
year_list = list(range(1950, 2011, 10))

# Fetch the Seattle weather CSV from plotly's GitHub datasets repository, add a
# `year` column derived from `Date`, and keep only the selected years.
temp = (
    pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2016-weather-data-seattle.csv')
    .assign(year=lambda frame: pd.to_datetime(frame['Date']).dt.year)
    .loc[lambda frame: frame['year'].isin(year_list)]
)

In [26]:
# Show the weather rows kept for the selected years.
temp

Unnamed: 0,Date,Max_TemperatureC,Mean_TemperatureC,Min_TemperatureC,year
731,1/1/1950,0,-2.0,-4.0,1950
732,1/2/1950,-4,-9.0,-14.0,1950
733,1/3/1950,-3,-9.0,-14.0,1950
734,1/4/1950,-3,-6.0,-9.0,1950
735,1/5/1950,3,-2.0,-6.0,1950
...,...,...,...,...,...
22915,12/27/2010,7,5.0,3.0,2010
22916,12/28/2010,9,6.0,3.0,2010
22917,12/29/2010,4,2.0,1.0,2010
22918,12/30/2010,2,-1.0,-4.0,2010


In [27]:
# # getting necessary libraries
# import plotly.graph_objects as go
# import numpy as np
# import pandas as pd

# # getting the data
# temp = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2016-weather-data-seattle.csv') # we retrieve the data from plotly's GitHub repository
# temp['year'] = pd.to_datetime(temp['Date']).dt.year # we store the year in a separate column

# # Since we do not want to plot 50+ lines, we only select some years to plot
# year_list = [1950, 1960, 1970, 1980, 1990, 2000, 2010]
# temp = temp[temp['year'].isin(year_list)]

# Histogram-like input for a per-year ridgeline: one row per
# (year, mean temperature) pair with the number of rows in that group.
temp1 = (
    temp
    .groupby(['year', 'Mean_TemperatureC'])
    .size()
    .reset_index(name='count')
)

In [28]:
# Show the per-year temperature counts.
temp1

Unnamed: 0,year,Mean_TemperatureC,count
0,1950,-12.0,1
1,1950,-11.0,1
2,1950,-10.0,2
3,1950,-9.0,6
4,1950,-8.0,4
...,...,...,...
199,2010,22.0,4
200,2010,23.0,3
201,2010,24.0,2
202,2010,26.0,2


In [20]:
# Reload vis5_df.csv (first CSV column becomes the index) and show the rows labelled 'neu'.
vis5_df = pd.read_csv('{}/vis5_df.csv'.format(VIS_DATA_PATH), index_col=0, lineterminator='\n')
vis5_df[vis5_df['sentiment'] == 'neu']

Unnamed: 0_level_0,date,content,url,username,sentiment
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1


In [4]:
# Daily tweet counts per sentiment label.
# The date filter is a plain string comparison on the 'date' column; `date_t`
# is the part of each date string before the first space (the calendar day).
vis5_df = (
    pd.read_csv('{}/vis5_df.csv'.format(VIS_DATA_PATH), index_col=0, lineterminator='\n')
    .loc[lambda frame: frame['date'] > '2021-01-01']
    .assign(date_t=lambda frame: frame['date'].map(lambda stamp: stamp.split(' ')[0]))
    .groupby(['sentiment', 'date_t'])
    .size()
    .reset_index(name='count')
)
vis5_df

Unnamed: 0,sentiment,date_t,count
0,neg,2021-01-01,1
1,neg,2021-01-02,2
2,neg,2021-01-04,1
3,neg,2021-01-05,1
4,neg,2021-01-15,1
...,...,...,...
942,pos,2022-05-03,134
943,pos,2022-05-04,151
944,pos,2022-05-05,154
945,pos,2022-05-06,132


In [5]:
import plotly.graph_objects as go

In [13]:
# Ridgeline chart: one ridge per sentiment label, showing the min-max scaled
# daily tweet count over time. Ridges are stacked by adding a per-label offset.

array_dict = {}  # 'x_<label>' -> dates, 'y_<label>' -> counts scaled to [0, 1]

sent_list = ['neg', 'neu', 'pos']
for sentiment in sent_list:
    label_rows = vis5_df[vis5_df['sentiment'] == sentiment]
    counts = label_rows['count']
    span = counts.max() - counts.min()

    array_dict[f'x_{sentiment}'] = label_rows['date_t']
    # Min-max scaling. A flat series has span == 0 and would divide by zero,
    # so it is mapped to all zeros instead.
    array_dict[f'y_{sentiment}'] = (counts - counts.min()) / span if span else counts * 0.0

fig = go.Figure()
# The loop variable is named `sentiment` and used consistently in the body.
# Previously the loop bound `sent` while the body read the stale `sentiment`
# left over from the loop above (always 'pos'), so all three ridges and labels
# showed the positive series.
for index, sentiment in enumerate(sent_list):
    ridge_level = len(sent_list) - index  # first label is drawn highest

    fig.add_trace(go.Scatter(
                            x=array_dict[f'x_{sentiment}'],
                            y=array_dict[f'y_{sentiment}'] + ridge_level + 0.4,
                            fill='tonexty',
                            name=f'{sentiment}'))

    # Label each ridge. The x position is given in paper coordinates (left edge
    # of the plotting area): the former x=-20 was a coordinate on a temperature
    # axis and has no meaning on this chart's date axis.
    fig.add_annotation(
                        xref='paper',
                        x=0,
                        xanchor='left',
                        y=ridge_level,
                        text=f'{sentiment}',
                        showarrow=False,
                        yshift=10)

# Figure-level options: no legend (ridges are annotated directly) and no
# y tick labels (the stacked offsets are not meaningful values).
fig.update_layout(
                showlegend=False,
                yaxis=dict(showticklabels=False)
                )

fig.show()

In [17]:
# Inspect the dates stored for the 'pos' series.
array_dict['x_pos']

466    2021-01-03
467    2021-01-05
468    2021-01-07
469    2021-01-10
470    2021-01-12
          ...    
942    2022-05-03
943    2022-05-04
944    2022-05-05
945    2022-05-06
946    2022-05-07
Name: date_t, Length: 481, dtype: object