In [2]:
import time
import json
import pandas as pd
import numpy as np
import os

In [3]:
# Relative directory paths used by the cells in this notebook.

# Directory holding the Dune data
DUNE_DATA_PATH = '../dune_data'

# Directory holding the CryptoPunk data that includes attribute info
CSV_PATH = '../cp'

# Directory holding the database CSV files
DATABASE_PATH = '../database'

# Directory holding the CryptoPunk images
PUNK_IMG_PATH = '../punk_imgs'

# Directory holding the data prepared for each visualization
VIS_DATA_PATH = '../vis_data'

# Directory holding the scraped tweets
TWEET_PATH = '../tweets'

In [4]:
# Load the database CSV files stored under DATABASE_PATH.

# Transactions: the first CSV column becomes the index.
tx_db = pd.read_csv(
    '{}/tx_db.csv'.format(DATABASE_PATH),
    index_col=0,
)

# CryptoPunks: the 'attributes' column is read as text and evaluated back
# into Python objects.
# NOTE(review): eval() executes whatever expression the CSV cell contains.
# If the cells are plain Python literals, ast.literal_eval would be a safer
# drop-in -- confirm the file contents before switching.
punk_db = pd.read_csv(
    '{}/punk_db.csv'.format(DATABASE_PATH),
    index_col=0,
).assign(attributes=lambda frame: frame['attributes'].apply(eval))

# Traders: the first CSV column becomes the index.
trader_db = pd.read_csv(
    '{}/trader_db.csv'.format(DATABASE_PATH),
    index_col=0,
)

# Tweets: only '\n' ends a record, and no index column is taken from the
# file, so the frame gets the default integer index.
tweets_db = pd.read_csv(
    '{}/tweets_db.csv'.format(DATABASE_PATH),
    lineterminator='\n',
)


### 5. Ridgeline Chart

<img src="../imgs/vis5_example.png" alt="drawing" width="500">

Data format

```
Index
0   date

Column
1   positive_count
2   neutral_count
3   negative_count
4   punk_volume: average punk transaction volume on that date
```

In [5]:

def get_dataset_vis5(tweets_db, cutoff='2017-06-23'):
    """Prepare the tweet table for the sentiment / ridgeline visualization.

    Steps, in order: drop rows that contain any missing value, parse the
    'date' column, keep only tweets strictly later than `cutoff`, sort by
    date, and normalise 'content' (lower-cased, newlines turned into spaces).
    The number of rows removed by the date filter is printed.

    Args:
        tweets_db: DataFrame with at least a string 'date' column and a
            string 'content' column. It is not modified.
        cutoff: Anything accepted by `pd.Timestamp`; tweets at or before this
            moment are discarded. Defaults to '2017-06-23'.

    Returns:
        A new DataFrame with 'date' as datetimes and cleaned 'content',
        sorted by 'date'. The original row index labels are kept.
    """
    # dropna() already returns a new frame; the explicit copy makes it
    # unambiguous that the caller's frame is never written to.
    tweets = tweets_db.dropna().copy()

    # Remove a trailing UTC offset such as "+00:00" only when it is actually
    # present. Blindly slicing off the last six characters would corrupt
    # timestamps that carry no offset.
    date_text = tweets['date'].str.replace(r'[+-]\d{2}:\d{2}$', '', regex=True)
    tweets['date'] = pd.to_datetime(date_text)

    before = len(tweets)
    # .copy() detaches the filtered rows so the column assignment below does
    # not write into a slice of `tweets`.
    recent = tweets[tweets['date'] > pd.Timestamp(cutoff)].copy()
    after = len(recent)
    print('Number of tweets dropped: {}'.format(before - after))

    recent = recent.sort_values(by='date')

    recent['content'] = (
        recent['content'].str.lower().str.replace('\n', ' ', regex=False)
    )

    return recent


# Build the cleaned tweet frame from the tweet database and show it as the
# cell output.
vis5_df = get_dataset_vis5(tweets_db)
vis5_df


Number of tweets dropped: 41


Unnamed: 0,id,date,content,url,username
41,881049753169010688,2017-07-01 07:20:04,once you're through those gym doors the only t...,https://twitter.com/Gavin_John_NFT/status/8810...,Gavin_John_NFT
42,882776637422448640,2017-07-06 01:42:06,i don't trust people who age without wrinkles 😂,https://twitter.com/Gavin_John_NFT/status/8827...,Gavin_John_NFT
43,886422232393687040,2017-07-16 03:08:23,i give trust almost instantly but once it's br...,https://twitter.com/Gavin_John_NFT/status/8864...,Gavin_John_NFT
44,889380045567008768,2017-07-24 07:01:41,young justice art reveals new designs for seas...,https://twitter.com/MichaelM_NFT/status/889380...,MichaelM_NFT
45,890659950430638080,2017-07-27 19:47:34,"work and creativity aside, i pride myself most...",https://twitter.com/ALEXI_NFT/status/890659950...,ALEXI_NFT
...,...,...,...,...,...
26056,1522723418252193799,2022-05-06 23:42:08,congratulations to our previous winners time f...,https://twitter.com/BOREDAPEKENN3l/status/1522...,BOREDAPEKENN3l
26057,1522726977618137088,2022-05-06 23:56:17,"i'll sell my opensea username for 5eth, haha, ...",https://twitter.com/Brennen308/status/15227269...,Brennen308
26058,1522733542509367298,2022-05-07 00:22:22,congratulations to our previous winners time f...,https://twitter.com/BOREDAPEKENN3l/status/1522...,BOREDAPEKENN3l
26059,1522738109556817920,2022-05-07 00:40:31,filmed our first podcast last night. product...,https://twitter.com/Wardogs_NFT/status/1522738...,Wardogs_NFT


In [12]:
# Write the content of every tweet in vis5_df to one text file, with the
# tweets joined by newline characters.
all_tweet_content = '\n'.join(vis5_df["content"])
# Tweets contain emoji and other non-ASCII characters, so the encoding is set
# explicitly; the platform default is not UTF-8 everywhere and could raise
# UnicodeEncodeError.
with open('{}/vis5_all_tweet_content.txt'.format(VIS_DATA_PATH), 'w', encoding='utf-8') as f:
    f.write(all_tweet_content)


### Sentiment Analysis

In [6]:
# !pip install flair

In [14]:
# flair provides the text classifier and the Sentence wrapper used for
# sentiment prediction below.
from flair.models import TextClassifier
from flair.data import Sentence
# Load flair's 'en-sentiment' classifier. Per the recorded cell output, the
# model file is downloaded and cached when it is not already in the cache.
sia = TextClassifier.load('en-sentiment')


def flair_prediction(x):
    """Classify the sentiment of a text with the module-level classifier `sia`.

    Args:
        x: The text to classify.

    Returns:
        "pos" when the first predicted label is POSITIVE, "neg" when it is
        NEGATIVE, and "neu" otherwise -- including the case where the
        classifier attached no label at all.
    """
    sentence = Sentence(x)
    sia.predict(sentence)

    # Without this guard, a sentence that received no label would raise
    # IndexError on labels[0]; treat it as neutral instead.
    if not sentence.labels:
        return "neu"

    # Compare the label's value rather than searching its string form: the
    # string form may embed the sentence text, so a text that itself contains
    # "POSITIVE" or "NEGATIVE" could be misclassified by a substring test.
    label_value = sentence.labels[0].value
    if label_value == "POSITIVE":
        return "pos"
    if label_value == "NEGATIVE":
        return "neg"
    return "neu"


# Label every tweet by running flair_prediction on its content, one row at a
# time, and store the result in a new 'sentiment' column of vis5_df.
vis5_df["sentiment"] = vis5_df["content"].apply(flair_prediction)
vis5_df

2022-05-07 17:39:24,643 https://nlp.informatik.hu-berlin.de/resources/models/sentiment-curated-distilbert/sentiment-en-mix-distillbert_4.pt not found in cache, downloading to /var/folders/vk/qtf7k0110x7bb94g6sd3655r0000gn/T/tmp7sobgnpj


100%|██████████| 265512723/265512723 [05:08<00:00, 861255.37B/s] 

2022-05-07 17:44:34,318 copying /var/folders/vk/qtf7k0110x7bb94g6sd3655r0000gn/T/tmp7sobgnpj to cache at /Users/brucez/.flair/models/sentiment-en-mix-distillbert_4.pt





2022-05-07 17:44:34,737 removing temp file /var/folders/vk/qtf7k0110x7bb94g6sd3655r0000gn/T/tmp7sobgnpj
2022-05-07 17:44:34,775 loading file /Users/brucez/.flair/models/sentiment-en-mix-distillbert_4.pt


Downloading: 100%|██████████| 28.0/28.0 [00:00<00:00, 12.0kB/s]
Downloading: 100%|██████████| 483/483 [00:00<00:00, 172kB/s]
Downloading: 100%|██████████| 226k/226k [00:01<00:00, 163kB/s]  
Downloading: 100%|██████████| 455k/455k [00:02<00:00, 162kB/s]  


Unnamed: 0,id,date,content,url,username,sentiment
41,881049753169010688,2017-07-01 07:20:04,once you're through those gym doors the only t...,https://twitter.com/Gavin_John_NFT/status/8810...,Gavin_John_NFT,pos
42,882776637422448640,2017-07-06 01:42:06,i don't trust people who age without wrinkles 😂,https://twitter.com/Gavin_John_NFT/status/8827...,Gavin_John_NFT,neg
43,886422232393687040,2017-07-16 03:08:23,i give trust almost instantly but once it's br...,https://twitter.com/Gavin_John_NFT/status/8864...,Gavin_John_NFT,neg
44,889380045567008768,2017-07-24 07:01:41,young justice art reveals new designs for seas...,https://twitter.com/MichaelM_NFT/status/889380...,MichaelM_NFT,pos
45,890659950430638080,2017-07-27 19:47:34,"work and creativity aside, i pride myself most...",https://twitter.com/ALEXI_NFT/status/890659950...,ALEXI_NFT,neg
...,...,...,...,...,...,...
26056,1522723418252193799,2022-05-06 23:42:08,congratulations to our previous winners time f...,https://twitter.com/BOREDAPEKENN3l/status/1522...,BOREDAPEKENN3l,pos
26057,1522726977618137088,2022-05-06 23:56:17,"i'll sell my opensea username for 5eth, haha, ...",https://twitter.com/Brennen308/status/15227269...,Brennen308,neg
26058,1522733542509367298,2022-05-07 00:22:22,congratulations to our previous winners time f...,https://twitter.com/BOREDAPEKENN3l/status/1522...,BOREDAPEKENN3l,pos
26059,1522738109556817920,2022-05-07 00:40:31,filmed our first podcast last night. product...,https://twitter.com/Wardogs_NFT/status/1522738...,Wardogs_NFT,pos


In [15]:
# Save the labelled tweets under VIS_DATA_PATH; index=False leaves the row
# index out of the file.
vis5_df.to_csv('{}/vis5_df.csv'.format(VIS_DATA_PATH), index=False)

In [8]:
# Reload the saved file. index_col=0 turns the first CSV column into the
# index -- per the recorded output below, that column is `id`, so this frame
# is indexed differently from the in-memory one that was saved.
vis5_df = pd.read_csv('{}/vis5_df.csv'.format(VIS_DATA_PATH), index_col=0, lineterminator='\n')
vis5_df

Unnamed: 0_level_0,date,content,url,username,sentiment
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
881049753169010688,2017-07-01 07:20:04,once you're through those gym doors the only t...,https://twitter.com/Gavin_John_NFT/status/8810...,Gavin_John_NFT,pos
882776637422448640,2017-07-06 01:42:06,i don't trust people who age without wrinkles 😂,https://twitter.com/Gavin_John_NFT/status/8827...,Gavin_John_NFT,neg
886422232393687040,2017-07-16 03:08:23,i give trust almost instantly but once it's br...,https://twitter.com/Gavin_John_NFT/status/8864...,Gavin_John_NFT,neg
889380045567008768,2017-07-24 07:01:41,young justice art reveals new designs for seas...,https://twitter.com/MichaelM_NFT/status/889380...,MichaelM_NFT,pos
890659950430638080,2017-07-27 19:47:34,"work and creativity aside, i pride myself most...",https://twitter.com/ALEXI_NFT/status/890659950...,ALEXI_NFT,neg
...,...,...,...,...,...
1522723418252193799,2022-05-06 23:42:08,congratulations to our previous winners time f...,https://twitter.com/BOREDAPEKENN3l/status/1522...,BOREDAPEKENN3l,pos
1522726977618137088,2022-05-06 23:56:17,"i'll sell my opensea username for 5eth, haha, ...",https://twitter.com/Brennen308/status/15227269...,Brennen308,neg
1522733542509367298,2022-05-07 00:22:22,congratulations to our previous winners time f...,https://twitter.com/BOREDAPEKENN3l/status/1522...,BOREDAPEKENN3l,pos
1522738109556817920,2022-05-07 00:40:31,filmed our first podcast last night. product...,https://twitter.com/Wardogs_NFT/status/1522738...,Wardogs_NFT,pos
