In [37]:
import time
import json
import pandas as pd
import numpy as np

In [38]:
# Project directories (relative paths)

## Raw data exported from Dune
DUNE_DATA_PATH = '../dune_data'

## CryptoPunk data including the attribute information
CSV_PATH = '../cp'

## The three databases (transactions, punks, traders)
DATABASE_PATH = '../database'

## CryptoPunk images
PUNK_IMG_PATH = '../punk_imgs'

## Per-visualization datasets produced by this notebook
VIS_DATA_PATH = '../vis_data'

## Scraped tweets
TWEET_PATH = '../tweets'

In [39]:
# Load the three databases from DATABASE_PATH.
# In every file the first CSV column is used as the DataFrame index.

# transactions
tx_db = pd.read_csv(
    '{}/tx_db.csv'.format(DATABASE_PATH),
    index_col=0,
)

# cryptopunks
punk_db = pd.read_csv(
    '{}/punk_db.csv'.format(DATABASE_PATH),
    index_col=0,
)
# Each 'attributes' cell is evaluated back into a Python object.
# NOTE(review): eval() runs whatever expression the CSV cell contains, so this
# is only acceptable while punk_db.csv is a trusted, locally generated file.
# If the cells are plain literals, ast.literal_eval would be the safer choice
# -- confirm the cell format before switching.
punk_db['attributes'] = punk_db['attributes'].apply(eval)

# traders
trader_db = pd.read_csv(
    '{}/trader_db.csv'.format(DATABASE_PATH),
    index_col=0,
)


### 3. Bubble Plot

<img src="../imgs/vis3_example.png" alt="drawing" width="500">

Data format

```
Index
0   tx: a single transaction

Column
1   tx_id
2   date
3   price
4   bubble_size: the attribute count of the punk
5   bubble_color: decided by the skin tone of the punk
6   from: trader_id
7   to: trader_id
8   punk_id
9   punk_img: the path to the image file of the punk
```

In [42]:
def set_bubble_color(x):
    """Return the hex color of a bubble for the given skin tone.

    'Dark', 'Medium' and 'Light' each have a dedicated color; every other
    value falls back to '#F2DC5D'.
    """
    tone_colors = (
        ('Dark', '#A4031F'),
        ('Medium', '#DB9065'),
        ('Light', '#F2A359'),
    )
    # Scan in order and compare with ==, returning on the first match.
    for tone, color in tone_colors:
        if x == tone:
            return color
    return '#F2DC5D'
    

def get_dataset_vis3(tx_db, punk_db, trader_db):
    """Build the dataset behind the bubble plot (visualization 3).

    Parameters
    ----------
    tx_db : pandas.DataFrame
        Transaction table. The columns ``date_time``, ``eth_price``, ``from``,
        ``to`` and ``punk_id`` are read. The frame is not modified.
    punk_db : pandas.DataFrame
        Punk table whose index holds the punk ids referenced by
        ``tx_db['punk_id']``. The columns ``skin_tone``, ``attr_count`` and
        ``img_dir`` are read.
    trader_db : pandas.DataFrame
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    pandas.DataFrame
        One row per transaction of ``tx_db`` that has no missing value, with
        the original index labels and the columns ``date``, ``price``,
        ``bubble_size``, ``bubble_color``, ``from``, ``to``, ``punk_id`` and
        ``punk_img`` (in that order).

    Raises
    ------
    KeyError
        If a remaining transaction references a punk id that is not in the
        index of ``punk_db``.
    """
    # Rows with any missing value are removed. This is how punk transfers are
    # dropped (presumably they carry no price -- confirm against tx_db).
    sales = tx_db.dropna(axis=0)

    # A single label-based lookup fetches the punk attributes for every row,
    # instead of one punk_db.loc[...] call per row and per column. Labels that
    # are missing from punk_db still raise KeyError.
    punk_info = punk_db.loc[
        sales['punk_id'].to_numpy(),
        ['skin_tone', 'attr_count', 'img_dir'],
    ]

    # punk_info is ordered like `sales` but indexed by punk id, so its columns
    # are attached positionally (as arrays) rather than aligned by label.
    vis3_df = sales.assign(
        # keep only the part of the timestamp before the first space
        date=sales['date_time'].str.split(' ').str[0],
        bubble_size=punk_info['attr_count'].to_numpy(),
        bubble_color=punk_info['skin_tone'].map(set_bubble_color).to_numpy(),
        punk_img=punk_info['img_dir'].to_numpy(),
    ).rename(columns={'eth_price': 'price'})

    return vis3_df.loc[:, ['date', 'price', 'bubble_size', 'bubble_color', 'from', 'to', 'punk_id', 'punk_img']]
    
# Build the bubble-plot dataset, write it under VIS_DATA_PATH, then display it.
vis3_df = get_dataset_vis3(tx_db, punk_db, trader_db)

vis3_csv_path = '{}/vis3_data.csv'.format(VIS_DATA_PATH)
vis3_df.to_csv(vis3_csv_path)

vis3_df

Unnamed: 0_level_0,date,price,bubble_size,bubble_color,from,to,punk_id,punk_img
tx_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,2017-06-23,0.01,3,#DB9065,7070,363,3134,../punk_imgs/3134.png
1,2017-06-23,0.04,3,#DB9065,7070,578,5719,../punk_imgs/5719.png
2,2017-06-23,0.10,4,#F2DC5D,363,724,5056,../punk_imgs/5056.png
4,2017-06-23,0.06,3,#F2A359,7070,578,5624,../punk_imgs/5624.png
5,2017-06-23,0.03,4,#F2DC5D,7070,578,6548,../punk_imgs/6548.png
...,...,...,...,...,...,...,...,...
40527,2022-05-04,61.50,3,#DB9065,5456,89,1648,../punk_imgs/1648.png
40530,2022-05-05,66.66,3,#F2DC5D,5977,8585,2625,../punk_imgs/2625.png
40535,2022-05-05,70.00,2,#DB9065,7348,6009,3314,../punk_imgs/3314.png
40556,2022-05-06,0.00,3,#DB9065,2287,6510,3095,../punk_imgs/3095.png
