In [1]:
import ast
import json
import time

import numpy as np
import pandas as pd

In [2]:
# PATHs

## The directory where the dune data is stored
DUNE_DATA_PATH = '../dune_data'

## The directory where the cryptopunk data with attributes info is stored
CSV_PATH = '../cp'

## The directory where the three databases are stored
DATABASE_PATH = '../database'

## The directory where the cryptopunk images is stored
PUNK_IMG_PATH='../punk_imgs'

## The directory where the visualization data for each visualization is stored
VIS_DATA_PATH = '../vis_data'

## The directory where the scraped tweets data is stored
TWEET_PATH = '../tweets'

In [3]:
# Load the three databases; the first CSV column is used as the index of each.

# transaction database
tx_db = pd.read_csv('{}/tx_db.csv'.format(DATABASE_PATH), index_col=0)

# cryptopunk database
punk_db = pd.read_csv('{}/punk_db.csv'.format(DATABASE_PATH), index_col=0)
# The 'attributes' column is presumably stored as the text of a Python list
# (verify against the code that writes punk_db.csv). Parse it with
# ast.literal_eval, which only accepts literals, instead of eval, which would
# execute any expression that happens to be in the file.
punk_db['attributes'] = punk_db['attributes'].apply(ast.literal_eval)

# trader database
trader_db = pd.read_csv('{}/trader_db.csv'.format(DATABASE_PATH), index_col=0)


### 1. Sankey Diagram

<img src="../imgs/vis1_example.png" alt="drawing" width="500">

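Data format (note: the code cells below currently emit a different, column-oriented layout under the keys `node` and `link`, where `link` holds parallel `source` / `target` / `value` lists of node indices and counts):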

```
{
    "nodes":[
        {"node":0,"name":"Human"},
        {"node":1,"name":"Zombie"},
        {"node":2,"name":"Ape"},
        {"node":3,"name":"Alien"},

        {"node":4,"name":"Female"},
        {"node":5,"name":"Male"},

        {"node":6,"name":"Medium"},
        {"node":7,"name":"Dark"},
        {"node":8,"name":"Light"},
        {"node":9,"name":"Albino"},
        {"node":10,"name":"non-human"},

        {"node":11,"name":"Beanie"},
        ...
        {"node":97,"name":"Stringy Hair"}
    ],

    "links":[
        {"source":<node_id>, "target":<node_id>, "value":<count>},
        ...
    ]
}
```

In [5]:
def between_skin_tone_and_attributes(item, source_skin_tone, target_attribute):
    """Tell whether `item` has the given skin tone and carries the given attribute.

    Args:
        item: mapping-like record read through the 'skin_tone' and
            'attributes' keys (e.g. one row of the punk table).
        source_skin_tone: value compared for equality with item['skin_tone'].
        target_attribute: value looked up in item['attributes'] with `in`.

    Returns:
        The skin-tone comparison result when it is falsy; otherwise the result
        of the membership test on item['attributes'].
    """
    skin_tone_matches = item['skin_tone'] == source_skin_tone
    # `and` short-circuits: attributes are only inspected for a matching tone.
    return skin_tone_matches and target_attribute in item['attributes']


def get_dataset_vis1(tx_db, punk_db, trader_db):
    """Build the node/link dataset for the Sankey diagram (visualization 1).

    The diagram has four levels, in this order: 'type', 'gender', 'skin_tone'
    and the attribute count rendered as '<n> attributes'. Links are produced
    only between adjacent levels, one per (source name, target name) pair,
    including pairs whose count is 0.

    Args:
        tx_db: unused; kept so the signature matches the other dataset builders.
        punk_db: DataFrame with the columns 'type', 'gender', 'skin_tone' and
            'attr_count'. It is not modified.
        trader_db: unused; kept for the same reason as `tx_db`.

    Returns:
        dict with two keys:
            'node': list of node names; a node's id is its position in the list.
            'link': dict of three parallel lists, 'source' and 'target' (node
                ids) and 'value' (number of punks having both names).

    Notes:
        Missing (NaN) names get no node, so punks without a skin tone do not
        appear in any link that touches the skin-tone level.
    """
    # Derive the label column on a copy: assigning into `punk_db` directly
    # would leave an extra 'attr_count_str' column on the caller's frame.
    punks = punk_db.assign(
        attr_count_str=punk_db['attr_count'].apply(lambda x: f'{x} attributes'))

    levels_list = ['type', 'gender', 'skin_tone', 'attr_count_str']

    # Node names of each level, in order of first appearance in the data.
    node_names_nested_list = [
        [name for name in punks[level].unique() if not pd.isna(name)]
        for level in levels_list
    ]
    node_names = [name for names in node_names_nested_list for name in names]

    # index each node name
    node_dict = {name: node_id for node_id, name in enumerate(node_names)}

    dataset_vis1 = dict()
    dataset_vis1['node'] = node_names
    print('> Saved: node')

    source_li = list()
    target_li = list()
    value_li = list()
    # links between each pair of neighbor levels
    for i in range(len(levels_list) - 1):
        source_level, target_level = levels_list[i], levels_list[i + 1]

        # Count every (source, target) combination in one pass; rows with a
        # missing key are left out by groupby, just as an equality test would
        # never match them.
        pair_counts = punks.groupby(
            [source_level, target_level], sort=False).size().to_dict()

        for source in node_names_nested_list[i]:
            for target in node_names_nested_list[i + 1]:
                source_li.append(node_dict[source])
                target_li.append(node_dict[target])
                # int() keeps the value a plain Python int for json.dump.
                value_li.append(int(pair_counts.get((source, target), 0)))

    dataset_vis1['link'] = {'source': source_li, 'target': target_li, 'value': value_li}
    print('> Saved: link')

    return dataset_vis1


# Build the Sankey dataset, write it out as JSON, then display it.
vis1_data = get_dataset_vis1(tx_db, punk_db, trader_db)

vis1_json_path = '{}/vis1_data.json'.format(VIS_DATA_PATH)
with open(vis1_json_path, 'w') as vis1_file:
    json.dump(vis1_data, vis1_file)

vis1_data


Unnamed: 0_level_0,type,gender,skin_tone,attr_count,attributes,img_dir,avg_price,img_url
punk_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,Human,Female,Medium,3,"[Green Eye Shadow, Earring, Blonde Bob]",../punk_imgs/0.png,0.000000,https://www.larvalabs.com/cryptopunks/cryptopu...
1,Human,Male,Dark,2,"[Smile, Mohawk]",../punk_imgs/1.png,31.000000,https://www.larvalabs.com/cryptopunks/cryptopu...
2,Human,Female,Light,1,[Wild Hair],../punk_imgs/2.png,0.000000,https://www.larvalabs.com/cryptopunks/cryptopu...
3,Human,Male,Dark,3,"[Wild Hair, Nerd Glasses, Pipe]",../punk_imgs/3.png,0.000000,https://www.larvalabs.com/cryptopunks/cryptopu...
4,Human,Male,Medium,4,"[Big Shades, Wild Hair, Earring, Goat]",../punk_imgs/4.png,0.000000,https://www.larvalabs.com/cryptopunks/cryptopu...
...,...,...,...,...,...,...,...,...
9995,Human,Female,Albino,2,"[Purple Eye Shadow, Straight Hair Dark]",../punk_imgs/9995.png,0.000000,https://www.larvalabs.com/cryptopunks/cryptopu...
9996,Human,Male,Light,4,"[Cigarette, Earring, Crazy Hair, Smile]",../punk_imgs/9996.png,0.000000,https://www.larvalabs.com/cryptopunks/cryptopu...
9997,Zombie,Male,,2,"[Front Beard, Cap Forward]",../punk_imgs/9997.png,99.990000,https://www.larvalabs.com/cryptopunks/cryptopu...
9998,Human,Female,Medium,3,"[Wild White Hair, Black Lipstick, Clown Eyes G...",../punk_imgs/9998.png,54.333333,https://www.larvalabs.com/cryptopunks/cryptopu...


In [19]:
# Label every punk with its attribute count ('<n> attributes') and list the
# distinct labels in sorted (string) order.
punk_db['attr_count_str'] = punk_db['attr_count'].apply(lambda x: f'{x} attributes')
attr_count_list = sorted(punk_db['attr_count_str'].unique())
attr_count_list

['0 attributes',
 '1 attributes',
 '2 attributes',
 '3 attributes',
 '4 attributes',
 '5 attributes',
 '6 attributes',
 '7 attributes']

In [27]:
from plotly.colors import hex_to_rgb

In [30]:
def between_skin_tone_and_attributes(item, source_skin_tone, target_attribute):
    """Check that `item` has `source_skin_tone` and lists `target_attribute`.

    `item` is read through its 'skin_tone' and 'attributes' keys. When the
    skin tone does not match, the (falsy) comparison result is returned and
    the attributes are not looked at; otherwise the result of
    `target_attribute in item['attributes']` is returned.
    """
    skin_matches = item['skin_tone'] == source_skin_tone
    if not skin_matches:
        return skin_matches
    return target_attribute in item['attributes']

def convert_to_rgba(hex_color, a=0.5):
    """Format a hex colour as an 'rgba(r,g,b,a)' string.

    Args:
        hex_color: colour passed unchanged to `hex_to_rgb`.
        a: value placed in the fourth (alpha) slot; defaults to 0.5.

    Returns:
        The string 'rgba(<r>,<g>,<b>,<a>)' built from the components returned
        by `hex_to_rgb` followed by `a`.
    """
    channels = hex_to_rgb(hex_color)
    return 'rgba({},{},{},{})'.format(*channels, a)

def get_dataset_vis1(tx_db, punk_db, trader_db):
    """Build the coloured node/link dataset for the Sankey diagram (visualization 1).

    The diagram has four levels, in this order: 'type', 'gender', 'skin_tone'
    and the attribute count rendered as '<n> attributes'. Punks with a missing
    skin tone are grouped under the name 'Non-human'. Links are produced only
    between adjacent levels, one per (source name, target name) pair,
    including pairs whose count is 0.

    Args:
        tx_db: unused; kept so the signature matches the other dataset builders.
        punk_db: DataFrame with the columns 'type', 'gender', 'skin_tone' and
            'attr_count'. It is not modified.
        trader_db: unused; kept for the same reason as `tx_db`.

    Returns:
        dict with two keys:
            'node': dict with 'name' (node names; a node's id is its position),
                'color' (the hex palette) and 'dict' (node name -> hex colour).
            'link': dict of four parallel lists: 'source' and 'target' (node
                ids), 'value' (number of punks having both names) and 'color'
                (the target node's colour passed through `convert_to_rgba`).

    Raises:
        IndexError: if the data yields more node names than the palette has
            colours.
    """
    # Derive both helper columns on a copy so the caller's frame is untouched.
    # The missing skin tones must be relabelled in the data itself, not only in
    # the node list: otherwise no row ever equals 'Non-human' and every link
    # into or out of that node is counted as 0.
    punks = punk_db.assign(
        skin_tone=punk_db['skin_tone'].fillna('Non-human'),
        attr_count_str=punk_db['attr_count'].apply(lambda x: f'{x} attributes'),
    )

    levels_list = ['type', 'gender', 'skin_tone', 'attr_count_str']

    # Node names per level. The first three keep the order of first appearance
    # in the data; the attribute-count labels are sorted as strings.
    type_list = list(punks['type'].unique())
    gender_list = list(punks['gender'].unique())
    skin_tone_list = list(punks['skin_tone'].unique())
    attr_count_list = sorted(punks['attr_count_str'].unique())
    node_names_nested_list = [type_list, gender_list, skin_tone_list, attr_count_list]
    node_names = type_list + gender_list + skin_tone_list + attr_count_list

    # Colours are matched to node names purely by position, so the grouping
    # below assumes 4 types, 2 genders, 5 skin tones and 8 attribute counts
    # appearing in the order the palette was designed for.
    node_color = (
        ['#FFCF00', '#00916E', '#EE6123', '#FDDBC4']
        + ['#FF5C5C', '#5295CB']
        + ['#DB9065', '#A4031F', '#F2A359', '#F2DC5D', '#8DFFCD']
        + ['#EEF2FC', '#B7E5F2', '#98DAEC', '#41BBDC', '#239CBE', '#4455DA', '#2232AA', '#1B1367']
    )
    if len(node_names) > len(node_color):
        raise IndexError(
            'palette has {} colors but the data produced {} nodes'.format(
                len(node_color), len(node_names)))

    node_color_dict = {name: node_color[i] for i, name in enumerate(node_names)}

    # index each node name
    node_dict = {name: i for i, name in enumerate(node_names)}

    dataset_vis1 = dict()
    dataset_vis1['node'] = {
        'name': node_names,
        'color': node_color,
        'dict': node_color_dict,
    }
    print('> Saved: node')

    source_li = list()
    target_li = list()
    value_li = list()
    link_color_li = list()
    # links between each pair of neighbor levels
    for i in range(len(levels_list) - 1):
        source_level, target_level = levels_list[i], levels_list[i + 1]

        # Count every (source, target) combination in one pass.
        pair_counts = punks.groupby(
            [source_level, target_level], sort=False).size().to_dict()

        for source in node_names_nested_list[i]:
            for target in node_names_nested_list[i + 1]:
                source_li.append(node_dict[source])
                target_li.append(node_dict[target])
                # int() keeps the value a plain Python int for json.dump.
                value_li.append(int(pair_counts.get((source, target), 0)))
                # A link takes the colour of the node it flows into.
                link_color_li.append(convert_to_rgba(node_color_dict[target]))

    dataset_vis1['link'] = {'source': source_li, 'target': target_li, 'value': value_li, 'color': link_color_li}
    print('> Saved: link')

    return dataset_vis1


# Rebuild the Sankey dataset (now with colours), overwrite the JSON file and
# display the result.
vis1_data = get_dataset_vis1(tx_db, punk_db, trader_db)

vis1_json_path = '{}/vis1_data.json'.format(VIS_DATA_PATH)
with open(vis1_json_path, 'w') as vis1_file:
    json.dump(vis1_data, vis1_file)

vis1_data


> Saved: node
> Saved: link


{'node': {'name': ['Human',
   'Zombie',
   'Ape',
   'Alien',
   'Female',
   'Male',
   'Medium',
   'Dark',
   'Light',
   'Albino',
   'Non-human',
   '0 attributes',
   '1 attributes',
   '2 attributes',
   '3 attributes',
   '4 attributes',
   '5 attributes',
   '6 attributes',
   '7 attributes'],
  'color': ['#FFCF00',
   '#00916E',
   '#EE6123',
   '#FDDBC4',
   '#FF5C5C',
   '#5295CB',
   '#DB9065',
   '#A4031F',
   '#F2A359',
   '#F2DC5D',
   '#8DFFCD',
   '#EEF2FC',
   '#B7E5F2',
   '#98DAEC',
   '#41BBDC',
   '#239CBE',
   '#4455DA',
   '#2232AA',
   '#1B1367'],
  'dict': {'Human': '#FFCF00',
   'Zombie': '#00916E',
   'Ape': '#EE6123',
   'Alien': '#FDDBC4',
   'Female': '#FF5C5C',
   'Male': '#5295CB',
   'Medium': '#DB9065',
   'Dark': '#A4031F',
   'Light': '#F2A359',
   'Albino': '#F2DC5D',
   'Non-human': '#8DFFCD',
   '0 attributes': '#EEF2FC',
   '1 attributes': '#B7E5F2',
   '2 attributes': '#98DAEC',
   '3 attributes': '#41BBDC',
   '4 attributes': '#239CBE',
   '