In [1]:
import ast
import json
import time
from collections import Counter

import numpy as np
import pandas as pd

In [2]:
# PATHs -- relative locations of the project's data folders

# Inputs
DUNE_DATA_PATH = '../dune_data'   # dune data
CSV_PATH = '../cp'                # cryptopunk data with attributes info
PUNK_IMG_PATH = '../punk_imgs'    # cryptopunk images
TWEET_PATH = '../tweets'          # scraped tweets data

# Databases (transaction / cryptopunk / trader)
DATABASE_PATH = '../database'

# Visualization data, one file per visualization
VIS_DATA_PATH = '../vis_data'

In [3]:
# Load the three CSV databases; the first CSV column becomes the index.

# transaction database
tx_db = pd.read_csv('{}/tx_db.csv'.format(DATABASE_PATH), index_col=0)

# cryptopunk database
punk_db = pd.read_csv('{}/punk_db.csv'.format(DATABASE_PATH), index_col=0)
# The 'attributes' column is read back as text (presumably the repr of a list
# of attribute names -- confirm against the code that writes punk_db.csv).
# ast.literal_eval only parses Python literals, so unlike eval() it cannot
# execute code that happens to be embedded in the CSV file.
punk_db['attributes'] = punk_db['attributes'].apply(ast.literal_eval)

# trader database
trader_db = pd.read_csv('{}/trader_db.csv'.format(DATABASE_PATH), index_col=0)


### 1. Sankey Diagram

<img src="../imgs/vis1_example.png" alt="drawing" width="500">

Data format

```
{
    "nodes":[
        {"node":0,"name":"Human"},
        {"node":1,"name":"Zombie"},
        {"node":2,"name":"Ape"},
        {"node":3,"name":"Alien"},

        {"node":4,"name":"Female"},
        {"node":5,"name":"Male"},

        {"node":6,"name":"Medium"},
        {"node":7,"name":"Dark"},
        {"node":8,"name":"Light"},
        {"node":9,"name":"Albino"},
        {"node":10,"name":"non-human"},

        {"node":11,"name":"Beanie"},
        ...
        {"node":97,"name":"Stringy Hair"}
    ],

    "links":[
        {"source":<node_id>, "target":<node_id>, "value":<count>},
        ...
    ]
}
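```

Note: the example above shows a record-per-node / record-per-link layout. The `get_dataset_vis1` cell below currently writes a more compact variant to `vis1_data.json`: a node's id is its position in the `node` list, and the links are stored as three parallel lists of equal length.

```
{
    "node": ["Human", "Zombie", "Ape", "Alien", "Female", "Male", ...],

    "link": {
        "source": [<node_id>, ...],
        "target": [<node_id>, ...],
        "value":  [<count>, ...]
    }
}
```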

In [4]:
def between_skin_tone_and_attributes(item, source_skin_tone, target_attribute):
    """Check whether a punk record links a skin tone to an attribute.

    Args:
        item: mapping-like record exposing 'skin_tone' and 'attributes'.
        source_skin_tone: skin tone the record must have.
        target_attribute: attribute that must be contained in
            item['attributes'].

    Returns:
        A truthy value only when the skin tone matches and the attribute is
        present; otherwise a falsy value.
    """
    tone_matches = item['skin_tone'] == source_skin_tone
    if not tone_matches:
        # Same result as a short-circuiting `and`: the falsy comparison itself.
        return tone_matches
    return target_attribute in item['attributes']


def get_dataset_vis1(tx_db, punk_db, trader_db):
    """Build the node/link data for the Sankey diagram (visualization 1).

    The diagram has four levels: type -> gender -> skin_tone -> attributes.
    Every distinct value of a level becomes a node, and one link is emitted
    for every (source, target) combination of two neighbouring levels,
    including combinations whose count is zero.

    Parameters
    ----------
    tx_db
        Not used by this function; kept so existing callers keep working.
    punk_db : pandas.DataFrame
        Must provide the columns 'type', 'gender', 'skin_tone' and
        'attributes'. Each 'attributes' cell is iterated as a collection of
        attribute names.
    trader_db
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    dict
        ``{'node': [name, ...],
           'link': {'source': [...], 'target': [...], 'value': [...]}}``
        where 'source'/'target' hold positions in the 'node' list and 'value'
        holds the number of punks sharing both properties. The three link
        lists are parallel and contain plain Python ints.
    """
    dataset_vis1 = dict()

    # ---- nodes -----------------------------------------------------------
    type_list = list(punk_db['type'].unique())
    gender_list = list(punk_db['gender'].unique())
    # Punks without a skin tone (NaN) do not get a skin-tone node.
    skin_tone_list = [
        tone for tone in punk_db['skin_tone'].unique() if not pd.isna(tone)]
    # '' is the placeholder for "no attribute" and is not a node. Filtering it
    # (rather than list.remove) also works when no punk carries the placeholder.
    attribute_list = sorted(
        {attr for attrs in punk_db['attributes'] for attr in attrs if attr != ''})

    node_names = type_list + gender_list + skin_tone_list + attribute_list

    # Node id = position in node_names. NOTE(review): this assumes a name never
    # appears in two levels; a duplicate would resolve to its last position.
    node_dict = {name: idx for idx, name in enumerate(node_names)}

    dataset_vis1['node'] = node_names
    print('> Saved: node')

    # ---- links -----------------------------------------------------------
    source_li = list()
    target_li = list()
    value_li = list()

    # Links between each pair of neighbouring column-based levels.
    levels = [('type', type_list),
              ('gender', gender_list),
              ('skin_tone', skin_tone_list)]
    for (source_level, source_names), (target_level, target_names) in zip(
            levels, levels[1:]):
        for source in source_names:
            # Loop-invariant for the inner loop, so compute it once.
            source_mask = punk_db[source_level] == source
            for target in target_names:
                target_mask = punk_db[target_level] == target
                source_li.append(node_dict[source])
                target_li.append(node_dict[target])
                # int(): keep the value JSON-serializable (not a numpy integer).
                value_li.append(int((source_mask & target_mask).sum()))
    print('>> Saved: links between type, gender, and skin_tone')

    # Links between skin_tone and attributes. Count every (skin tone, attribute)
    # pair in one pass over the punks instead of re-scanning the whole frame
    # for each pair. set() makes a repeated attribute count once per punk,
    # matching a plain membership test.
    pair_counts = Counter()
    for skin_tone, attrs in zip(punk_db['skin_tone'], punk_db['attributes']):
        if pd.isna(skin_tone):
            continue
        pair_counts.update((skin_tone, attr) for attr in set(attrs))

    for source in skin_tone_list:
        for target in attribute_list:
            source_li.append(node_dict[source])
            target_li.append(node_dict[target])
            # Counter returns 0 for pairs that never occur.
            value_li.append(pair_counts[(source, target)])
    print('>> Saved: links between skin_tone and attributes')

    dataset_vis1['link'] = {
        'source': source_li, 'target': target_li, 'value': value_li}
    print('> Saved: link')

    return dataset_vis1


# Build the Sankey dataset, write it to the visualization data folder as
# JSON, then show it as the cell result.
vis1_data = get_dataset_vis1(tx_db, punk_db, trader_db)

vis1_json_path = '{}/vis1_data.json'.format(VIS_DATA_PATH)
with open(vis1_json_path, 'w') as fp:
    json.dump(vis1_data, fp)

vis1_data


> Saved: node
>> Saved: links between type, gender, and skin_tone
>> Saved: links between skin_tone and attributes
> Saved: link


{'node': ['Human',
  'Zombie',
  'Ape',
  'Alien',
  'Female',
  'Male',
  'Medium',
  'Dark',
  'Light',
  'Albino',
  '3D Glasses',
  'Bandana',
  'Beanie',
  'Big Beard',
  'Big Shades',
  'Black Lipstick',
  'Blonde Bob',
  'Blonde Short',
  'Blue Eye Shadow',
  'Buck Teeth',
  'Cap',
  'Cap Forward',
  'Chinstrap',
  'Choker',
  'Cigarette',
  'Classic Shades',
  'Clown Eyes Blue',
  'Clown Eyes Green',
  'Clown Hair Green',
  'Clown Nose',
  'Cowboy Hat',
  'Crazy Hair',
  'Dark Hair',
  'Do-rag',
  'Earring',
  'Eye Mask',
  'Eye Patch',
  'Fedora',
  'Front Beard',
  'Front Beard Dark',
  'Frown',
  'Frumpy Hair',
  'Goat',
  'Gold Chain',
  'Green Eye Shadow',
  'Half Shaved',
  'Handlebars',
  'Headband',
  'Hoodie',
  'Horned Rim Glasses',
  'Hot Lipstick',
  'Knitted Cap',
  'Luxurious Beard',
  'Medical Mask',
  'Messy Hair',
  'Mohawk',
  'Mohawk Dark',
  'Mohawk Thin',
  'Mole',
  'Mustache',
  'Muttonchops',
  'Nerd Glasses',
  'Normal Beard',
  'Normal Beard Black',
  