## Load code

This is boilerplate code for all jupyter notebooks which loads all python modules in `/src` folder.

In [15]:
import sys
from pathlib import Path

repo_dir = Path('..').resolve()
assert repo_dir.name == 'hicetnunc-dataset', repo_dir
if str(repo_dir) not in sys.path:
    sys.path.append(str(repo_dir))

import src.reload; src.reload.reload()

## Load intermediate data

In [16]:
tokens_db = src.utils.read_json(src.config.tokens_db_json_file)
swaps_db = src.utils.read_json(src.config.swaps_db_json_file)
addrs_db = src.utils.read_json(src.config.addrs_db_json_file)

nft_state_log = src.utils.read_json(src.config.nft_state_log_file)
ah_state_log = src.utils.read_json(src.config.ah_state_log_file)
money_state_log = src.utils.read_json(src.config.money_state_log_file)

tr_info_db = src.tr.info_db.TrInfoDB()

## Join contracts logs with transactions

In [17]:
from collections import defaultdict
from tqdm import tqdm


transactions_index = defaultdict(lambda: {
    'nft_log_entry': None,
    'ah_log_entry': None,
    'money_log_entry': None,
    'created_swap': None,
    'created_token': None,
})


for entries, row_id_field, entry_field in [
    (nft_state_log, 'row_id', 'nft_log_entry'),
    (ah_state_log, 'row_id', 'ah_log_entry'),
    (money_state_log, 'row_id', 'money_log_entry'),
    (swaps_db.values(), 'created_row_id', 'created_swap'),
    (tokens_db.values(), 'mint_ah_row_id', 'created_token'),
]:
    for entry in entries:
        tr_hash = tr_info_db.get_tr_info_by_row_id(entry[row_id_field])['hashc']
        assert transactions_index[tr_hash][entry_field] is None
        transactions_index[tr_hash][entry_field] = entry
        entry['tr_index_entry'] = transactions_index[tr_hash]

## Check, that every art house log entry has corresponding nft log entry

In [18]:
for ah_log_entry in ah_state_log:
    entry_method = ah_log_entry['method']
    nft_log_entry = ah_log_entry['tr_index_entry']['nft_log_entry']
    if entry_method in ['apply_collect', 'apply_swap', 'apply_cancel_swap']:
        assert nft_log_entry['method'] == 'apply_transfer'
    elif entry_method == 'apply_mint':
        assert nft_log_entry['method'] == 'apply_mint'
    else:
        assert False, entry_method

## Prepare datasets objects

In [19]:
class FieldsGroup:
    def expand_fields(self, prefix):
        raise NotImplementedError()


class TrEvent(FieldsGroup):
    def __init__(self, row_id=-1):
        self.row_id = row_id

    def set_row_id(self, row_id):
        self.row_id = row_id

    def expand_fields(self, prefix):
        info = tr_info_db.get_tr_info_by_row_id(self.row_id)
        return {
            f'{prefix}_iso_date': info['iso_date'],
            f'{prefix}_stamp': info['stamp'],
            f'{prefix}_hash': info['hash'],
            f'{prefix}_row_id': info['row_id'],
        }


class FloatSet(FieldsGroup):
    def __init__(self, values=None):
        self.zero_count = 0
        self.values = values or []

    def add(self, v):
        if v == 0:
            self.zero_count += 1
        else:
            self.values.append(v)

    def expand_fields(self, prefix):
        prefix1 = '_'.join(prefix.split('_')[:-1])
        prefix2 = prefix
        if len(self.values) == 0:
            return {
                f'{prefix1}_count': 0,
                f'{prefix1}_zero_count': self.zero_count,
                f'{prefix2}_min': 0,
                f'{prefix2}_max': 0,
                f'{prefix2}_sum': 0,
                f'{prefix2}_avg': 0,
            }
        else:
            return {
                f'{prefix1}_count': len(self.values),
                f'{prefix1}_zero_count': self.zero_count,
                f'{prefix2}_min': min(self.values),
                f'{prefix2}_max': max(self.values),
                f'{prefix2}_sum': sum(self.values),
                f'{prefix2}_avg': sum(self.values) / len(self.values),
            }


In [20]:
tokens_ds = {}
for token_db_entry in tokens_db.values():
    tokens_ds[str(token_db_entry['token_id'])] = {
        'token_id': str(token_db_entry['token_id']),
        'creator': token_db_entry['creator'],
        'mint_count': token_db_entry['mint_count'],
        'mint': TrEvent(token_db_entry['mint_row_id']),
        'artifact_ipfs': src.ipfs.validate_ipfs_uri(token_db_entry['artifact_ipfs']),
        'artifact_mime': token_db_entry['artifact_mime'],
        'artifact_file_size': token_db_entry['artifact_file_size'],
        'info_title': token_db_entry['name'],
        'info_description': token_db_entry['description'],
        'info_tags': token_db_entry['tags'],
        'author_sold_prices': FloatSet(),
        'secondary_sold_prices': FloatSet(),
        'author_sold_prices': FloatSet(),
        'available_prices': FloatSet(),
        'burn_count': 0,
        'author_owns_count': 0,
        'other_own_count': 0,
        'author_sent_count': 0,
        'info_ipfs': src.ipfs.validate_ipfs_uri(token_db_entry['info_ipfs']),
        'display_uri_ipfs': (
            src.ipfs.validate_ipfs_uri(token_db_entry['display_uri_ipfs'])
            if token_db_entry['display_uri_ipfs'] else ''
        ),
        'royalties': token_db_entry['royalties'] / 10,
        'info_creator': token_db_entry['meta_creator'],
        'mint_ah_row_id': token_db_entry['mint_ah_row_id'],
    }

In [22]:
addrs_ds = {}
for addr_db_entry in addrs_db.values():
    addrs_ds[addr_db_entry['address']] = {
        'address': addr_db_entry['address'],
        'first_action': TrEvent(addr_db_entry['first_op_row_id']),
        'tzkt_info_name': addr_db_entry.get('tzkt_info_name', ''),
        'tzkt_info_twitter': addr_db_entry.get('tzkt_info_twitter', ''),
        'tzkt_info_email': addr_db_entry.get('tzkt_info_email', ''),
        'tzkt_info_instagram': addr_db_entry.get('tzkt_info_instagram', ''),
        'tzkt_info_site': addr_db_entry.get('tzkt_info_site', ''),
        'tzkt_info_description': addr_db_entry.get('tzkt_info_description', ''),
        'tzkt_info_github': addr_db_entry.get('tzkt_info_github', ''),
        'tzkt_info_telegram': addr_db_entry.get('tzkt_info_telegram', ''),
        'tzkt_info_facebook': addr_db_entry.get('tzkt_info_facebook', ''),
        'tzkt_info_reddit': addr_db_entry.get('tzkt_info_reddit', ''),
        'bought_prices': FloatSet(),
        'author_sold_prices': FloatSet(),
        'secondary_sold_prices': FloatSet(),
        'available_prices': FloatSet(),
        'in_op_count': addr_db_entry['in_op_count'],
        'out_op_count': addr_db_entry['out_op_count'],
        'money_received': addr_db_entry['money_received'],
        'money_sent': addr_db_entry['money_sent'],
        'first_op_has_reveal': int(addr_db_entry['first_op_has_reveal']),
    }

In [23]:
sells_ds = {}
transfers_ds = {}

In [24]:
swaps_ds = {}
for swap_db_entry in swaps_db.values():
    swaps_ds[str(swap_db_entry['swap_id'])] = {
        'swap_id': str(swap_db_entry['swap_id']),
        'token_id': str(swap_db_entry['token_id']),
        'price': swap_db_entry['price'] / 1e6,
        'total_count': swap_db_entry['initial_count'],
        'created': TrEvent(swap_db_entry['created_row_id']),
        'closed': TrEvent(),
        'is_secondary': 1,
        'created_by': '',
        'sold_count': 0,
        'available_count': 0,
        'returned_count': 0,
        'sold_price_sum': 0,
    }

In [25]:
token_db_entry

{'token_id': 19619,
 'creator': 'tz1TSWEDs9wcBx2KiRzVzyzECsNpRiZaLJ1D',
 'mint_tokens_receiver': 'tz1TSWEDs9wcBx2KiRzVzyzECsNpRiZaLJ1D',
 'info_ipfs': 'ipfs://QmVZRfawGSjroJiH6k1uGa9zNR5VSxEMDSvAvhoBXTb6as',
 'mint_count': 3,
 'mint_row_id': 44651504,
 'royalties': 100,
 'mint_ah_row_id': 44651503,
 'artifact_mime': 'model/gltf-binary',
 'artifact_ipfs': 'ipfs://QmayQGmkYnwrmY4Q72fp3T8wg6fK3YANBMk4RgvmVd44mN',
 'meta_creator': 'tz1TSWEDs9wcBx2KiRzVzyzECsNpRiZaLJ1D',
 'display_uri_ipfs': '',
 'tags': 'Processing\nCreativecoding\nGenerativegeometry\n@wblut',
 'name': 'Part 0001',
 'description': 'glTF Generative geometry by Frederik Vanhoutte,@wblut',
 'artifact_file_size': 1472104,
 'display_uri_file_size': -1,
 'tr_index_entry': {'nft_log_entry': {'method': 'apply_mint',
   'row_id': 44651504,
   'token_id': 19619,
   'count': 3,
   'creator': 'tz1TSWEDs9wcBx2KiRzVzyzECsNpRiZaLJ1D',
   'tokens_receiver': 'tz1TSWEDs9wcBx2KiRzVzyzECsNpRiZaLJ1D',
   'info_ipfs': 'ipfs://QmVZRfawGSjroJiH6k

## Analyze nft log

In [26]:
art_house_addr = 'KT1Hkg5qeNhfwpKW4fXvq7HGZB9z2EnmCCA9'
trash_addr = 'tz1burnburnburnburnburnburnburjAYjjX'

from collections import Counter, defaultdict

author_stats = {}

total_stats = Counter()
total_stats['cur_sold_count'] = 0
total_stats['cur_sold_volume'] = 0
total_stats['cur_author2swap_count'] = 0
total_stats['cur_swap2author_count'] = 0
total_stats['cur_author2other_count'] = 0
total_stats['cur_other2swap_count'] = 0
total_stats['cur_swap2other_count'] = 0
total_stats['cur_other2author_count'] = 0
total_stats['cur_author2trash_count'] = 0
total_stats['cur_other2trash_count'] = 0
total_stats['cur_other2other_count'] = 0


purchases_log = []


for token_entry in tokens_db.values():
    token_entry['cur_sold_count'] = 0
    token_entry['cur_sold_volume'] = 0
    token_entry['cur_author2swap_count'] = 0
    token_entry['cur_swap2author_count'] = 0
    token_entry['cur_author2other_count'] = 0
    token_entry['cur_other2swap_count'] = 0
    token_entry['cur_swap2other_count'] = 0
    token_entry['cur_other2author_count'] = 0
    token_entry['cur_author2trash_count'] = 0
    token_entry['cur_other2trash_count'] = 0
    token_entry['cur_other2other_count'] = 0

    token_entry['cur_author_count'] = 0
    token_entry['cur_swap_count'] = 0
    token_entry['cur_trash_count'] = 0
    token_entry['cur_other_count'] = 0

for swap_entry in swaps_db.values():
    swap_entry['cur_state'] = 'not_created'
    swap_entry['cur_sold_count'] = 0
    swap_entry['closed_stamp'] = -1
    swap_entry['closed_iso_date'] = ''
    swap_entry['closed_row_id'] = -1
    swap_entry['closed_hash'] = ''


for nft_log_entry in nft_state_log:
    entry_method = nft_log_entry['method']
    assert entry_method in ['apply_transfer', 'apply_mint'], entry_method

    entry_volume = nft_log_entry['volume']
    ah_log_entry = tr_hash_to_ah_log_entry.get(nft_log_entry['hash'])
    money_log_entry = tr_hash_to_money_log_entry.get(nft_log_entry['hash'])

    if entry_method == 'apply_mint':
        assert entry_volume == 0
        assert ah_log_entry['method'] == 'apply_mint'

        entry_token_id = nft_log_entry['token_id']
        entry_count = nft_log_entry['count']
        assert ah_log_entry['count'] == nft_log_entry['count']
        assert ah_log_entry['creator'] == nft_log_entry['creator']
        token_entry = tokens_db[str(entry_token_id)]

        total_stats['cur_author_count'] += entry_count
        token_entry['cur_author_count'] += entry_count
        continue

    for entry_tx in nft_log_entry['txs']:
        tx_from = entry_tx['from']
        tx_to = entry_tx['to']
        tx_token_id = int(entry_tx['token_id'])
        tx_count = int(entry_tx['count'])
        
        if tx_count == 0:
            continue

        token_entry = tokens_db[str(tx_token_id)]
        token_creator = token_entry['creator']
        
        tr_class = None
        
        assert tx_from != tx_to

        if tx_from == token_creator:
            assert entry_volume == 0

            if tx_to == art_house_addr:
                if not ah_log_entry:
                    tr_class = ('author', 'trash')
                    assert entry_volume == 0

                else:
                    tr_class = ('author', 'swap')
                    assert ah_log_entry['method'] == 'apply_swap'
                    assert entry_volume == 0
                    swap_entry = swaps_by_row_id[ah_log_entry['row_id']]
                    assert swap_entry['cur_state'] == 'not_created'
                    swap_entry['cur_state'] = 'active'

            elif tx_to == trash_addr:
                tr_class = ('author', 'trash')
                assert ah_log_entry is None
                assert entry_volume == 0

            else:
                tr_class = ('author', 'other')
                assert ah_log_entry is None
                assert entry_volume == 0

        elif tx_from == art_house_addr:
            if tx_to == token_creator:
                tr_class = ('swap', 'author')
                assert ah_log_entry['method'] == 'apply_cancel_swap'
                assert entry_volume == 0
                swap_entry = swaps_db[str(ah_log_entry['swap_id'])]
                assert swap_entry['cur_state'] == 'active'
                swap_entry['cur_state'] = 'closed'

            elif tx_to == trash_addr:
                assert False

            else:
                tr_class = ('swap', 'other')
                assert ah_log_entry['method'] == 'apply_collect'
                assert entry_volume >= 0
                swap_entry = swaps_db[str(ah_log_entry['swap_id'])]
                assert swap_entry['cur_state'] == 'active'
                assert ah_log_entry['count'] == tx_count

                assert len(nft_log_entry['txs']) == 1

                if swap_entry['price'] == 0:
                    assert money_log_entry is None

                else:
                    assert money_log_entry
                    assert money_log_entry['token_id'] == swap_entry['token_id']
                    assert money_log_entry['token_count'] == tx_count
                    assert abs(round(money_log_entry['price'] * 1e6) - swap_entry['price'] * tx_count) <= 1, (
                        round(money_log_entry['price'] * 1e6),
                        swap_entry['price'],
                    )
                    assert money_log_entry['beneficiary'] == token_creator

                purchases_log.append({
                    'token_id': swap_entry['token_id'],
                    'swap_id': swap_entry['swap_id'],
                    'price': swap_entry['price'],
                    'token_count': tx_count,
                    'token_creator': token_creator,
                })

        else:
            assert entry_volume == 0

            if tx_to == token_creator:
                tr_class = ('other', 'author')
                assert ah_log_entry is None

            elif tx_to == art_house_addr:
                # print(ah_log_entry)
                # print(nft_log_entry)
                # print(token_entry)
                # print()
                tr_class = ('other', 'swap')
                assert ah_log_entry['method'] == 'apply_swap'

            else:
                tr_class = ('other', 'other')
                assert ah_log_entry is None

        sender_class, receiver_class = tr_class
        for stats_entry in [token_entry, total_stats]:
            token_entry[f'cur_{sender_class}_count'] -= tx_count
            token_entry[f'cur_{receiver_class}_count'] += tx_count
            token_entry[f'cur_{sender_class}2{receiver_class}_count'] += tx_count


KeyError: 'volume'

## Write datasets files

In [93]:
import csv


def make_dataset(db, fpath):
    expanded_db = {}
    for row_id, row in db.items():
        new_row = {}
        for field, val in row.items():
            if isinstance(val, FieldsGroup):
                for exp_field, exp_val in val.expand_fields(field).items():
                    new_row[exp_field] = exp_val
            else:
                assert type(val) in [int, float, str]
                new_row[field] = val
        expanded_db[row_id] = new_row

    rows = list(expanded_db.values())
    cols_order = list(rows[0].keys())
    for row in rows:
        row_keys_order = list(row.keys())
        assert row_keys_order == cols_order, (row_keys_order, cols_order)

    with fpath.with_suffix('.csv').open('w', encoding='utf-8', newline='') as csv_file:
        csv_writer = csv.writer(
            csv_file,
            delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL,
        )
        csv_writer.writerow([col for col in cols_order])
        for row in rows:
            csv_writer.writerow([row[col] for col in cols_order])

    src.utils.write_json(expanded_db, fpath.with_suffix('.json'))



db = {
    'x': {
        'a': 1,
        'b': 'x and y',
        'event': TrEvent(42212375),
        'float_set': FloatSet([0, 1, 2, 3]),
    },
    'y': {
        'a': 2,
        'b': 'y',
        'event': TrEvent(42212375),
        'float_set': FloatSet([0, 2, 4, 8]),
    }
}

make_dataset(db, Path('../dataset/test'))

written 582 bytes 2 entries
