# CryptoPunk Database

## Relational Database

### 1. Transaction

- **tx_id**
- datetime
- from: trader_id
- to: trader_id
- ether
- punk_id: punk_id

### 2. Trader

- **trader_id**
- address
- in_tx: list of tx_id
- out_tx: list of tx_id

### 3. CryptoPunk

- **punk_id**
- type (Alien, Ape, Zombie, Human)
- gender (Female, Male)
- skin_tone
- img_dir
- count
- accessories

In [1]:
import json
import numpy as np
import pandas as pd

# 1. Transaction

- **tx_id**
- datetime
- from: trader_id
- to: trader_id
- ether
- punk_id: punk_id

In [20]:
# Directory holding the exported JSON files loaded by the cells below.
# Keep the trailing slash: file names are appended to it by plain string
# formatting, not by a path-joining helper.
DUNE_PATH = './dune_data/'

In [21]:
# CryptoPunk PunkTransfer
#
# Loads the transfer export (one entry per period), logs how many records
# each period holds, and flattens everything into `tr_df` with the columns
# tx_id / datetime / seller / buyer / ether / punk_id, newest first.
#
# NOTE(review): in the rendered output the minutes field of every timestamp
# equals the month number (2022-05-.. hh:05:ss, 2017-06-.. hh:06:ss). The
# upstream export may have formatted the month where minutes were intended;
# confirm against the query before relying on sub-hour ordering.

with open('{}cryptopunk_transfer.json'.format(DUNE_PATH)) as f:
    cryptopunk_tr = json.load(f)

# Log the number of transactions per period and in total
print(cryptopunk_tr.keys())
tx_count_per_year = []
for year, payload in cryptopunk_tr.items():
    n_records = len(payload['data']['get_result_by_result_id'])
    tx_count_per_year.append(n_records)
    print('{}: count of transactions = {}'.format(year, n_records))

total_tx_count = sum(tx_count_per_year)
print('\nCount of transactions = {}'.format(total_tx_count))

# Flatten all periods into one list of rows. Each row keeps the values of the
# record's 'data' mapping in their stored key order; the positional column
# names assigned below rely on that order.
tr_rows = [
    list(record['data'].values())
    for payload in cryptopunk_tr.values()
    for record in payload['data']['get_result_by_result_id']
]

tr_df = pd.DataFrame(tr_rows)
tr_df.columns = ['date', 'time', 'buyer', 'punk_id', 'seller']
tr_df['ether'] = np.zeros(len(tr_df))          # transfers carry no price
tr_df['datetime'] = pd.to_datetime(tr_df['date'] + ' ' + tr_df['time'])

# Newest first, keep only the schema columns, then expose the new row
# position as the tx_id column.
newest_first = tr_df.sort_values(by='datetime', ascending=False).reset_index(drop=True)
schema_cols = newest_first[['datetime', 'seller', 'buyer', 'ether', 'punk_id']]
tr_df = schema_cols.rename_axis('tx_id').reset_index()

tr_df

dict_keys(['2017-2019', '2020', '2021', '2022'])
2017-2019: count of transactions = 2244
2020: count of transactions = 4777
2021: count of transactions = 13788
2022: count of transactions = 2904

Count of transactions = 23713


Unnamed: 0,tx_id,datetime,seller,buyer,ether,punk_id
0,0,2022-05-05 08:05:15,\xe48ab528f2b51fa68e22d57069cffafcd4aa2b6c,\xaf016ec2afd326126d7f43498645a33a4acf51f2,0.0,8936
1,1,2022-05-05 04:05:31,\x810fdbc7e5cfe998127a1f2aa26f34e64e0364f4,\x828cc5c913fdcf50a62a919c12fed7fea8084742,0.0,9695
2,2,2022-05-04 21:05:11,\xa397d02928acaaf6553fd7e832413c5a732cf559,\xbc43fe36190c8f290c6195e33765d41afca31f2c,0.0,6839
3,3,2022-05-04 20:05:22,\x746a4b63b86df614bd359b57b1b865531e7bffb0,\x50a3535b50248b25ada1c7712bb2bf71704e8086,0.0,2728
4,4,2022-05-04 19:05:59,\x51eac3daa1c34f5c2874aa62097ac9965a180b6d,\xcc2a855946a3c20683858fe6ee15acf8b836f0b3,0.0,286
...,...,...,...,...,...,...
23708,23708,2017-06-24 11:06:22,\xe4d36bb198a38fc1d5aee62164831e86dfcdac88,\x53ede7cae3eb6a7d11429fe589c0278c9acbe21a,0.0,1351
23709,23709,2017-06-24 11:06:22,\xe4d36bb198a38fc1d5aee62164831e86dfcdac88,\x53ede7cae3eb6a7d11429fe589c0278c9acbe21a,0.0,1116
23710,23710,2017-06-24 05:06:17,\x5b098b00621eda6a96b7a476220661ad265f083f,\xcbd482af76059e2a5e4a825c4cd5ced9f2dfe286,0.0,6491
23711,23711,2017-06-23 22:06:18,\x5b098b00621eda6a96b7a476220661ad265f083f,\x871f66e0c9c77141d25fb52222ace830b584f209,0.0,5957


In [22]:
# CryptoPunk Bought
#
# Loads the purchase export (one entry per period), logs how many records
# each period holds, and flattens everything into `bt_df` with the columns
# tx_id / datetime / seller / buyer / ether / punk_id, newest first.
#
# NOTE(review): in the rendered output the minutes field of every timestamp
# equals the month number (2022-05-.. hh:05:ss, 2017-06-.. hh:06:ss). The
# upstream export may have formatted the month where minutes were intended;
# confirm against the query before relying on sub-hour ordering.

with open('{}cryptopunk_bought.json'.format(DUNE_PATH)) as f:
    cryptopunk_bt = json.load(f)

# Log the number of transactions per period and in total
print(cryptopunk_bt.keys())
tx_count_per_year = []
for year, payload in cryptopunk_bt.items():
    n_records = len(payload['data']['get_result_by_result_id'])
    tx_count_per_year.append(n_records)
    print('{}: count of transactions = {}'.format(year, n_records))

total_tx_count = sum(tx_count_per_year)
print('\nCount of transactions = {}'.format(total_tx_count))

# Flatten all periods into one list of rows. Each row keeps the values of the
# record's 'data' mapping in their stored key order; the positional column
# names assigned below rely on that order.
bt_rows = [
    list(record['data'].values())
    for payload in cryptopunk_bt.values()
    for record in payload['data']['get_result_by_result_id']
]

bt_df = pd.DataFrame(bt_rows)
bt_df.columns = ['date', 'time', 'buyer', 'ether', 'punk_id', 'seller']
bt_df['datetime'] = pd.to_datetime(bt_df['date'] + ' ' + bt_df['time'])

# Newest first, keep only the schema columns, then expose the new row
# position as the tx_id column.
newest_first = bt_df.sort_values(by='datetime', ascending=False).reset_index(drop=True)
schema_cols = newest_first[['datetime', 'seller', 'buyer', 'ether', 'punk_id']]
bt_df = schema_cols.rename_axis('tx_id').reset_index()

bt_df

dict_keys(['2017', '2018', '2019', '2020', '2021', '2022'])
2017: count of transactions = 1286
2018: count of transactions = 899
2019: count of transactions = 1068
2020: count of transactions = 2877
2021: count of transactions = 12245
2022: count of transactions = 1672

Count of transactions = 20047


Unnamed: 0,tx_id,datetime,seller,buyer,ether,punk_id
0,0,2022-05-04 20:05:55,\x77ce19a2c22fa64127bfdd6c7761fe43f78e5305,\xc9ff792d842ce164478d810437834725e4aa330e,61.50,1648
1,1,2022-05-04 18:05:40,\x3280f84f5eaa02f767f8e5e74184e98e785894af,\x51eac3daa1c34f5c2874aa62097ac9965a180b6d,63.00,286
2,2,2022-05-04 16:05:37,\x68fb8be9256ff1d7259a67333188c1a96a56adec,\x83c8f28c26bf6aaca652df1dbbe0e1b56f8baba2,69.69,5383
3,3,2022-05-04 14:05:29,\x8036a3b3695879d7bdbbfc1116016507c2691587,\x0000000000000000000000000000000000000000,0.00,3512
4,4,2022-05-04 14:05:20,\x8036a3b3695879d7bdbbfc1116016507c2691587,\x0000000000000000000000000000000000000000,0.00,7214
...,...,...,...,...,...,...
20042,20042,2017-06-23 22:06:08,\x5b098b00621eda6a96b7a476220661ad265f083f,\x00bd3a6660309fb9e0129b9b777a9ccb9c2869dc,0.06,5624
20043,20043,2017-06-23 21:06:53,\xc352b534e8b987e036a93539fd6897f53488e56a,\x00bd9fd57c423a1b1c969823d409156d90974d77,0.10,5056
20044,20044,2017-06-23 21:06:45,\x5b098b00621eda6a96b7a476220661ad265f083f,\x00bd3a6660309fb9e0129b9b777a9ccb9c2869dc,0.04,5719
20045,20045,2017-06-23 21:06:32,\x5b098b00621eda6a96b7a476220661ad265f083f,\xc352b534e8b987e036a93539fd6897f53488e56a,0.01,3134


In [23]:
# Stack transfers and purchases into one transaction table and give every row
# a fresh tx_id. ignore_index=True discards the two overlapping 0..n-1 indexes
# so the combined frame has unique row labels from the start.
all_tx_df = pd.concat([tr_df, bt_df], axis=0, ignore_index=True)
all_tx_df['tx_id'] = np.arange(len(all_tx_df))

# Index the traders: every distinct address seen as seller or buyer gets an
# integer id equal to its position in the sorted, de-duplicated address array.
sellers = all_tx_df['seller'].unique()
buyers = all_tx_df['buyer'].unique()
traders_ids = np.unique(np.concatenate((sellers, buyers)))

# traders_ids: index = address ('trader'), column 'index' = trader id
traders_ids = pd.DataFrame(traders_ids, columns=['trader'])
traders_ids.reset_index(inplace=True)
traders_ids.set_index('trader', inplace=True)

# Get the tx df: translate addresses to trader ids with one label lookup per
# column instead of one Python-level lookup per row. `.loc` with a list of
# labels still raises KeyError if an address is missing from the table.
trader_id_of = traders_ids['index']
all_tx_df['from'] = trader_id_of.loc[all_tx_df['seller'].to_numpy()].to_numpy()
all_tx_df['to'] = trader_id_of.loc[all_tx_df['buyer'].to_numpy()].to_numpy()
all_tx_df = all_tx_df.loc[:, ['tx_id', 'datetime', 'from', 'to', 'punk_id', 'ether']]
all_tx_df

Unnamed: 0,tx_id,datetime,from,to,punk_id,ether
0,0,2022-05-05 08:05:15,7978,6084,8936,0.00
1,1,2022-05-05 04:05:31,4450,4502,9695,0.00
2,2,2022-05-04 21:05:11,5666,6543,6839,0.00
3,3,2022-05-04 20:05:22,3979,2746,2728,0.00
4,4,2022-05-04 19:05:59,2801,7157,286,0.00
...,...,...,...,...,...,...
43755,43755,2017-06-23 22:06:08,3126,53,5624,0.06
43756,43756,2017-06-23 21:06:53,6807,54,5056,0.10
43757,43757,2017-06-23 21:06:45,3126,53,5719,0.04
43758,43758,2017-06-23 21:06:32,3126,6807,3134,0.01


# 2. Trader

- **trader_id**
- address
- in_tx: list of tx_id
- out_tx: list of tx_id

In [5]:
# Get all transactions for a trader
## {trader_id: {'out_tx': [tx_id, ...], 'in_tx': [tx_id, ...]}}
# 'out_tx' collects the transactions where the trader is the sender ('from'),
# 'in_tx' those where the trader is the receiver ('to').
trader_dict = dict()

# Reads `all_tx_df`, the merged transaction table built by the cell that
# concatenates transfers and purchases (no frame named `all_df` is defined in
# this notebook). The three columns are walked in lockstep, in row order;
# `.tolist()` yields plain Python ints, so the stored ids are ordinary ints.
tx_columns = zip(
    all_tx_df['tx_id'].tolist(),
    all_tx_df['from'].tolist(),
    all_tx_df['to'].tolist(),
)
for tx_id, from_id, to_id in tx_columns:
    # setdefault is a single O(1) dict lookup; it creates the empty entry the
    # first time a trader is seen. The sender is registered before the
    # receiver so first-seen order of the keys follows the transaction rows.
    trader_dict.setdefault(from_id, {'out_tx': [], 'in_tx': []})['out_tx'].append(tx_id)
    trader_dict.setdefault(to_id, {'out_tx': [], 'in_tx': []})['in_tx'].append(tx_id)

trader_dict

{7978: {'out_tx': [0, 10], 'in_tx': [23735, 23828, 23881, 29992]},
 6084: {'out_tx': [], 'in_tx': [0, 10]},
 4450: {'out_tx': [1, 48, 76, 77, 155, 189, 218, 298, 328, 332],
  'in_tx': [7,
   21,
   32,
   42,
   43,
   49,
   65,
   66,
   74,
   81,
   118,
   124,
   126,
   127,
   130,
   131,
   148,
   151,
   156,
   159,
   161,
   162,
   165,
   170,
   174,
   175,
   178,
   187,
   193,
   199,
   203,
   207,
   208,
   209,
   210,
   211,
   213,
   215,
   219,
   222,
   224,
   225,
   227,
   230,
   232,
   234,
   236,
   238,
   239,
   242,
   243,
   244,
   245,
   247,
   249,
   250,
   253,
   254,
   255,
   256,
   259,
   260,
   262,
   264,
   265,
   268,
   269,
   270,
   271,
   272,
   275,
   276,
   283,
   284,
   289,
   290,
   292,
   293,
   295,
   297,
   299,
   308,
   326,
   327,
   329,
   334,
   335]},
 4502: {'out_tx': [194], 'in_tx': [1, 651]},
 5666: {'out_tx': [2, 469, 2896, 2898, 2904, 2906, 25022, 25465, 25534, 27140],
  'in_

In [14]:
# Merge all traders into one dataframe    
trader_df = pd.DataFrame.from_dict(trader_dict, orient='index')                       ## Create dataframe from dictionary
trader_df.reset_index(inplace=True)
trader_df.rename(columns={'index': 'trader_id'}, inplace=True)

traders_ads = traders_ids.reset_index()
traders_ads.set_index('index', inplace=True)

# Get addresses for each trader
trader_df['address'] = trader_df['trader_id'].apply(lambda x: traders_ads.loc[x, 'trader'])
trader_df = trader_df.loc[:, ['trader_id', 'address', 'in_tx', 'out_tx']]

print('Count of traders = {}'.format(len(trader_df)))

# Sort by trader_id
trader_df.sort_values(by='trader_id', ascending=True, inplace=True)
trader_df.reset_index(inplace=True)
trader_df = trader_df.loc[:, ['trader_id', 'address', 'in_tx', 'out_tx']]
trader_df
trader_df

Count of traders = 8921


Unnamed: 0,trader_id,address,in_tx,out_tx
0,0,\x0000000000000000000000000000000000000000,"[14862, 23716, 23717, 23719, 23733, 23741, 237...",[]
1,1,\x0000000000000000000000000000000000000001,[22333],[]
2,2,\x000000000000000000000000000000000000dead,[7764],[]
3,3,\x00000000000000000000005cda7ec9514b4f5959,[13776],[]
4,4,\x000000000cc7e508b4b115e64d71ef374cfb7703,[28075],[5848]
...,...,...,...,...
8916,8916,\xfff1b1e92accedfd67b01b1058edb123cbbdede7,"[42828, 42829, 42830]","[8248, 8249, 39622]"
8917,8917,\xfff423212eb0b97788d43a0c38b7d5762ba3c6e6,[4876],[24566]
8918,8918,\xfff7b45fea022ea9bb5822f5a065e31d8e235b0d,[29483],[5730]
8919,8919,\xfffa087331ebc28737dfc80865fa655dc443df47,[1861],[1863]


# 3. CryptoPunk

- **punk_id**
- img_dir
- type (Alien, Ape, Zombie, Human)
- gender (Female, Male)
- skin_tone
- count
- accessories
- current_owner: trader_id (not produced by the code below)

In [15]:
# Read the punk attribute files (these include the skin tone). The data is
# split across several CSV files in CSV_PATH; all parts are read first and
# stacked in one go, starting with '0-999.csv'.
CSV_PATH = './cp/'
csvs = [
    '1000-1999.csv', '2000-2999.csv', '3000-3999.csv',
    '4000-4999.csv', '5000-5999.csv', '6000-6999.csv',
    '7000-7999.csv', '8000-8999.csv', '9000-9999.csv',
]

punk_parts = [pd.read_csv(CSV_PATH+'0-999.csv')]
punk_parts.extend(pd.read_csv(CSV_PATH+part_name) for part_name in csvs)
punk_df = pd.concat(punk_parts, axis=0)

# The raw headers carry a leading space (e.g. ' type'); map them to clean names.
clean_names = {
    'id': 'punk_id',
    ' type': 'type',
    ' gender': 'gender',
    ' skin tone': 'skin_tone',
    ' count': 'count',
    ' accessories': 'accessories',
}
punk_df = punk_df.rename(columns=clean_names)

# Renumber the rows (each part brought its own index) and keep the schema columns
punk_df = punk_df.reset_index(drop=True).loc[
    :, ['punk_id', 'type', 'gender', 'skin_tone', 'count', 'accessories']
]

# Relative path of each punk's image, derived from its id
punk_df['img_dir'] = ['./imgs/{}.png'.format(pid) for pid in punk_df['punk_id']]
punk_df

Unnamed: 0,punk_id,type,gender,skin_tone,count,accessories,img_dir
0,0,Human,Female,Medium,3,Green Eye Shadow / Earring / Blonde Bob,./imgs/0.png
1,1,Human,Male,Dark,2,Smile / Mohawk,./imgs/1.png
2,2,Human,Female,Light,1,Wild Hair,./imgs/2.png
3,3,Human,Male,Dark,3,Wild Hair / Nerd Glasses / Pipe,./imgs/3.png
4,4,Human,Male,Medium,4,Big Shades / Wild Hair / Earring / Goat,./imgs/4.png
...,...,...,...,...,...,...,...
9995,9995,Human,Female,Albino,2,Purple Eye Shadow / Straight Hair Dark,./imgs/9995.png
9996,9996,Human,Male,Light,4,Cigarette / Earring / Crazy Hair / Smile,./imgs/9996.png
9997,9997,Zombie,Male,,2,Front Beard / Cap Forward,./imgs/9997.png
9998,9998,Human,Female,Medium,3,Wild White Hair / Black Lipstick / Clown Eyes...,./imgs/9998.png


# Save to `csv`

In [18]:
# Write the three tables to CSV files under CSV_PATH, without the row index.
CSV_PATH = './database/'

exports = [
    (all_tx_df, '{}transaction.csv'),
    (trader_df, '{}trader.csv'),
    (punk_df, '{}punk.csv'),
]
for frame, path_template in exports:
    frame.to_csv(path_template.format(CSV_PATH), index=False)