In [1]:
import requests
import pandas as pd
import json
from datetime import datetime
from tqdm import tqdm

In [2]:
# Highest block level covered by the per-level tables built in this notebook.
END_BLOCK = 3782050

# All data directories hang off a single root; the trailing slashes are kept so
# that file names can be appended directly.
root = "/mnt/Ivana/"
src_dir = f"{root}Data/Tezos/Raw_data/"
dest_dir = f"{root}Data/Tezos/Processed/"
final_dir = f"{root}Data/Tezos/Final/"

## Convert contract raw data into per-block statistics

In [None]:
# Load the raw contract table from the raw-data directory configured at the top
# of the notebook (`src_dir`), as the other loaders do, instead of a path that is
# relative to the kernel's working directory.
df = pd.read_csv(src_dir + "TZ_contract_data.csv")
df

In [30]:
# Count addresses per (firstActivity, kind), pivot the kinds into columns, and
# replace the NaNs of missing (level, kind) combinations with 0.
df_contract_stats = (
    df.groupby(["firstActivity", "kind"])["address"]
    .count()
    .unstack()
    .reset_index()
    .fillna(0)
)

# Keep the three contract kinds of interest, in a fixed order, and give the
# columns their per-level names.
df_contract_stats = df_contract_stats[["firstActivity", "asset", "delegator_contract", "smart_contract"]]
df_contract_stats.columns = ["level", "new_assets", "new_delegator_contracts", "new_smart_contracts"]

# Order rows by ascending level.
df_contract_stats = df_contract_stats.sort_values(by="level", ascending=True)

In [31]:
# Running totals of each contract kind, accumulated over the level-sorted rows.
df_contract_stats = df_contract_stats.assign(
    total_assets=df_contract_stats["new_assets"].cumsum(),
    total_delegator_contracts=df_contract_stats["new_delegator_contracts"].cumsum(),
    total_smart_contracts=df_contract_stats["new_smart_contracts"].cumsum(),
)

In [43]:
# One row per block level from 1 to END_BLOCK (inclusive) ...
df_levels = pd.DataFrame({"level": range(1, END_BLOCK + 1)})

# ... left-joined with the contract statistics, so levels without any contract
# activity are kept (with NaN in the statistics columns).
df_result = df_levels.merge(df_contract_stats, how="left", on="level")

In [46]:
# Columns produced by the left join that still contain NaN for levels without
# any contract activity.
new_cols = ["new_assets", "new_delegator_contracts", "new_smart_contracts"]
total_cols = ["total_assets", "total_delegator_contracts", "total_smart_contracts"]

# No activity at a level means zero new contracts at that level.
df_result[new_cols] = df_result[new_cols].fillna(0)

# Running totals carry forward from the last level that had activity.
# `.ffill()` replaces the deprecated `fillna(method='ffill')`. The trailing
# `.fillna(0)` covers levels before the first recorded contract, which have
# nothing to carry forward and whose running total is 0.
df_result[total_cols] = df_result[total_cols].ffill().fillna(0)

In [49]:
# Write the per-level contract statistics to the processed-data directory
# configured at the top of the notebook (`dest_dir`), rather than to a path
# relative to the kernel's working directory.
df_result.to_csv(dest_dir + "Contracts_per_level.csv", index=False)

## Daily statistics of supply

In [15]:
# Load the daily statistics and discard the four columns that are not written
# to the supply table.
df = pd.read_csv(src_dir + "TZ_daily_statistics.csv").drop(
    columns=[
        'totalBootstrapped',
        'totalCommitments',
        'totalRollupBonds',
        'totalSmartRollupBonds',
    ]
)

# Persist the remaining columns as the supply table.
df.to_csv(dest_dir + "TZ_supply.csv", index=False)

## Convert voting periods

In [4]:
# Fresh per-level frame: one row for every block level 1..END_BLOCK.
df_levels = pd.DataFrame({"level": range(1, END_BLOCK + 1)})

# Voting periods as exported to the raw-data directory; shown for inspection.
df = pd.read_csv(src_dir + "TZ_voting_periods.csv")
df

Unnamed: 0,index,epoch,firstLevel,startTime,lastLevel,endTime,kind,status,dictator,totalBakers,...,supermajority,yayBallots,yayVotingPower,nayBallots,nayVotingPower,passBallots,passVotingPower,yayRolls,nayRolls,passRolls
0,0,0,1,2018-06-30T17:39:57Z,32768,2018-07-24T22:19:27Z,proposal,no_proposals,none,8.0,...,,,,,,,,,,
1,1,1,32769,2018-07-24T22:20:27Z,65536,2018-08-18T04:17:56Z,proposal,no_proposals,none,425.0,...,,,,,,,,,,
2,2,2,65537,2018-08-18T04:20:11Z,98304,2018-09-11T00:12:14Z,proposal,no_proposals,none,448.0,...,,,,,,,,,,
3,3,3,98305,2018-09-11T00:13:14Z,131072,2018-10-04T12:51:29Z,proposal,no_proposals,none,411.0,...,,,,,,,,,,
4,4,4,131073,2018-10-04T12:52:29Z,163840,2018-10-27T21:31:14Z,proposal,no_proposals,none,446.0,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
98,98,46,3596289,2023-05-26T06:02:44Z,3678208,2023-06-09T14:37:23Z,promotion,success,none,408.0,...,80.0,122.0,2.318774e+14,1.0,1.595413e+11,28.0,1.809579e+14,38646.0,26.0,30159.0
99,99,46,3678209,2023-06-09T14:37:38Z,3760128,2023-06-24T00:06:55Z,adoption,success,none,,...,,,,,,,,,,
100,100,47,3760129,2023-06-24T00:07:10Z,3842048,2023-07-08T11:38:00Z,proposal,no_proposals,none,398.0,...,,,,,,,,,,
101,101,48,3842049,2023-07-08T11:38:15Z,3923968,2023-07-22T22:51:36Z,proposal,no_proposals,none,388.0,...,,,,,,,,,,


In [14]:
# Show the voting period(s) whose status is "active".
df.loc[df["status"] == "active"]

Unnamed: 0,index,epoch,firstLevel,startTime,lastLevel,endTime,kind,status,dictator,totalBakers,...,supermajority,yayBallots,yayVotingPower,nayBallots,nayVotingPower,passBallots,passVotingPower,yayRolls,nayRolls,passRolls
102,102,49,3923969,2023-07-22T22:51:51Z,4005888,2023-08-06T06:14:35Z,proposal,active,none,374.0,...,,,,,,,,,,


In [5]:
# List the distinct statuses observed for each kind of voting period.
# The original line was a syntax error (`df.kind==` had no right-hand operand)
# and grouped by a non-existent "stat" column. The output recorded for this cell
# is indexed by `kind` over every period kind, i.e. an unfiltered group-by on
# "kind".
df.groupby("kind").status.unique()

kind
adoption                                        [success]
exploration        [success, no_quorum, no_supermajority]
promotion                                       [success]
proposal       [no_proposals, success, no_quorum, active]
testing                                         [success]
Name: status, dtype: object

In [6]:
# Stamp every block level with the flags of the voting period that contains it.
for _, period in df.iterrows():
    first_level = int(period["firstLevel"])
    last_level = int(period["lastLevel"])

    # Index labels of the levels inside [first_level, last_level], both ends inclusive.
    in_period = df_levels["level"].between(first_level, last_level)
    ind = df_levels.index[in_period.to_numpy()]

    # A proposal period counts only when it succeeded; adoption and testing
    # periods are flagged by kind alone.
    is_proposal = period["kind"] == "proposal"
    is_success = period["status"] == "success"

    df_levels.loc[ind, "proposal_success"] = is_proposal & is_success
    df_levels.loc[ind, "adoption"] = period["kind"] == "adoption"
    df_levels.loc[ind, "testing"] = period["kind"] == "testing"
    df_levels.loc[ind, "voting_power"] = period["totalVotingPower"]



In [7]:
# Persist the per-level voting flags (row index omitted).
voting_out_path = dest_dir + "Voting_by_block.csv"
df_levels.to_csv(voting_out_path, index=False)

## Convert baking data into daily statistics

In [3]:
# Raw baking records from the raw-data directory; shown for inspection.
baking_path = src_dir + "Baking_data.csv"
df = pd.read_csv(baking_path)
df

Unnamed: 0,type,id,level,timestamp,block,proposer_alias,proposer_address,producer_alias,producer_address,payloadRound,blockRound,deposit,reward,bonus,fees,baker_alias,baker_address,priority
0,baking,574317635043328,3550000,2023-05-18T01:53:16Z,BM96QfUaCfEi9Uyjz5Xu1hwrRGbF5tZPPE67w2ben4dqK7...,P2P Validator,tz1P2Po7YM526ughEsRbY4oR9zaUPDZjxFrb,P2P Validator,tz1P2Po7YM526ughEsRbY4oR9zaUPDZjxFrb,0,0,0,5000000,4939615,2254,P2P Validator,tz1P2Po7YM526ughEsRbY4oR9zaUPDZjxFrb,0
1,baking,574317870972928,3550001,2023-05-18T01:53:31Z,BKmxGhZdjhvAf8T3dSG2Y4DcXbZoEpf3F9aj7TFqQJ3nEu...,pos.dog,tz1VQnqCCqX4K5sP3FNkVSNKTdCAMJDd3E1n,pos.dog,tz1VQnqCCqX4K5sP3FNkVSNKTdCAMJDd3E1n,0,0,0,5000000,4939615,1179,pos.dog,tz1VQnqCCqX4K5sP3FNkVSNKTdCAMJDd3E1n,0
2,baking,574318101659648,3550002,2023-05-18T01:53:46Z,BMcZSjq1fknhkpywa9pFYzMnNY9aueSYkaLQKKbt4atZPL...,,tz3QT9dHYKDqh563chVa6za8526ys1UKfRfL,,tz3QT9dHYKDqh563chVa6za8526ys1UKfRfL,0,0,0,5000000,4954616,2447,,tz3QT9dHYKDqh563chVa6za8526ys1UKfRfL,0
3,baking,574318353317888,3550003,2023-05-18T01:54:01Z,BMRgaMyzdScQDNUXXd1FHDhqTnodhmxC1hnsnPXvALCHqi...,Everstake,tz1aRoaRhSpRYvFdyvgWLL6TGyRoGF51wDjM,Everstake,tz1aRoaRhSpRYvFdyvgWLL6TGyRoGF51wDjM,0,0,0,5000000,4956759,2410,Everstake,tz1aRoaRhSpRYvFdyvgWLL6TGyRoGF51wDjM,0
4,baking,574318610219008,3550004,2023-05-18T01:54:16Z,BLq92KpqjaMxS4mbACt8SRk1o7LzHheNsRgj9nza266yMQ...,Happy Tezos,tz1WCd2jm4uSt4vntk4vSuUWoZQGhLcDuR9q,Happy Tezos,tz1WCd2jm4uSt4vntk4vSuUWoZQGhLcDuR9q,0,0,0,5000000,4956759,9554,Happy Tezos,tz1WCd2jm4uSt4vntk4vSuUWoZQGhLcDuR9q,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3789993,baking,57141751709696,1509995,2021-06-10T22:49:38Z,BLDSWFiFrh8fq9QqGSBPjsH3aPvrhX9pYMitsrghz2F26F...,Binance Baker,tz1S8MNvuFEUsWgjHvi3AxibRBf388NhT1q2,Binance Baker,tz1S8MNvuFEUsWgjHvi3AxibRBf388NhT1q2,0,0,512000000,30000000,0,167840,Binance Baker,tz1S8MNvuFEUsWgjHvi3AxibRBf388NhT1q2,0
3789994,baking,57141825110016,1509996,2021-06-10T22:50:38Z,BLJoBy3go3Rqs9T13KbMw3NgVMMKZoQniMdJ1LmL2H46Ws...,,tz3RKYFsLuQzKBtmYuLNas7uMu3AsYd4QdsA,,tz3RKYFsLuQzKBtmYuLNas7uMu3AsYd4QdsA,0,0,512000000,36250000,0,400997,,tz3RKYFsLuQzKBtmYuLNas7uMu3AsYd4QdsA,0
3789995,baking,57141977153536,1509997,2021-06-10T22:51:38Z,BMNDZQMftYSPb91XB1CAheXStXcDbBc988Ku38igCsiAMM...,Foundation baker 2 legacy,tz3bvNMQ95vfAYtG8193ymshqjSvmxiCUuR5,Foundation baker 2 legacy,tz3bvNMQ95vfAYtG8193ymshqjSvmxiCUuR5,0,0,512000000,38750000,0,232753,Foundation baker 2 legacy,tz3bvNMQ95vfAYtG8193ymshqjSvmxiCUuR5,0
3789996,baking,57142062088192,1509998,2021-06-10T22:52:38Z,BMbSH8GJ1PYsoiFDMFaLrj9tNKdpvLaCszUa3pTA5esucQ...,Moonstake,tz1MQMiZHV8q4tTwUMWmS5Y3kaP6J2136iXr,Moonstake,tz1MQMiZHV8q4tTwUMWmS5Y3kaP6J2136iXr,0,0,512000000,40000000,0,485086,Moonstake,tz1MQMiZHV8q4tTwUMWmS5Y3kaP6J2136iXr,0


In [4]:
# Parse the ISO-8601 "Z" timestamps, then derive the calendar date of each record.
df["timestamp"] = pd.to_datetime(df["timestamp"], format="%Y-%m-%dT%H:%M:%SZ")
df["date"] = df["timestamp"].dt.date

In [23]:
# Per-day aggregation spec: mean and sum for each amount column, plus the number
# of distinct baker addresses seen that day.
aggs = {amount_col: ["mean", "sum"] for amount_col in ("deposit", "reward", "bonus", "fees")}
aggs["baker_address"] = "nunique"

df_daily = df.groupby("date").agg(aggs)

In [24]:
# Flat column names, in the same order as the aggregation spec (mean then sum
# for each amount column, then the distinct-baker count).
cols = [
    "date",
    "avg_deposit", "deposit_total",
    "avg_reward", "reward_total",
    "avg_bonus", "bonus_total",
    "avg_fees", "fees_total",
    "nr_bakers",
]

# Turn the date index into a column and replace the two-level headers with `cols`.
df_daily = df_daily.reset_index().set_axis(cols, axis=1)
df_daily

Unnamed: 0,date,avg_deposit,deposit_total,avg_reward,reward_total,avg_bonus,bonus_total,avg_fees,fees_total,nr_bakers
0,2018-06-30,0.000000e+00,0,0.0,0,0.000000e+00,0,2129.281915,800610,8
1,2018-07-01,0.000000e+00,0,0.0,0,0.000000e+00,0,2500.078691,3590113,8
2,2018-07-02,0.000000e+00,0,0.0,0,0.000000e+00,0,3236.193978,4621285,8
3,2018-07-03,3.243394e+06,4664000000,0.0,0,0.000000e+00,0,2303.459666,3312375,8
4,2018-07-04,8.000000e+06,11512000000,0.0,0,0.000000e+00,0,2294.140375,3301268,8
...,...,...,...,...,...,...,...,...,...,...
1821,2023-06-25,0.000000e+00,0,5000000.0,28145000000,4.745639e+06,26713202190,22136.483923,124606268,189
1822,2023-06-26,0.000000e+00,0,5000000.0,28165000000,4.793497e+06,27001767855,18826.376886,106048981,196
1823,2023-06-27,0.000000e+00,0,5000000.0,28175000000,4.818843e+06,27154180158,179881.573736,1013632668,190
1824,2023-06-28,0.000000e+00,0,5000000.0,28515000000,4.869043e+06,27768149658,67342.705418,384055449,195


In [26]:
# Persist the daily baking statistics (row index omitted).
baking_daily_path = final_dir + "TZ_baking_daily.csv"
df_daily.to_csv(baking_daily_path, index=False)

## Group blocks by month

In [22]:
# Load the raw block table from the raw-data directory configured at the top of
# the notebook (`src_dir`). The previous "../../Data/..." path was relative to
# the kernel's working directory and inconsistent with the other loaders.
df_blocks = pd.read_csv(src_dir + "TZ_Blocks.csv")

In [23]:
# Parse the ISO-8601 "Z" timestamps and put the blocks in chronological order.
df_blocks["timestamp"] = pd.to_datetime(df_blocks["timestamp"], format="%Y-%m-%dT%H:%M:%SZ")
df_blocks = df_blocks.sort_values(by="timestamp")

# Month key of each block, rendered as "MM-YYYY".
df_blocks["month"] = df_blocks["timestamp"].map(lambda ts: format(ts, "%m-%Y"))

In [27]:
# Lowest and highest block level seen in each month.
block_by_month = df_blocks.groupby("month")["level"].agg(["min", "max"]).reset_index()
block_by_month.columns = ["date", "startLevel", "endLevel"]

# Parse the "MM-YYYY" key back into a timestamp so months can be ordered chronologically.
block_by_month["time"] = pd.to_datetime(block_by_month["date"], format="%m-%Y")

In [30]:
# Order the months chronologically; the "MM-YYYY" text key would sort by month
# number first, so the parsed `time` column is used instead.
block_by_month = block_by_month.sort_values(by="time")

# Write to the processed-data directory configured at the top of the notebook
# (`dest_dir`) instead of a "../../Data/..." path relative to the kernel's
# working directory.
block_by_month.to_csv(dest_dir + "Blocks_by_month.csv", index=False)

## Daily OHLC (open/high/low/close) prices

In [3]:
# Raw price export (date, weighted, close, high, low, open, volume); shown for inspection.
ohlc_path = src_dir + "amCharts.csv"
df = pd.read_csv(ohlc_path)
df

Unnamed: 0,date,weighted,close,high,low,open,volume
0,2017-06-24 02:06:00,2.20,2.2020,5.1540,1.4250,3.9540,28410.88
1,2017-06-25 02:06:00,3.02,3.0220,3.0250,2.0000,2.2020,39000.54
2,2017-06-26 02:06:00,1.94,1.9400,3.6570,1.9080,3.0220,16199.11
3,2017-06-27 02:06:00,3.59,3.5910,3.8760,2.0130,1.9400,42346.85
4,2017-06-28 02:06:00,2.32,2.3200,3.6600,2.2890,3.5910,6563.88
...,...,...,...,...,...,...,...
2239,2023-08-13 02:08:00,0.80,0.7940,0.8020,0.7891,0.7920,136339.09
2240,2023-08-14 02:08:00,0.80,0.7990,0.8070,0.7863,0.7940,282060.02
2241,2023-08-15 02:08:00,0.76,0.7640,0.7990,0.7337,0.7990,686183.36
2242,2023-08-16 02:08:00,0.74,0.7318,0.7677,0.7192,0.7640,724759.44


In [5]:
# Parse the textual `date` column and expose it under the name `timestamp`, so
# that `date` is free for the calendar-date column added next.
df = df.assign(date=pd.to_datetime(df["date"])).rename(columns={"date": "timestamp"})

In [7]:
# Calendar date of each price row (time of day dropped).
df["date"] = df["timestamp"].dt.date

# Keep only the date and the four OHLC price columns, in output order.
df_final = df.loc[:, ["date", "close", "high", "low", "open"]]

In [9]:
# Persist the daily OHLC table (row index omitted).
ohlc_out_path = dest_dir + "Cointg_OHLC.csv"
df_final.to_csv(ohlc_out_path, index=False)