In [1]:
import polars as pl
# Display settings: render at most 10 rows per table and allow strings up to
# 64 characters, because txids are 64 characters long.
pl.Config.set_tbl_rows(10).set_fmt_str_lengths(64)

polars.config.Config

In [2]:
# Directory holding the parquet exports; change it here to point the whole
# notebook at a different copy of the data.
DATA_DIR = './dfs'

# One frame per exported table.
bh_votes = pl.read_parquet(f'{DATA_DIR}/block_height_votes.parquet')
block_times = pl.read_parquet(f'{DATA_DIR}/block_times.parquet')
feds = pl.read_parquet(f'{DATA_DIR}/federations.parquet')
ln_contracts = pl.read_parquet(f'{DATA_DIR}/ln_contracts.parquet')
session_times = pl.read_parquet(f'{DATA_DIR}/session_times.parquet')
sessions = pl.read_parquet(f'{DATA_DIR}/sessions.parquet')

# The transaction tables are intentionally left unloaded in this cell;
# uncomment the lines below if they are needed.
# tx_inputs = pl.read_parquet(f'{DATA_DIR}/transaction_inputs.parquet')
# tx_outputs = pl.read_parquet(f'{DATA_DIR}/transaction_outputs.parquet')
# txs = pl.read_parquet(f'{DATA_DIR}/transactions.parquet')

### Observation: There are 4 proposers in this federation (ids 0–3), and their near-identical vote counts suggest they propose on a rotating basis.

In [3]:
# Summary statistics for every column except the federation id.
bh_votes.select(pl.exclude('federation_id')).describe()

statistic,session_index,item_index,proposer,height_vote
str,f64,f64,f64,f64
"""count""",67375.0,67375.0,67375.0,67375.0
"""null_count""",0.0,0.0,0.0,0.0
"""mean""",37130.11249,7.795503,1.500007,846283.22967
"""std""",21579.724816,5.531096,1.118049,4869.951055
"""min""",0.0,0.0,0.0,837849.0
"""25%""",18338.0,4.0,1.0,842063.0
"""50%""",37079.0,7.0,2.0,846290.0
"""75%""",56210.0,11.0,3.0,850502.0
"""max""",73627.0,49.0,3.0,854712.0


In [4]:
# Number of block-height votes recorded for each proposer.
bh_votes.group_by('proposer').agg(pl.len())

proposer,len
i64,u32
2,16844
1,16843
0,16844
3,16844


### Observation: Date range of blocks is from December 6, 2023 thru July 30, 2024.

In [5]:
# Add a readable datetime next to each epoch-seconds timestamp and list the
# blocks from oldest to newest.
(
    block_times
    .with_columns(timestamp_human=pl.from_epoch('timestamp', time_unit='s'))
    .sort('timestamp')
)

block_height,timestamp,timestamp_human
i64,i64,datetime[μs]
820001,1701860884,2023-12-06 11:08:04
820002,1701861037,2023-12-06 11:10:37
820003,1701861079,2023-12-06 11:11:19
820004,1701864733,2023-12-06 12:12:13
820005,1701865352,2023-12-06 12:22:32
…,…,…
854717,1722373727,2024-07-30 21:08:47
854718,1722374399,2024-07-30 21:19:59
854719,1722375880,2024-07-30 21:44:40
854720,1722376181,2024-07-30 21:49:41


### Observation: Blocks are sequentially numbered and none are missing.

In [6]:
# Check contiguity against the data instead of hand-typed heights: if every
# height between the minimum and the maximum is present, the size of that span
# equals the number of distinct heights in the table.
heights = block_times['block_height']
expected_blocks = heights.max() - heights.min() + 1
assert expected_blocks == heights.n_unique(), (
    f"expected {expected_blocks:,} contiguous block heights, "
    f"found {heights.n_unique():,} distinct heights"
)
expected_blocks

34721

### Observation: There are 8 instances of multiple blocks being created in the same second.

In [7]:
# Count the distinct block heights per timestamp and keep only the timestamps
# that are used by more than one block.
(
    block_times
    .group_by('timestamp')
    .agg(n=pl.col('block_height').n_unique())
    .filter(pl.col('n') > 1)
)

timestamp,n
i64,u32
1708963130,2
1718464601,2
1706692360,2
1719524215,2
1711363354,2
1703453073,2
1708696352,2
1716714641,2


In [8]:
# Look at the blocks behind one of the shared timestamps, showing the
# timestamp as a datetime instead of epoch seconds.
(
    block_times
    .filter(pl.col('timestamp').eq(1708963130))
    .with_columns(pl.from_epoch('timestamp', time_unit='s'))
)

block_height,timestamp
i64,datetime[μs]
832145,2024-02-26 15:58:50
832146,2024-02-26 15:58:50


### Observation: There is only one federation represented in this database.

In [9]:
# Display the federations table as loaded (federation id plus its config string).
feds

federation_id,config
str,str
"""b21068c84f5b12ca4fdf93f3e443d3bd7c27e8642d0d52ea2e4dce6fdbbee9df""","""0400217773733a2f2f6170692e626974636f696e2d7072696e6369706c65732e…"


### Observation: Some `payment_hash` values are duplicated (66,922 contracts but only 66,908 distinct hashes).

In [10]:
# Total number of rows in the contracts table.
print(f"There are {ln_contracts.shape[0]:,} contracts.")
# Distinct contract ids; equal to the row count when no contract id repeats.
print(f"There are {ln_contracts['contract_id'].n_unique():,} unique contract ids.")
# Distinct payment hashes; a value below the row count means some hashes repeat.
print(f"There are {ln_contracts['payment_hash'].n_unique():,} unique payment hashes.")
# Rows with a non-null payment hash (shows whether missing hashes account for any difference).
print(f"There are {ln_contracts.filter(pl.col('payment_hash').is_not_null()).shape[0]:,} contracts with a payment hash.")

There are 66,922 contracts.
There are 66,922 unique contract ids.
There are 66,908 unique payment hashes.
There are 66,922 contracts with a payment hash.


In [11]:
# Keep only the contracts whose payment hash occurs more than once, hide the
# federation id, and hand the result to pandas for display.
(
    ln_contracts
    .filter(pl.col('payment_hash').is_duplicated())
    .drop('federation_id')
    .to_pandas()
)

Unnamed: 0,contract_id,type,payment_hash
0,f2ff6598f162286c107045ffcf7dafe87790d77b0e899e...,outgoing,fd858cc7e4771571c3543784305dbce71ce895e9a3b8f6...
1,ab3d53eb04fb2c67eadfa34e2b6dc3ade9ae99aeeab0d2...,outgoing,fd858cc7e4771571c3543784305dbce71ce895e9a3b8f6...
2,0e7bb561ca58af327c63ca05127537a42c2f52955d66f5...,outgoing,0c9143eac9842802947db4ade201620e02c14ba5199a1b...
3,6d948b5f4ab6beeb55105434ac56257aa6c5d4e0ea52da...,outgoing,0c9143eac9842802947db4ade201620e02c14ba5199a1b...
4,427931abe9fc30f505ceb928ef44997a7bc564cf77379b...,outgoing,0c9143eac9842802947db4ade201620e02c14ba5199a1b...
5,2ea06fb0ca9c042e52db3f181a29583654c8549ac4e245...,outgoing,5e4a469a5f442e123b8e2880fe005d4e997ab7f4cf0e57...
6,e7c4dbd0e1b4c6cdd1f5c35473af0e37a1bfe9952e2296...,outgoing,5e4a469a5f442e123b8e2880fe005d4e997ab7f4cf0e57...
7,e389f2e259cae280ea9c93b59e17d189fd7e0b6adf88f5...,outgoing,0f07546725820194be939821f109329325d9552323ef9c...
8,a0d4d4a32314793df7a8118ebb34f61c315ebdb85f8396...,outgoing,0f07546725820194be939821f109329325d9552323ef9c...
9,da628d0342e628682db38f34c9db4bcb8269cd2f72489d...,outgoing,5ffb0fe6a7570e5f1e83d1a6494a0b2b7ab04fe89471ff...


### Observation: Almost a 50-50 split between `incoming` and `outgoing` lightning contract types.

In [12]:
# Number of lightning contracts of each type.
ln_contracts.group_by('type').agg(pl.len())

type,len
str,u32
"""incoming""",33259
"""outgoing""",33663


### Observation: Sessions are sequentially indexed, none are missing, and range from April 5 thru July 30, 2024.

In [13]:
# Convert the estimated epoch seconds to datetimes for readability. Many
# sessions share the same estimated timestamp and `sort` does not guarantee the
# relative order of equal keys, so `session_index` is used as a tie-breaker to
# keep the displayed order deterministic.
(
    session_times
    .drop('federation_id')
    .with_columns(pl.from_epoch(pl.col('estimated_session_timestamp'), time_unit='s').alias('timestamp'))
    .sort(by=['timestamp', 'session_index'])
)

session_index,estimated_session_timestamp,timestamp
i64,i64,datetime[μs]
0,1712332271,2024-04-05 15:51:11
1,1712332271,2024-04-05 15:51:11
2,1712332271,2024-04-05 15:51:11
3,1712332271,2024-04-05 15:51:11
4,1712332271,2024-04-05 15:51:11
…,…,…
73623,1722371770,2024-07-30 20:36:10
73624,1722371770,2024-07-30 20:36:10
73625,1722371770,2024-07-30 20:36:10
73626,1722371770,2024-07-30 20:36:10


### Observation: The session data is mostly unique, but the value `00` recurs frequently (22,797 times) and is the only value that repeats.

In [14]:
# Preview the sessions table without the federation id column.
sessions.select(pl.all().exclude('federation_id'))

session_index,session
i64,str
0,"""0f010900070105fe000cc8e303010902070005fe000cc8d903010702050103fd…"
1,"""00"""
2,"""00"""
3,"""00"""
4,"""030115041312001001fe661030fefe09f8a842fe006781a80201150413120010…"
…,…
73623,"""00"""
73624,"""0301170415140012fd413dfe66a960c2fe1c89a561fe0064dcb7030117041514…"
73625,"""0800ec000100dd00db01d9fd141e5aef1718675175af92120142f7a21e380c99…"
73626,"""00"""


In [15]:
# Count how often each session payload occurs and keep only the payloads that
# appear more than once.
(
    sessions
    .group_by('session')
    .agg(pl.len())
    .filter(pl.col('len').gt(1))
)

session,len
str,u32
"""00""",22797
