In [1]:
import pandas as pd
import numpy as np
import re
def format_bigquery_column_names(nftfi):
    # Remove special charachters
    nftfi.columns = [re.sub(r'[^a-zA-Z0-9\s]+', '', column) for column in nftfi.columns]
    # Remove spaces at beginning and end
    nftfi.columns = nftfi.columns.str.strip()
    # Replace spaces with underscore
    nftfi.columns = nftfi.columns.str.replace(' ', '_')
    # Define a function to convert camel-case to kebab-case
    def camel_to_kebab(s):
        # Replace consecutive capital letters with a single lowercase letter
        s = re.sub(r'(?<=[a-z])(?=[A-Z])', '_', s)
        # Convert remaining camel-case string to kebab-case
        s = re.sub(r'(?<!^)(?<!_)(?=[A-Z])(?![A-Z])', '_', s).lower()
        return s
    # Apply the function to all column names
    nftfi.columns = nftfi.columns.map(camel_to_kebab)
    return nftfi



## Load tables

In [2]:
try: 
    mq = pd.read_csv('mq.csv')
except FileNotFoundError:
    mq = pd.read_csv('analysis/metaquants_analysis/mq.csv')

  mq = pd.read_csv('mq.csv')


In [3]:
mq = mq.sort_values(by='block_timestamp', ascending=False)
mq.head()

Unnamed: 0,transaction_hash,block_timestamp,loan_id,to_address,from_address,principal_amount,repayment_amount,erc20_address,erc20_name,due_date,duration_in_days,apr,token_id,collection_address,method,protocol,amt_in_usd,roll_over,block_number
4932,0x9d3ef0aa602a0c7fbd66a4cb58d4eb2c3c8c6df79a02...,2023-05-29 09:02:47+00:00,37267,0x4a29367c5ae9f84ef03e447d1f7dee8e6b16229d,0x47a0dfeb07abebd5f77aa5ffaa18faecd7686b4f,0.741584,0.74707,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,wETH,2023-06-28 09:02:47+00:00,30.0,0.09,2742,0x4e1f41613c9084fdb9e34e11fae9412427480e56,borrow,nftfi,1409.094022,,17363630
4933,0x414c4d8a72b9b64905c100ade68dab60a2f66b2faa1b...,2023-05-29 08:45:47+00:00,37266,0x4e239ef731d57f7fd2e30f38b79b5eae21b4dd18,0xc6a6f43d5d52c855ebe1f825c717937a7b901732,0.53,0.555266,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,wETH,2023-06-28 08:45:47+00:00,30.0,0.58,1174,0xe785e82358879f061bc3dcac6f0444462d4b5330,borrow,nftfi,1007.009593,,17363545
4934,0x4fa295df3f71f06c59d08f04c2c98b6c272241b35b69...,2023-05-29 08:33:11+00:00,37265,0x17483a88242f13198ec6a4a9f1d92665676a3e95,0x064980edd7d43abee781f49a4e31c06df05ecccb,0.575,0.610351,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,wETH,2023-07-01 08:33:11+00:00,33.0,0.680006,6409,0x79fcdef22feed20eddacbb2587640e45491b757f,borrow,nftfi,1092.510407,,17363482
4935,0xda0b0c0a233f35f2af0d013b905bc1143efaa6673014...,2023-05-29 07:36:47+00:00,37264,0xf93f5436d997920129cc9697f0667801c577e847,0x47a0dfeb07abebd5f77aa5ffaa18faecd7686b4f,0.668591,0.673537,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,wETH,2023-06-28 07:36:47+00:00,30.0,0.09,345,0xccc441ac31f02cd96c153db6fd5fe0a2f4e6a68d,borrow,nftfi,1273.379564,,17363201
4936,0x081336287b0919137ad5dd5505226fa39750f9944bb1...,2023-05-29 07:16:59+00:00,37263,0xf93f5436d997920129cc9697f0667801c577e847,0x47a0dfeb07abebd5f77aa5ffaa18faecd7686b4f,0.668591,0.673537,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,wETH,2023-06-28 07:16:59+00:00,30.0,0.09,5913,0xccc441ac31f02cd96c153db6fd5fe0a2f4e6a68d,borrow,nftfi,1273.379564,,17363104


In [4]:
try: 
    nftfi = pd.read_csv('../../analytics_bot_langchain/data/nftfi/loans_with_transaction_hash.csv')
except FileNotFoundError:
    nftfi = pd.read_csv('analytics_bot_langchain/data/nftfi/loans_with_transaction_hash.csv')
nftfi = format_bigquery_column_names(nftfi)
nftfi = nftfi.rename(columns={'loan_id': 'loan_no', 'loan_date': 'date'})


In [5]:
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_colwidth', None)  # Displays all the text for each value in a column

In [6]:
print('MQ')
display(mq.head(2))
print('\n--------------------------------------------------------------------')
print('\n\nNFTfi')
display(nftfi.head(2))

MQ


Unnamed: 0,transaction_hash,block_timestamp,loan_id,to_address,from_address,principal_amount,repayment_amount,erc20_address,erc20_name,due_date,duration_in_days,apr,token_id,collection_address,method,protocol,amt_in_usd,roll_over,block_number
4932,0x9d3ef0aa602a0c7fbd66a4cb58d4eb2c3c8c6df79a02bf54ad1723bfd25a533a,2023-05-29 09:02:47+00:00,37267,0x4a29367c5ae9f84ef03e447d1f7dee8e6b16229d,0x47a0dfeb07abebd5f77aa5ffaa18faecd7686b4f,0.741584,0.74707,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,wETH,2023-06-28 09:02:47+00:00,30.0,0.09,2742,0x4e1f41613c9084fdb9e34e11fae9412427480e56,borrow,nftfi,1409.094022,,17363630
4933,0x414c4d8a72b9b64905c100ade68dab60a2f66b2faa1bd23fc24774cfb7e597d8,2023-05-29 08:45:47+00:00,37266,0x4e239ef731d57f7fd2e30f38b79b5eae21b4dd18,0xc6a6f43d5d52c855ebe1f825c717937a7b901732,0.53,0.555266,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,wETH,2023-06-28 08:45:47+00:00,30.0,0.58,1174,0xe785e82358879f061bc3dcac6f0444462d4b5330,borrow,nftfi,1007.009593,,17363545



--------------------------------------------------------------------


NFTfi


Unnamed: 0,loan_no,id,loan_contract,status,borrower,lender,loan_principal_amount,maximum_repayment_amount,loan_duration_d,loan_apr,loan_start_time,date,loan_erc20denomination,block_num,transaction_hash,gas_price,gas_limit,gas_used,cumulative_gas_used,effective_gas_price,repaid,liquidated,loan_repaid_time,repaid_amount_paid_to_lender,repaid_transaction_hash,repaid_gas_price,repaid_gas_limit,repaid_gas_used,repaid_cumulative_gas_used,repaid_effective_gas_price,repaid_admin_fee,loan_liquidation_date,liquidated_transaction_hash,liquidated_gas_price,liquidated_gas_limit,liquidated_gas_used,liquidated_cumulative_gas_used,liquidated_effective_gas_price
0,v1.loan.fixed-0,0,v1.loan.fixed,repaid,0xc35A5FEc6BE6957899E15559Be252Db882220b37,0x44DdF3e35Cd2D629d78674FF6BF5b953e2B069fe,30000000000000000,35000000000000000,7,869.047619,1589580384,2020-05-15T22:06:24.000Z,0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2,10073259,0x7c7675a6613ddaded0b187314c2edd3ad3c5428a7a14e92830b74e03fcaf7210,17782723583,1000000,436882,3212704,17782723583,1.0,,2020-05-16T09:25:02.000Z,34750000000000000,0x3e24b9cd4c0208acc25259d4fcbddf75b48a554d68147acd13e656ea025b8c95,19000000000.0,500000.0,120497.0,9549420.0,19000000000.0,250000000000000,,,,,,,
1,v1.loan.fixed-1,1,v1.loan.fixed,repaid,0xc35A5FEc6BE6957899E15559Be252Db882220b37,0x44DdF3e35Cd2D629d78674FF6BF5b953e2B069fe,3000000000000000,3100000000000000,7,173.809524,1589623507,2020-05-16T10:05:07.000Z,0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2,10076484,0x937a5320404c0c835f2739c84b229ef837a9f0b0ef92cbbf4a232b5b357bfc33,19755020080,500000,441106,9710213,19755020080,1.0,,2020-05-16T11:50:26.000Z,3095000000000000,0x59d1c26160b7bdde716b380364b506e056d852ac1b360b265109f682a0b89706,16974490000.0,500000.0,122603.0,9470068.0,16974490000.0,5000000000000,,,,,,,


In [7]:
mq['loan_id'].head(2)

4932    37267
4933    37266
Name: loan_id, dtype: int64

### Clean data

In [8]:
# clean loan IDs to try and compare them
mq['loan_id_cleaned'] = mq['loan_id'].astype(str).str.replace('-','')
nftfi['loan_no_cleaned'] = nftfi['loan_no'].str.split('-').apply(lambda x: x[-1])
nftfi

Unnamed: 0,loan_no,id,loan_contract,status,borrower,lender,loan_principal_amount,maximum_repayment_amount,loan_duration_d,loan_apr,loan_start_time,date,loan_erc20denomination,block_num,transaction_hash,gas_price,gas_limit,gas_used,cumulative_gas_used,effective_gas_price,repaid,liquidated,loan_repaid_time,repaid_amount_paid_to_lender,repaid_transaction_hash,repaid_gas_price,repaid_gas_limit,repaid_gas_used,repaid_cumulative_gas_used,repaid_effective_gas_price,repaid_admin_fee,loan_liquidation_date,liquidated_transaction_hash,liquidated_gas_price,liquidated_gas_limit,liquidated_gas_used,liquidated_cumulative_gas_used,liquidated_effective_gas_price,loan_no_cleaned
0,v1.loan.fixed-0,0,v1.loan.fixed,repaid,0xc35A5FEc6BE6957899E15559Be252Db882220b37,0x44DdF3e35Cd2D629d78674FF6BF5b953e2B069fe,30000000000000000,35000000000000000,7,869.047619,1589580384,2020-05-15T22:06:24.000Z,0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2,10073259,0x7c7675a6613ddaded0b187314c2edd3ad3c5428a7a14e92830b74e03fcaf7210,17782723583,1000000,436882,3212704,17782723583,1.0,,2020-05-16T09:25:02.000Z,34750000000000000,0x3e24b9cd4c0208acc25259d4fcbddf75b48a554d68147acd13e656ea025b8c95,1.900000e+10,500000.0,120497.0,9549420.0,1.900000e+10,250000000000000,,,,,,,,0
1,v1.loan.fixed-1,1,v1.loan.fixed,repaid,0xc35A5FEc6BE6957899E15559Be252Db882220b37,0x44DdF3e35Cd2D629d78674FF6BF5b953e2B069fe,3000000000000000,3100000000000000,7,173.809524,1589623507,2020-05-16T10:05:07.000Z,0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2,10076484,0x937a5320404c0c835f2739c84b229ef837a9f0b0ef92cbbf4a232b5b357bfc33,19755020080,500000,441106,9710213,19755020080,1.0,,2020-05-16T11:50:26.000Z,3095000000000000,0x59d1c26160b7bdde716b380364b506e056d852ac1b360b265109f682a0b89706,1.697449e+10,500000.0,122603.0,9470068.0,1.697449e+10,5000000000000,,,,,,,,1
2,v1.loan.fixed-2,2,v1.loan.fixed,repaid,0x44DdF3e35Cd2D629d78674FF6BF5b953e2B069fe,0xc35A5FEc6BE6957899E15559Be252Db882220b37,30000000000000000,30100000000000000,7,17.380952,1589628655,2020-05-16T11:30:55.000Z,0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2,10076867,0xf331e9fb8341f1b644e0edfe599a56402836808b2570578e839261f7c592d2dc,12100000000,500000,403060,9269753,12100000000,1.0,,2020-05-16T12:20:16.000Z,30095000000000000,0xfa036df32071d8b4f301b34aa2893bc0499db7e79bf12452ad6578820215f683,1.320000e+10,500000.0,132834.0,4140219.0,1.320000e+10,5000000000000,,,,,,,,2
3,v1.loan.fixed-3,3,v1.loan.fixed,repaid,0xc35A5FEc6BE6957899E15559Be252Db882220b37,0x44DdF3e35Cd2D629d78674FF6BF5b953e2B069fe,3000000000000000,3010000000000000,7,17.380952,1589631935,2020-05-16T12:25:35.000Z,0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2,10077100,0x330e8f56d4f6eb07392d8c23858eda1cada42c550ed98e4e2bd5bc80ca1be1ff,15430188679,500000,428215,2708977,15430188679,1.0,,2020-05-16T12:28:51.000Z,3009500000000000,0x8f70de246186ae66da45404c856b9db431b26bfcbe7f100ce405501bbdbff7c2,1.310000e+10,500000.0,122603.0,9570320.0,1.310000e+10,500000000000,,,,,,,,3
4,v1.loan.fixed-4,4,v1.loan.fixed,repaid,0xc35A5FEc6BE6957899E15559Be252Db882220b37,0x44DdF3e35Cd2D629d78674FF6BF5b953e2B069fe,200000000000000000,250000000000000000,7,1303.571429,1589633522,2020-05-16T12:52:02.000Z,0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2,10077208,0x9bd7a9da5505b3d42152dd5f971378daeb6a0e74a0ef213b877e27764a06d128,14000000000,500000,441098,9403518,14000000000,1.0,,2020-05-16T12:56:27.000Z,247500000000000000,0x0eaead5004befd758f4f4a7c04241098eaa9948192e2bb4d10d77358206426e5,2.100000e+10,500000.0,132203.0,4175144.0,2.100000e+10,2500000000000000,,,,,,,,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40750,v2-1.loan.fixed-33827,33827,v2-1.loan.fixed,,0x887b86B6B6957F7bbeA88B8CEfD392f39236A88C,0x73A1Bb338613291AC6530870Fe2c5D354BFEC17e,600000000,622191781,30,45.000000,1682367647,2023-04-24T20:20:47.000Z,0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48,17118481,0xfd2ae7f526cc0dd5d154e4a24b7d5531201e1b70fab52bd86f0943118bfb5c8f,46000000000,490745,480957,6695289,46000000000,,,,,,,,,,,,,,,,,,,33827
40751,v2.loan.fixed.collection-33811,33811,v2.loan.fixed.collection,,0xeE8Cd54223A93DD9B3B2C4701cb1Be5f6886C366,0x818ec19eBE023b4774799fdB39F0b20aF39571FE,99000000000000000000,99732329000000000000,30,9.000003,1682362607,2023-04-24T18:56:47.000Z,0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2,17118066,0x1ae59b0a6f064d5877d1cb7daf822fbeb6d861be40f86f6d110e0a2dca318625,52268898606,499431,484843,10219934,52268898606,,,,,,,,,,,,,,,,,,,33811
40752,v2.loan.fixed.collection-33814,33814,v2.loan.fixed.collection,,0xCffC336E6D019C1aF58257A0b10bf2146a3f42A4,0x1eC627158650fF5dB5d6FCe7C1Ed6e0086D94F5f,52555000000000000000000,52986526945000000000000,30,9.990000,1682363507,2023-04-24T19:11:47.000Z,0x6B175474E89094C44Da98b954EedeAC495271d0F,17118141,0xc95639c3fe6ffd6062ab914ce7dcbf693ccbdd4545738f5cfddd6941d74c4447,49319816314,522723,512935,6979618,49319816314,,,,,,,,,,,,,,,,,,,33814
40753,v2.loan.fixed.collection-33817,33817,v2.loan.fixed.collection,,0xCffC336E6D019C1aF58257A0b10bf2146a3f42A4,0x1eC627158650fF5dB5d6FCe7C1Ed6e0086D94F5f,52555000000000000000000,52986526945000000000000,30,9.990000,1682363555,2023-04-24T19:12:35.000Z,0x6B175474E89094C44Da98b954EedeAC495271d0F,17118145,0x837a2f77681fe4b192c1721eaf042ea7518a5315a91675b506d078f1e81ca8f6,45264530207,505623,495835,15156691,45264530207,,,,,,,,,,,,,,,,,,,33817


In [9]:
display(mq['loan_id_cleaned'])
display(nftfi['loan_no_cleaned'])

4932     37267
4933     37266
4934     37265
4935     37264
4936     37263
         ...  
49122        4
49123        3
49124        2
49125        1
49126        0
Name: loan_id_cleaned, Length: 61036, dtype: object

0            0
1            1
2            2
3            3
4            4
         ...  
40750    33827
40751    33811
40752    33814
40753    33817
40754    33826
Name: loan_no_cleaned, Length: 40755, dtype: object

In [10]:
mq.shape[0], nftfi.shape[0]

(61036, 40755)

In [11]:
# Check minimum and maximum dates of each table
print(f"min mq date: {mq.block_timestamp.min()}; max mq date: {mq.block_timestamp.max()}")
print(f"min nftfi date: {nftfi.date.min()}; max nftfi date: {nftfi.date.max()}")

min mq date: 2020-05-15 22:06:24+00:00; max mq date: 2023-05-29 09:02:47+00:00
min nftfi date: 2020-05-15T22:06:24.000Z; max nftfi date: 2023-04-24T20:20:47.000Z


In [12]:
mq.loc[mq.loan_id_cleaned.duplicated()==True].sort_values('loan_id_cleaned')

Unnamed: 0,transaction_hash,block_timestamp,loan_id,to_address,from_address,principal_amount,repayment_amount,erc20_address,erc20_name,due_date,duration_in_days,apr,token_id,collection_address,method,protocol,amt_in_usd,roll_over,block_number,loan_id_cleaned
49125,0x937a5320404c0c835f2739c84b229ef837a9f0b0ef92cbbf4a232b5b357bfc33,2020-05-16 10:05:07+00:00,-1,0xc35a5fec6be6957899e15559be252db882220b37,0x44ddf3e35cd2d629d78674ff6bf5b953e2b069fe,0.003,0.003100,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,wETH,2020-05-16 10:05:14+00:00,0.000081,150171.429000,1881,0xf3e778f839934fc819cfa1040aabacecba01e049,borrow,nftfi,0.600346,,10076484,1
4931,0xfe9b50aab8dbe8f2e876ac4b13d397b45d42660528c57a9188cd8f742d9d9739,2022-07-11 00:35:47+00:00,1,0xb6631e52e513eee0b8c932d7c76f8ccfa607a28e,0x6402cb41945a662e978c6a8a65d93c0ab17f7ac9,5.000,6.000000,0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48,USDc,2022-10-19 00:35:47+00:00,100.000000,0.730000,342202,0xfbeef911dc5821886e1dda71586d90ed28174b7d,borrow,arcade,5.000000,False,15118144,1
42936,0x166d57a868ae6c3e34c192546aedc243eda0c7ba6685b741f655e3329e2b6e29,2022-03-30 10:21:32+00:00,1,0x3e3b47b5d433be4708a6bd524facd48bb54d10b1,0xd79275564ccad8e679cbbb5e1c2facd46643a372,0.010,0.010038,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,wETH,2022-04-13 10:21:32+00:00,14.000000,0.099071,1000323,0x059edd72cd353df5106d2b9cc5ab83a52287ac3a,borrow,nftfi,33.888242,,14487024,1
49116,0x07187977a319186a238cbe626ad1311d6d3c0373935139b1687ec07c96ce1da5,2020-05-16 16:24:38+00:00,-10,0xe09b8a054dfcda9c6a5f90d85066d9b6d1bd8025,0xc35a5fec6be6957899e15559be252db882220b37,0.030,0.030010,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,wETH,2020-05-16 16:24:45+00:00,0.000081,1501.714000,2612,0xf3e778f839934fc819cfa1040aabacecba01e049,borrow,nftfi,6.016246,,10078144,10
4919,0x478e33ae2879008c343884644b0e556f0aa4303aa2c1732960c726a8fdbe8a28,2022-07-15 10:48:05+00:00,10,0x0fbed7537e4120e1021c0f39f53e9953d5d8973d,0x6402cb41945a662e978c6a8a65d93c0ab17f7ac9,5.000,6.000000,0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48,USDc,2022-07-22 10:48:05+00:00,7.000000,10.429000,1922,0x467686f1c73e77ca9498339463a6e9d58d44d3e0,borrow,arcade,5.000000,False,15146811,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2204,0x6104cd6483876e0bd2fcbe1804c5ade45c9ccab608697f6eabb4cda189b017b4,2023-02-07 19:57:23+00:00,998,0x575740f581038f792425670fa6bfebcc8e4dcdff,0xf4fb9fa23edb32215e5284cf7dbfdb5607d51a5b,28.000,28.782466,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,wETH,2023-03-09 19:57:23+00:00,30.000000,0.340000,8010,0xbd3531da5cf5857e7cfaa92426877b022e612cf8,borrow,arcade,45884.454000,False,16579132,998
41260,0x3dc35bf9b580abfdd0ab0b810caaa61f846574720c3f064fcb30305030193599,2022-04-22 21:13:20+00:00,998,0xefffdc05e7c5b305fbd504366b01f2d6424cb8c4,0x0aff497bd016000185b1c8302fa98a88ff4a4178,22.000,23.265753,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,wETH,2022-05-22 21:13:20+00:00,30.000000,0.700000,27470,0x60e4d786628fea6478f785a6d7e704777c86a7c6,borrow,nftfi,65386.873200,,14636998,998
48127,0xf14bca0bcaae1766d90f9760ac55382538bcc2d9e1229bc2b99130713ced4a63,2021-07-25 16:59:10+00:00,-999,0xefcc4c68e1ddfaa4f0fa3a7479f0fb082f96a56b,0x8e101059bd832496fc443d47ca2b6d0767b288df,1.500,1.524658,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,wETH,2021-08-24 16:59:10+00:00,30.000000,0.200000,1000031,0x059edd72cd353df5106d2b9cc5ab83a52287ac3a,borrow,nftfi,3209.229450,,12896606,999
60037,0x19cd10139bf64c29b3902a7e8e6e14805cd3c51c84e22b7ab030944e89142674,2022-11-30 22:15:59+00:00,999,0xc0a259d8e9499844b98dc7e5061e1329028e1264,0x8594d7a13bd4da9bc6bab2a2383fefbe039499ee,5.000,5.118904,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,wETH,2022-12-14 22:15:59+00:00,14.000000,0.620000,4235,0x8a90cab2b38dba80c64b7734e58ee1db38b8992e,borrow,x2y2,6484.786500,,16085719,999


In [13]:
nftfi.head(2)

Unnamed: 0,loan_no,id,loan_contract,status,borrower,lender,loan_principal_amount,maximum_repayment_amount,loan_duration_d,loan_apr,loan_start_time,date,loan_erc20denomination,block_num,transaction_hash,gas_price,gas_limit,gas_used,cumulative_gas_used,effective_gas_price,repaid,liquidated,loan_repaid_time,repaid_amount_paid_to_lender,repaid_transaction_hash,repaid_gas_price,repaid_gas_limit,repaid_gas_used,repaid_cumulative_gas_used,repaid_effective_gas_price,repaid_admin_fee,loan_liquidation_date,liquidated_transaction_hash,liquidated_gas_price,liquidated_gas_limit,liquidated_gas_used,liquidated_cumulative_gas_used,liquidated_effective_gas_price,loan_no_cleaned
0,v1.loan.fixed-0,0,v1.loan.fixed,repaid,0xc35A5FEc6BE6957899E15559Be252Db882220b37,0x44DdF3e35Cd2D629d78674FF6BF5b953e2B069fe,30000000000000000,35000000000000000,7,869.047619,1589580384,2020-05-15T22:06:24.000Z,0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2,10073259,0x7c7675a6613ddaded0b187314c2edd3ad3c5428a7a14e92830b74e03fcaf7210,17782723583,1000000,436882,3212704,17782723583,1.0,,2020-05-16T09:25:02.000Z,34750000000000000,0x3e24b9cd4c0208acc25259d4fcbddf75b48a554d68147acd13e656ea025b8c95,19000000000.0,500000.0,120497.0,9549420.0,19000000000.0,250000000000000,,,,,,,,0
1,v1.loan.fixed-1,1,v1.loan.fixed,repaid,0xc35A5FEc6BE6957899E15559Be252Db882220b37,0x44DdF3e35Cd2D629d78674FF6BF5b953e2B069fe,3000000000000000,3100000000000000,7,173.809524,1589623507,2020-05-16T10:05:07.000Z,0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2,10076484,0x937a5320404c0c835f2739c84b229ef837a9f0b0ef92cbbf4a232b5b357bfc33,19755020080,500000,441106,9710213,19755020080,1.0,,2020-05-16T11:50:26.000Z,3095000000000000,0x59d1c26160b7bdde716b380364b506e056d852ac1b360b265109f682a0b89706,16974490000.0,500000.0,122603.0,9470068.0,16974490000.0,5000000000000,,,,,,,,1


In [14]:
print('MQ:\n')
print("Are loan_id unique? Once grouped by unique, the counts are: ")
mq_loan_id_count = pd.DataFrame(mq.groupby('loan_id').agg('count')['transaction_hash'])
mq_loan_id_count = mq_loan_id_count.reset_index()

display(mq_loan_id_count['transaction_hash'].unique())
display(mq_loan_id_count.loc[mq_loan_id_count['transaction_hash']>1])
duplicated_mq_loans = pd.merge(left=mq, right=mq_loan_id_count.loc[mq_loan_id_count['transaction_hash']>1], left_on='loan_id', right_on='loan_id')
display(duplicated_mq_loans)
display(duplicated_mq_loans.amt_in_usd.sum())
print("Are loan_id_cleaned unique? Once grouped by loan_id_cleaned, the counts are: ")
display(mq.groupby('loan_id_cleaned').agg('count')['transaction_hash'].unique())


MQ:

Are loan_id unique? Once grouped by unique, the counts are: 


array([ 1,  3,  4,  8,  5,  6, 10,  7, 11, 15,  9, 16, 27, 17, 26, 12, 23,
       18, 90, 37, 33, 32, 20, 43, 13, 22,  2])

Unnamed: 0,loan_id,transaction_hash
6928,1,3
6929,2,3
6930,3,3
6931,4,3
6932,5,3
...,...,...
23980,17053,2
23981,17054,2
23982,17055,2
23983,17056,2


Unnamed: 0,transaction_hash_x,block_timestamp,loan_id,to_address,from_address,principal_amount,repayment_amount,erc20_address,erc20_name,due_date,duration_in_days,apr,token_id,collection_address,method,protocol,amt_in_usd,roll_over,block_number,loan_id_cleaned,transaction_hash_y
0,0x6a62f95d9d7a09eb344d8180929c46e03dd4dd98e3b2de8d70d4dbb5babbe896,2023-05-29 07:09:23+00:00,1923,0xf910585a0286b045097c3f62f8f827365ac825ef,0x18beceb3674f20ee3f75c03ef06542c8c882d088,12000.000,12246.575342,0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48,USDc,2023-06-28 07:09:23+00:00,30.0,0.250000,100020071,0xd92e44ac213b9ebda0178e1523cc0ce177b7fa96,borrow,arcade,12000.000000,False,17363066,1923,3
1,0xc3d2e8e9f5818335d3a7d262e444e2f52a56b5542f9ecd372775d380287540af,2022-12-24 07:59:11+00:00,1923,0x347d8e9bea3ef6a04b59008bbbfa76b05b81d912,0x14026c2c20d6883c550d130d8e9f9a5f7f17c5e9,0.728,0.738171,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,wETH,2022-12-27 07:59:11+00:00,3.0,1.700000,9989,0x7d8820fa92eb1584636f4f5b8515b5476b75171a,borrow,x2y2,885.643450,,16253254,1923,3
2,0x47bad4cfaca8f3c4466ab2adff445abbf442d9a3f65cd52de0437d43a417cd3c,2022-04-30 12:33:39+00:00,1923,0x727f25672f4f2815831ed496c87b33faeb639238,0x0aff497bd016000185b1c8302fa98a88ff4a4178,0.300,0.322192,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,wETH,2022-05-30 12:33:39+00:00,30.0,0.900009,3418,0x0616a2ef54bad0b37dce41c8d8e35cce17a926f3,borrow,nftfi,849.372450,,14685606,1923,3
3,0xaa2dcf83293041525efd9286d384a56592e1dc4e87a83aae3658316f21bb2f98,2023-05-28 03:01:23+00:00,1922,0x148216a1800ab3d71668b4126b4244a9a909955f,0x18beceb3674f20ee3f75c03ef06542c8c882d088,50000.000,51027.397260,0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48,USDc,2023-06-27 03:01:23+00:00,30.0,0.250000,3715,0x3bf2922f4520a8ba0c2efc3d2a1539678dad5e9d,borrow,arcade,50000.000000,False,17354712,1922,3
4,0x29b528505cd3bfc7bdbfaf5b5a98136ae64d2b4fd1a3a6c6abd07ca3d1d662d9,2022-12-24 07:46:35+00:00,1922,0x347d8e9bea3ef6a04b59008bbbfa76b05b81d912,0x53eaa0d7f5e43d47b0b0e30b283e923601eaa80b,0.700,0.730877,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,wETH,2022-12-31 07:46:35+00:00,7.0,2.300000,9988,0x7d8820fa92eb1584636f4f5b8515b5476b75171a,borrow,x2y2,851.580240,,16253191,1922,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28747,0x60fb1a4c19cef44645af86697f0bb05cb7f3f6e2e75f3d996956292ba7ecb3bf,2022-07-11 12:06:00+00:00,2,0xb6631e52e513eee0b8c932d7c76f8ccfa607a28e,0x6402cb41945a662e978c6a8a65d93c0ab17f7ac9,10.000,12.000000,0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48,USDc,2022-10-19 12:06:00+00:00,100.0,0.730000,8175,0x4f89cd0cae1e54d98db6a80150a824a533502eea,borrow,arcade,10.000000,False,15121262,2,3
28748,0x9d783e3bbde1663ffa2dcc0bf5eabeccbe3f58475938c42e52f792f8eb85eddf,2022-03-30 10:36:00+00:00,2,0x3e3b47b5d433be4708a6bd524facd48bb54d10b1,0xd79275564ccad8e679cbbb5e1c2facd46643a372,0.010,0.010038,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,wETH,2022-04-13 10:36:00+00:00,14.0,0.099071,735355,0x06012c8cf97bead5deae237070f9587f8e7a266d,borrow,nftfi,33.888242,,14487091,2,3
28749,0xf621a5ec2091eff1515824f718516d464a82f28516ec6302b43f2716ed1a829a,2022-09-29 12:56:35+00:00,1,0xab67d27e516ebd8a1dfff2b7f99cdb0a8daa8e3e,0xab66b7ab0826886ddb28c0a6ea0cda3eab920b2d,3.773,3.912549,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,wETH,2022-10-29 12:56:35+00:00,30.0,0.450000,9704,0x8a90cab2b38dba80c64b7734e58ee1db38b8992e,borrow,x2y2,5042.980104,,15639141,1,3
28750,0xfe9b50aab8dbe8f2e876ac4b13d397b45d42660528c57a9188cd8f742d9d9739,2022-07-11 00:35:47+00:00,1,0xb6631e52e513eee0b8c932d7c76f8ccfa607a28e,0x6402cb41945a662e978c6a8a65d93c0ab17f7ac9,5.000,6.000000,0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48,USDc,2022-10-19 00:35:47+00:00,100.0,0.730000,342202,0xfbeef911dc5821886e1dda71586d90ed28174b7d,borrow,arcade,5.000000,False,15118144,1,3


483475802.4709805

Are loan_id_cleaned unique? Once grouped by loan_id_cleaned, the counts are: 


array([ 1,  4,  5,  2, 21,  6,  8,  7, 16, 12, 27, 10, 13,  9, 17, 33, 11,
       28, 44, 14, 24, 23,  3, 18, 19, 91, 38, 34])

In [15]:
print(f"duplicate lines in MQ dataset {mq.shape[0] - mq.drop_duplicates().shape[0]}")
print(f"duplicate lines in nftfi dataset {nftfi.shape[0] - nftfi.drop_duplicates().shape[0]}")

duplicate lines in MQ dataset 31
duplicate lines in nftfi dataset 0


## Clean raw NFTfi dataset

In [16]:
# nftfi.dtypes

In [17]:
nftfi['date'] = pd.to_datetime(nftfi['date'], format="%Y-%m-%d %H:%M:%S%z")
nftfi['loan_start_time'] = pd.to_datetime(nftfi['loan_start_time'], format="%Y-%m-%d %H:%M:%S%z")
nftfi['loan_repaid_time'] = pd.to_datetime(nftfi['loan_repaid_time'], format="%Y-%m-%d %H:%M:%S%z")

# Remove invalid values
nftfi.replace(r"#DIV/0!", np.nan, regex=True, inplace=True)
nftfi.replace(r"#N/A", "", regex=True, inplace=True)

# Divide loanPrincipalAmount and maximumRepaymentAmount by ETH <> WEI i.e. 1^18
columns_to_clean = ['loan_principal_amount', 'maximum_repayment_amount', 'maximum_repayment_amount']
for col in columns_to_clean:
    nftfi[col] = nftfi[col].astype(np.float64)
    nftfi[col] /= 10 ** 18
# nftfi['no_of_days'] = nftfi['no_of_days'].astype(np.float64)
nftfi = nftfi.rename(columns={'loan_apr': 'apr'})
nftfi['apr'] = nftfi['apr'].astype(np.float64)

# Drop last column as it is unnamed
nftfi = nftfi.drop('', axis=1, errors='ignore')

# set BOOL columns to bool type
cols = ['repaid', 'liquidated']
nftfi['repaid'] = nftfi['repaid'].fillna(False)
nftfi['repaid'] = nftfi['repaid'].replace('', False)
for col in cols:
    # nftfi[col] = nftfi[col].astype('boolean')
    nftfi[col] = nftfi[col].map({'True': True, 'False': False})
    nftfi[col] = nftfi[col].astype(bool)
nftfi['repaid'] = nftfi['repaid'].astype(bool)

## Enrich datasets
### Enrich NFTfi dataset with ETH price and compute usd_value of loan for weth-denominated loans

In [18]:
# Enrich dataset with ETHUSD rate i.e. ETHPrice, then fill USDValue for USDValue from loanERC20Denomination == weth_address
weth_address = '0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2'.lower()
price_df = pd.read_csv('../../analytics_bot_langchain/data/nftfi/eth_usd_input_data.csv')
price_df = price_df.rename(columns={'datetime': 'date'})
price_df['date'] = pd.to_datetime(price_df['date'], format='%Y-%m-%d %H:%M:%S%z')
price_df = price_df.drop(columns=['open', 'low', 'volume'])

nftfi['date'] = pd.to_datetime(nftfi['date'])
nftfi['loan_principal_amount'] = nftfi['loan_principal_amount'].astype(float)

nftfi = nftfi.sort_values(by='date')
price_df = price_df.sort_values(by='date')

nftfi = pd.merge_asof(nftfi, price_df, on='date', direction='backward')
nftfi['eth_price'] = nftfi['close']
nftfi = nftfi.drop(columns=['close'])
nftfi['loan_erc20denomination'] = nftfi['loan_erc20denomination'].astype(str).str.lower().str.strip()
nftfi.loc[nftfi['loan_erc20denomination'] == weth_address, 'usd_value'] = nftfi.loc[nftfi['loan_erc20denomination'] == weth_address]['loan_principal_amount'] * nftfi.loc[nftfi['loan_erc20denomination'] == weth_address]['eth_price']

# now map missing usd_value, e.g. for stablecoin-denominated loans
usdc_address = '0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48'  # https://etherscan.io/token/0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48
usdt_address = '0xdAC17F958D2ee523a2206206994597C13D831ec7'  # https://etherscan.io/token/0xdac17f958d2ee523a2206206994597c13d831ec7
dai_address = '0x6B175474E89094C44Da98b954EedeAC495271d0F'  # https://etherscan.io/token/0x6b175474e89094c44da98b954eedeac495271d0f
stablecoin_addresses = [usdc_address, usdt_address, dai_address]
for stablecoin_address in stablecoin_addresses:
    stablecoin_address = stablecoin_address.lower()
    nftfi.loc[nftfi['loan_erc20denomination'] == stablecoin_address, 'usd_value'] = nftfi.loc[nftfi['loan_erc20denomination'] == stablecoin_address]['loan_principal_amount']



In [19]:
print('\n\nNFTfi: \n')
print("Are loan_no unique? Once grouped by unique, the counts are: ")
nftfi_loan_no_count = pd.DataFrame(nftfi.groupby('loan_no').agg('count')['borrower'])
duplicated_nftfi_loans = pd.merge(left=nftfi, right=nftfi_loan_no_count.loc[nftfi_loan_no_count['borrower']>1], left_on='loan_no', right_on='loan_no')
display(duplicated_nftfi_loans)
display(duplicated_nftfi_loans.usd_value.sum())

display(nftfi_loan_no_count['borrower'].unique())
display(nftfi_loan_no_count.loc[nftfi_loan_no_count['borrower']>1])

print("Are loan_no_cleaned unique? Once grouped by loan_id_cleaned, the counts are: ")
display(nftfi.groupby('loan_no_cleaned').agg('count')['borrower'].unique())



NFTfi: 

Are loan_no unique? Once grouped by unique, the counts are: 


Unnamed: 0,loan_no,id,loan_contract,status,borrower_x,lender,loan_principal_amount,maximum_repayment_amount,loan_duration_d,apr,loan_start_time,date,loan_erc20denomination,block_num,transaction_hash,gas_price,gas_limit,gas_used,cumulative_gas_used,effective_gas_price,repaid,liquidated,loan_repaid_time,repaid_amount_paid_to_lender,repaid_transaction_hash,repaid_gas_price,repaid_gas_limit,repaid_gas_used,repaid_cumulative_gas_used,repaid_effective_gas_price,repaid_admin_fee,loan_liquidation_date,liquidated_transaction_hash,liquidated_gas_price,liquidated_gas_limit,liquidated_gas_used,liquidated_cumulative_gas_used,liquidated_effective_gas_price,loan_no_cleaned,high,eth_price,usd_value,borrower_y


0.0

array([1])

Unnamed: 0_level_0,borrower
loan_no,Unnamed: 1_level_1


Are loan_no_cleaned unique? Once grouped by loan_id_cleaned, the counts are: 


array([1, 2])

## Enrich nftfi table with ERC20 denominations

In [20]:
# Check if it can be due to missing token
usdc_address = '0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48'  # https://etherscan.io/token/0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48
usdt_address = '0xdAC17F958D2ee523a2206206994597C13D831ec7'  # https://etherscan.io/token/0xdac17f958d2ee523a2206206994597c13d831ec7
dai_address = '0x6B175474E89094C44Da98b954EedeAC495271d0F'  # https://etherscan.io/token/0x6b175474e89094c44da98b954eedeac495271d0f
weth_address = '0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2'

stablecoin_addresses = [usdc_address, usdt_address, dai_address, weth_address]

stablecoin_names = ['usdc', 'usdt', 'dai', 'weth']
for stablecoin_address, stablecoin_name in zip(stablecoin_addresses, stablecoin_names):
    stablecoin_address = stablecoin_address.lower()
    nftfi.loc[nftfi['loan_erc20denomination'] == stablecoin_address, 'loan_erc20denomination_name'] = stablecoin_name

In [21]:
# Confirm that the above addresses matches the one available in MQ dataset:
mq.erc20_address.unique()

array(['0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2',
       '0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48',
       '0x6b175474e89094c44da98b954eedeac495271d0f'], dtype=object)

#### Enrich NFTfi table with loan types

In [22]:
nftfi['loan_type'] = nftfi.loan_no.astype(str).str.split('-').apply(lambda x: x[0]+'-'+x[1] if len(x) >2 else x[0])
nftfi['loan_type'].unique()

array(['v1.loan.fixed', 'v2.loan.fixed', 'v2-1.loan.fixed',
       'v2.loan.fixed.collection'], dtype=object)

## Check USD value discrepancies for matching loans on transaction hash

In [23]:
display(nftfi.head(2))
display(mq.head(2))

nftfi.columns = [f'nftfi_{col}' for col in nftfi.columns]
mq.columns = [f'mq_{col}' for col in mq.columns]
full_inner = pd.merge(nftfi, mq, how='inner', left_on='nftfi_transaction_hash', right_on='mq_transaction_hash', indicator=True)


Unnamed: 0,loan_no,id,loan_contract,status,borrower,lender,loan_principal_amount,maximum_repayment_amount,loan_duration_d,apr,loan_start_time,date,loan_erc20denomination,block_num,transaction_hash,gas_price,gas_limit,gas_used,cumulative_gas_used,effective_gas_price,repaid,liquidated,loan_repaid_time,repaid_amount_paid_to_lender,repaid_transaction_hash,repaid_gas_price,repaid_gas_limit,repaid_gas_used,repaid_cumulative_gas_used,repaid_effective_gas_price,repaid_admin_fee,loan_liquidation_date,liquidated_transaction_hash,liquidated_gas_price,liquidated_gas_limit,liquidated_gas_used,liquidated_cumulative_gas_used,liquidated_effective_gas_price,loan_no_cleaned,high,eth_price,usd_value,loan_erc20denomination_name,loan_type
0,v1.loan.fixed-0,0,v1.loan.fixed,repaid,0xc35A5FEc6BE6957899E15559Be252Db882220b37,0x44DdF3e35Cd2D629d78674FF6BF5b953e2B069fe,0.03,3.5e-20,7,869.047619,1970-01-01 00:00:01.589580384,2020-05-15 22:06:24+00:00,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,10073259,0x7c7675a6613ddaded0b187314c2edd3ad3c5428a7a14e92830b74e03fcaf7210,17782723583,1000000,436882,3212704,17782723583,True,True,2020-05-16 09:25:02+00:00,34750000000000000,0x3e24b9cd4c0208acc25259d4fcbddf75b48a554d68147acd13e656ea025b8c95,19000000000.0,500000.0,120497.0,9549420.0,19000000000.0,250000000000000,,,,,,,,0,,,,weth,v1.loan.fixed
1,v1.loan.fixed-1,1,v1.loan.fixed,repaid,0xc35A5FEc6BE6957899E15559Be252Db882220b37,0x44DdF3e35Cd2D629d78674FF6BF5b953e2B069fe,0.003,3.1e-21,7,173.809524,1970-01-01 00:00:01.589623507,2020-05-16 10:05:07+00:00,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,10076484,0x937a5320404c0c835f2739c84b229ef837a9f0b0ef92cbbf4a232b5b357bfc33,19755020080,500000,441106,9710213,19755020080,True,True,2020-05-16 11:50:26+00:00,3095000000000000,0x59d1c26160b7bdde716b380364b506e056d852ac1b360b265109f682a0b89706,16974490000.0,500000.0,122603.0,9470068.0,16974490000.0,5000000000000,,,,,,,,1,,,,weth,v1.loan.fixed


Unnamed: 0,transaction_hash,block_timestamp,loan_id,to_address,from_address,principal_amount,repayment_amount,erc20_address,erc20_name,due_date,duration_in_days,apr,token_id,collection_address,method,protocol,amt_in_usd,roll_over,block_number,loan_id_cleaned
4932,0x9d3ef0aa602a0c7fbd66a4cb58d4eb2c3c8c6df79a02bf54ad1723bfd25a533a,2023-05-29 09:02:47+00:00,37267,0x4a29367c5ae9f84ef03e447d1f7dee8e6b16229d,0x47a0dfeb07abebd5f77aa5ffaa18faecd7686b4f,0.741584,0.74707,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,wETH,2023-06-28 09:02:47+00:00,30.0,0.09,2742,0x4e1f41613c9084fdb9e34e11fae9412427480e56,borrow,nftfi,1409.094022,,17363630,37267
4933,0x414c4d8a72b9b64905c100ade68dab60a2f66b2faa1bd23fc24774cfb7e597d8,2023-05-29 08:45:47+00:00,37266,0x4e239ef731d57f7fd2e30f38b79b5eae21b4dd18,0xc6a6f43d5d52c855ebe1f825c717937a7b901732,0.53,0.555266,0xc02aaa39b223fe8d0a0e5c4f27ead9083c756cc2,wETH,2023-06-28 08:45:47+00:00,30.0,0.58,1174,0xe785e82358879f061bc3dcac6f0444462d4b5330,borrow,nftfi,1007.009593,,17363545,37266


In [24]:
usd_diff = full_inner[['mq_amt_in_usd','nftfi_usd_value', 'nftfi_transaction_hash']]# .dropna()
display(usd_diff.head(2))
display(usd_diff.tail(2))
usd_diff['usd_diff_nftfi_minus_mq'] = usd_diff['nftfi_usd_value'] - usd_diff['mq_amt_in_usd']
display(usd_diff.tail(2))

Unnamed: 0,mq_amt_in_usd,nftfi_usd_value,nftfi_transaction_hash
0,5.884401,,0x7c7675a6613ddaded0b187314c2edd3ad3c5428a7a14e92830b74e03fcaf7210
1,0.600346,,0x937a5320404c0c835f2739c84b229ef837a9f0b0ef92cbbf4a232b5b357bfc33


Unnamed: 0,mq_amt_in_usd,nftfi_usd_value,nftfi_transaction_hash
40754,4120.361775,4136.602,0x49a1183d64fa141041920c35ee637713bc411390f112ce8dd0437b1dc784525d
40755,600.0,6e-10,0xfd2ae7f526cc0dd5d154e4a24b7d5531201e1b70fab52bd86f0943118bfb5c8f


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  usd_diff['usd_diff_nftfi_minus_mq'] = usd_diff['nftfi_usd_value'] - usd_diff['mq_amt_in_usd']


Unnamed: 0,mq_amt_in_usd,nftfi_usd_value,nftfi_transaction_hash,usd_diff_nftfi_minus_mq
40754,4120.361775,4136.602,0x49a1183d64fa141041920c35ee637713bc411390f112ce8dd0437b1dc784525d,16.240725
40755,600.0,6e-10,0xfd2ae7f526cc0dd5d154e4a24b7d5531201e1b70fab52bd86f0943118bfb5c8f,-600.0


In [28]:
display(pd.DataFrame(usd_diff.min().values).T)
nftfi.loc[nftfi['nftfi_transaction_hash']=='0x7c7675a6613ddaded0b187314c2edd3ad3c5428a7a14e92830b74e03fcaf7210'].T
# display(pd.DataFrame(usd_diff.min().T['nftfi_transaction_hash'])

Unnamed: 0,0,1,2,3
0,0.0,0.0,0x000070f5e36faa0a41aca41fc82d8021cb5187dbc23dfc10d785aa546025a631,-500499.999999


Unnamed: 0,0
nftfi_loan_no,v1.loan.fixed-0
nftfi_id,0
nftfi_loan_contract,v1.loan.fixed
nftfi_status,repaid
nftfi_borrower,0xc35A5FEc6BE6957899E15559Be252Db882220b37
nftfi_lender,0x44DdF3e35Cd2D629d78674FF6BF5b953e2B069fe
nftfi_loan_principal_amount,0.03
nftfi_maximum_repayment_amount,0.0
nftfi_loan_duration_d,7
nftfi_apr,869.047619


In [26]:
pd.DataFrame(usd_diff.usd_diff_nftfi_minus_mq.describe())

Unnamed: 0,usd_diff_nftfi_minus_mq
count,38170.0
mean,-90.863351
std,3135.187697
min,-500499.999999
25%,-4.06756
50%,0.0
75%,3.51003
max,4868.512


## Check per collection borrow volume: all time, monthly, daily etc

In [37]:
full_inner_without_nftfi_na = full_inner.dropna(subset=['nftfi_usd_value'])
full_inner_without_nftfi_na.colu

nftfi_loan_no                                        object
nftfi_id                                              int64
nftfi_loan_contract                                  object
nftfi_status                                         object
nftfi_borrower                                       object
nftfi_lender                                         object
nftfi_loan_principal_amount                         float64
nftfi_maximum_repayment_amount                      float64
nftfi_loan_duration_d                                 int64
nftfi_apr                                           float64
nftfi_loan_start_time                        datetime64[ns]
nftfi_date                              datetime64[ns, UTC]
nftfi_loan_erc20denomination                         object
nftfi_block_num                                       int64
nftfi_transaction_hash                               object
nftfi_gas_price                                       int64
nftfi_gas_limit                         

## Compute April subset for each table

In [27]:
april_mq = mq.loc[mq['block_timestamp'] >= '2023-04-01'].loc[mq['block_timestamp'] < '2023-05-01']
april_mq.shape

KeyError: 'block_timestamp'

In [None]:
april_nftfi = nftfi.loc[nftfi['date'] >= '2023-04-01'].loc[nftfi['date'] < '2023-05-01']
april_nftfi.shape

#### Check for duplicates

In [None]:
april_nftfi.loc[april_nftfi.loan_no_cleaned.duplicated()== True]

In [None]:
display(april_mq.loc[april_mq.loan_id_cleaned.duplicated()== True].head(5))
display(april_mq.loc[april_mq.loan_id_cleaned.duplicated()== True].shape[0])

In [None]:
april_nftfi.loc[april_nftfi.loan_no_cleaned.duplicated()== True]

In [None]:
display(april_mq.head(3).sort_values('loan_id_cleaned', ascending=False))
display(april_nftfi.head(3).sort_values('loan_no_cleaned', ascending=False))

In [None]:
# Determine the set of unique IDs present in MQ and not in NFTfi
len(set(list(april_mq.loan_id_cleaned)) - set(list(april_nftfi.loan_no_cleaned)))

In [None]:
# Determine the set of unique IDs present in NFTfi and not in MQ's
len(set(list(april_nftfi.loan_no_cleaned)) - set(list(april_mq.loan_id_cleaned)))

In [None]:
april_mq.shape, april_nftfi.shape

In [None]:
april_nftfi.shape[0] - april_mq.shape[0]

## Compute which Loans are available in NFTfi's and not in MQ's, and the converse, by matching on transaction hashes

In [None]:
# how do i get all rows in df1 that are not in df2
# and all rows in df2 that are not in df1?

# Perform a full outer join on df1 and df2
full_outer = pd.merge(april_nftfi, april_mq, how='outer', on='transaction_hash', indicator=True)

# Filter out the rows that are unique to df1
unique_to_april_nftfi = full_outer[full_outer['_merge'] == 'left_only']

# Filter out the rows that are unique to df2
unique_to_april_mq = full_outer[full_outer['_merge'] == 'right_only']


In [None]:
print(f"number of loans unique to NFTfi relative to MQ in April 2023: {unique_to_april_nftfi.shape[0]}, i.e. {100 * round(unique_to_april_nftfi.shape[0] / april_nftfi.shape[0], 2)}%")
print(f"while in April 2023 NFTfi had {april_nftfi.shape[0]} loans while mq had {april_mq.shape[0]}, i.e. for April NFTfi has {april_nftfi.shape[0] - april_mq.shape[0]} more loans")
display(unique_to_april_nftfi.shape[0])
display(unique_to_april_nftfi.head(2))

In [None]:
print(f"number of loans unique to MQ relative to NFTfi in April 2023: {unique_to_april_mq.shape[0]}, i.e. {100 * round(unique_to_april_mq.shape[0] / april_mq.shape[0], 2)}%")
print(f"while in April 2023 NFTfi had {april_nftfi.shape[0]} loans while mq had {april_mq.shape[0]}, i.e. for April NFTfi has {april_nftfi.shape[0] - april_mq.shape[0]} more loans")

display(unique_to_april_mq.shape[0])
display(unique_to_april_mq.head(2))

In [None]:
pd.set_option('display.float_format', '{:,.0f}'.format)
pd.DataFrame(unique_to_april_nftfi.groupby('loan_contract').agg(sum)['usd_value'])

In [None]:
# MQ loans not in NFTfi
# unique_to_april_mq = april_mq[april_mq['loan_id_cleaned'].isin(mq_loans_not_in_nftfi)].reset_index(drop=True)
unique_to_april_mq.head(3)

In [None]:
# We need to explore this table
unique_to_april_mq.erc20_name.unique()

## NFTfi loans not in MQ

In [None]:
unique_to_april_nftfi

In [None]:
# Explore the resulting set of unique_to_april_nftfi. Is there any pattern?
unique_to_april_nftfi.describe()

### Are discrepancies due to loan type which is not accounted for?

In [None]:
# How about loan numbers, how do they vary from a naming standpoint?
# x[0]+x[1] if len(x) >2 else x[0] is such that it returns "v2-1.loan.fixed" from "v2-1.loan.fixed-32381", 
#    else returns "v2.loan.fixed.collection" from "v2.loan.fixed.collection-34479"

unique_to_april_nftfi.loan_no.astype(str).str.split('-').apply(lambda x: x[0]+'-'+x[1] if len(x) >2 else x[0]).unique()
# it seems from the above result that [v2-1.loan.fixed', 'v2.loan.fixed.collection'] are the two loan types which are not accounted for

In [None]:
# Is the above true as well for the whole table set?
nftfi_loans_not_in_mq = list(
        set(list(nftfi.loan_no_cleaned)) - set(list(mq.loan_id_cleaned))
    )
print(len(nftfi_loans_not_in_mq))
# How about loan numbers, how do they vary from a naming standpoint?
# x[0]+x[1] if len(x) >2 else x[0] is such that it returns "v2-1.loan.fixed" from "v2-1.loan.fixed-32381", 
#    else returns "v2.loan.fixed.collection" from "v2.loan.fixed.collection-34479"

unique_to_april_nftfi.loan_no.astype(str).str.split('-').apply(lambda x: x[0]+'-'+x[1] if len(x) >2 else x[0]).unique()
# it seems from the above result that [v2-1.loan.fixed', 'v2.loan.fixed.collection'] are the two loan types which are not accounted for

### Is it due to missed erc20 token denomination?

In [None]:
# Check if it can be due to missing token
usdc_address = '0xA0b86991c6218b36c1d19D4a2e9Eb0cE3606eB48'  # https://etherscan.io/token/0xa0b86991c6218b36c1d19d4a2e9eb0ce3606eb48
usdt_address = '0xdAC17F958D2ee523a2206206994597C13D831ec7'  # https://etherscan.io/token/0xdac17f958d2ee523a2206206994597c13d831ec7
dai_address = '0x6B175474E89094C44Da98b954EedeAC495271d0F'  # https://etherscan.io/token/0x6b175474e89094c44da98b954eedeac495271d0f
weth_address = '0xC02aaA39b223FE8D0A0e5C4F27eAD9083C756Cc2'

stablecoin_addresses = [usdc_address, usdt_address, dai_address, weth_address]

stablecoin_names = ['usdc', 'usdt', 'dai', 'weth']
for stablecoin_address, stablecoin_name in zip(stablecoin_addresses, stablecoin_names):
    stablecoin_address = stablecoin_address.lower()
    unique_to_april_nftfi.loc[unique_to_april_nftfi['loan_erc20denomination'] == stablecoin_address, 'loan_erc20denomination_name'] = stablecoin_name

In [None]:
unique_to_april_nftfi.loan_erc20denomination_name.unique()
# It does not seem to be linked to missing token.

### Let's compute loan volumes per token denomination to see if they match

In [None]:
display(april_nftfi.head(2))
display(april_mq.head(2))

In [None]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
pd.set_option('display.float_format', '{:,.2f}'.format)
display(april_nftfi.groupby('loan_erc20denomination_name').agg('sum'))

display(april_mq.groupby('erc20_name').agg('sum'))


In [None]:

print("NFTfi:")
display(pd.DataFrame(april_nftfi.groupby('loan_erc20denomination_name').agg('sum')['loan_principal_amount']))
print("\n\nMQ:")
display(pd.DataFrame(april_mq.groupby('erc20_name').agg('sum')['principal_amount']))

#### We see from the above results that NFTfi and MQ respective loan principal amounts grouped by the loan erc20 denomination do not match at all. How about the total dollar numbers?

In [None]:
print("NFTfi:")
total_usd_nftfi_volume_per_ccy = pd.DataFrame(april_nftfi.groupby('loan_erc20denomination_name').agg('sum')['usd_value'])
display(total_usd_nftfi_volume_per_ccy)
print("\n\nMQ:")
total_usd_mq_volume_per_ccy = pd.DataFrame(april_mq.groupby('erc20_name').agg('sum')['amt_in_usd'])
display(total_usd_mq_volume_per_ccy)

#### Total numbers are very off. Can that be explained with the set of loans which are not in each dataset?

In [None]:
print("Total dollar value of loan principal for NFTfi loans which are not in MQ, for April and NFTfi:")
display(pd.DataFrame(unique_to_april_nftfi.groupby('loan_erc20denomination_name').agg('sum')['usd_value']))
print("\n\nTotal dollar value of loan principal for MQ loans which are not inNFtfi, for April and MQ:")
display(pd.DataFrame(unique_to_april_mq.groupby('erc20_name').agg('sum')['amt_in_usd']))

In [None]:
usd_values_of_nftfi_not_in_mq = pd.DataFrame(unique_to_april_nftfi.groupby('loan_erc20denomination_name').agg('sum')['usd_value'])
usd_values_of_mq_not_in_nftfi = pd.DataFrame(unique_to_april_mq.groupby('erc20_name').agg('sum')['amt_in_usd'])

In [None]:
weth_usd_amount_nftfi_not_in_mq = usd_values_of_nftfi_not_in_mq.loc['weth'].values[0]
weth_usd_amount_mq_not_in_nftfi = usd_values_of_mq_not_in_nftfi.loc['wETH'].values[0]

weth_usd_amount_total_nftfi = total_usd_nftfi_volume_per_ccy.loc['weth'].values[0]
weth_usd_amount_total_mq = total_usd_mq_volume_per_ccy.loc['wETH'].values[0]

print(weth_usd_amount_total_nftfi, weth_usd_amount_nftfi_not_in_mq, weth_usd_amount_mq_not_in_nftfi, weth_usd_amount_total_mq)

# total NFTfi missing from MQ volume, minus total MQ volume missing from NFTfi

weth_usd_amount_nftfi_not_in_mq - weth_usd_amount_mq_not_in_nftfi - weth_usd_amount_total_nftfi  + weth_usd_amount_total_mq

#### How about dollar amounts PER loan type PER ccy?

In [None]:
usd_values_of_nftfi_not_in_mq = pd.DataFrame(unique_to_april_nftfi.groupby(['loan_erc20denomination_name','loan_type']).agg('sum')['usd_value'])
usd_values_of_nftfi_not_in_mq

In [None]:
print("NFTfi:")
display(total_usd_nftfi_volume_per_ccy)
print("\n\nMQ:")
display(total_usd_mq_volume_per_ccy)

## For the loan IDs which do match... are they the same loans?

In [None]:
import copy
nftfi_with_col_prefix = copy.deepcopy(nftfi)
mq_with_col_prefix = copy.deepcopy(mq)
nftfi_with_col_prefix = nftfi_with_col_prefix.rename(columns={col: 'nftfi_'+ col for col in nftfi.columns})
mq_with_col_prefix = mq_with_col_prefix.rename(columns={col: 'mq_'+ col for col in mq.columns})

matching_mq_with_nftfi = pd.merge(left=nftfi_with_col_prefix, right=mq_with_col_prefix, left_on='nftfi_loan_no_cleaned', right_on='mq_loan_id_cleaned', how='inner', suffixes=('nftfi', 'mq'))
matching_mq_with_nftfi

In [None]:
subset_of_matching_mq_with_nftfi = matching_mq_with_nftfi[['nftfi_date', 'mq_block_timestamp', 'nftfi_usd_value', 'mq_amt_in_usd', 'nftfi_loan_no_cleaned', 'nftfi_loan_type', 'mq_loan_id_cleaned']]
subset_of_matching_mq_with_nftfi

In [None]:
pd.options.plotting.backend = "plotly"
subset_of_matching_mq_with_nftfi['dollar_diff'] = subset_of_matching_mq_with_nftfi['nftfi_usd_value'] - subset_of_matching_mq_with_nftfi['mq_amt_in_usd']

subset_of_matching_mq_with_nftfi.nftfi_date = pd.to_datetime(subset_of_matching_mq_with_nftfi.nftfi_date)
subset_of_matching_mq_with_nftfi.mq_block_timestamp = pd.to_datetime(subset_of_matching_mq_with_nftfi.mq_block_timestamp)

subset_of_matching_mq_with_nftfi.plot.scatter(x=subset_of_matching_mq_with_nftfi.nftfi_date, y=subset_of_matching_mq_with_nftfi.dollar_diff)

In [None]:
subset_of_matching_mq_with_nftfi['date_diff'] = subset_of_matching_mq_with_nftfi['nftfi_date'] - subset_of_matching_mq_with_nftfi['mq_block_timestamp']
subset_of_matching_mq_with_nftfi.plot.scatter(x=subset_of_matching_mq_with_nftfi.nftfi_date, y=subset_of_matching_mq_with_nftfi.date_diff)

In [None]:
import numpy as np
subset_of_matching_mq_with_nftfi['date_diff'] = subset_of_matching_mq_with_nftfi['nftfi_date'] - subset_of_matching_mq_with_nftfi['mq_block_timestamp']
non_zero_time_delta = subset_of_matching_mq_with_nftfi.loc[subset_of_matching_mq_with_nftfi['date_diff'] > np.timedelta64(0)]
display(non_zero_time_delta)
display(non_zero_time_delta.nftfi_loan_type.unique())

print(f"min, max nftfi [{non_zero_time_delta.nftfi_date.min(), non_zero_time_delta.nftfi_date.max()}]")
print(f"min, max mq [{non_zero_time_delta.mq_block_timestamp.min(), non_zero_time_delta.mq_block_timestamp.max()}]")

print(f"\n\nTotal dollar diff for non zero time deltas (IDs not matching) [{non_zero_time_delta.dollar_diff.sum()}]")
print(f"Total dollar diff for both datasets [{subset_of_matching_mq_with_nftfi.dollar_diff.sum()}]")

In [None]:
nftfi.date = pd.to_datetime(nftfi.date)
time_delta = pd.Timedelta(minutes=20)
datemax_minus_time_delta = non_zero_time_delta.nftfi_date.max() - time_delta
datemax_plus_time_delta = non_zero_time_delta.nftfi_date.max() + time_delta

print(non_zero_time_delta.nftfi_date.max())
nftfi.loc[nftfi.date < datemax_plus_time_delta].loc[datemax_minus_time_delta < nftfi.date].sort_values('date')

In [None]:
non_zero_time_delta.loc[non_zero_time_delta['dollar_diff'] == 0]

### Are missing loans from MQ relative to NFTfi, bundles?

In [None]:
display(unique_to_april_nftfi.collection_name.unique())
len(unique_to_april_nftfi.collection_name.unique())

In [None]:
display(unique_to_april_mq.collection_address.unique()[:3])
len(unique_to_april_mq.collection_address.unique())

## How are liquidations accounted for?

In [None]:
# TODO:
# spreadsheet of different transactions that are missing, or are more of them on MQ dataset. we need clear doc discussing which tsx are missing, which are added,
# what are the volume differences coming from.

# try and check that the eth value of those loans per transaction is the same.
# check if volumes per loanerc20 collateral are matching

## Sandbox

In [None]:
try: 
    volume = pd.read_csv('analysis/nftfi_cluster_data/3month_200apr_with_borrow_volume.csv')
    base_data = pd.read_csv('analysis/nftfi_cluster_data/20230522_CADLabs_NFTfi NFT collection APR ranges_vDraft - past 3M; loan count 20; APRs 200; 5 clusters.csv')
except FileNotFoundError:
    volume = (
        pd.read_csv('nftfi_cluster_data/3month_200apr_with_borrow_volume.csv')
        .drop(columns=['mean', 'median', 'std', 'count'])
    )
    base_data = (
        pd.read_csv('nftfi_cluster_data/20230522_CADLabs_NFTfi NFT collection APR ranges_vDraft - past 3M; loan count 20; APRs 200; 5 clusters.csv')
        .drop(columns=['Unnamed: 0', 'Unnamed: 7', 'Unnamed: 8'])
    )

In [None]:
display(volume.head(2))
display(base_data.head(2))

In [None]:
nftfi = pd.merge(left=base_data, right=volume, on='collection_name')
nftfi 
# list(nftfi['borrow_volume'].values)