In [1]:
import secrets

import sqlalchemy
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import TruncatedSVD


In [2]:
DB_NAME = 'chainsync_preprod'
num_chars_per_tokens = 2

cardanoscan_url = 'https://cardanoscan.io/transaction/'
open_url = 'https://mainnet.marlowescan.com/contractView?contractId='

# cardanoscan_url = 'https://preprod.cardanoscan.io/transaction/'
# open_url = 'https://preprod.marlowescan.com/contractView?contractId='


In [3]:
engine = sqlalchemy.create_engine('postgresql://'+secrets.DB_USER+':'+secrets.DB_PASSWORD+'@'+secrets.DB_HOST+'/'+DB_NAME)

In [4]:
df_labels = pd.read_sql('SELECT * FROM analysis.contracts_label_training', engine)

In [5]:
def stringToSent(val):
    words = [(val[i:i+num_chars_per_tokens]) for i in range(0, len(val), num_chars_per_tokens)]
    sentence = ' '.join(words)
    return sentence

df_labels['tokens'] = df_labels.apply(lambda row: stringToSent(row['hex']),axis=1)
df_labels['length'] = df_labels.apply(lambda row: len(row['tokens'].split()),axis=1)

# vectorizer = TfidfVectorizer()
# tfidf_matrix = vectorizer.fit_transform(df['tokens'])
# print(tfidf_matrix.shape)

In [6]:
df_labelsstuctures

Unnamed: 0,id,hex,label,tokens,length
0,b0cb2a22dc4532af3c8c8403e2208cf2d4de15a2d3ed12...,d8799fd8799f581cbea98f0b224a3702e528928bd775bb...,escrow,d8 79 9f d8 79 9f 58 1c be a9 8f 0b 22 4a 37 0...,639
1,368517bb48878442b7a58b4c4b5474cb4d83ed7af9d333...,d8799fd8799f581c5c7fcc954e1bd89e341923f2b9b671...,zerocouponbond,d8 79 9f d8 79 9f 58 1c 5c 7f cc 95 4e 1b d8 9...,377
2,a360035b7252963e7b1781fc936da41af754c40257e6c7...,d8799fd8799f581cee0b96031993a01dafa0c271439814...,swap,d8 79 9f d8 79 9f 58 1c ee 0b 96 03 19 93 a0 1...,455
3,6e90612f42613131939c2b7cb87b0c20202545422840e7...,d8799fd8799f581c3341fe984c724915cebac76ff65251...,contractfordifferenceswithoracle,d8 79 9f d8 79 9f 58 1c 33 41 fe 98 4c 72 49 1...,1024
4,f394a3b33f5cdab847faa06c2e395742f9e14f5a12c1e2...,d8799fd8799f581cb2a73a577af22ed684c282a1c28f04...,contractfordifferences,d8 79 9f d8 79 9f 58 1c b2 a7 3a 57 7a f2 2e d...,1108


In [7]:
df_contracts = pd.read_sql('SELECT * FROM analysis.contracts', engine)

In [8]:
df_contracts['tokens'] = df_contracts.apply(lambda row: stringToSent(row['hex']),axis=1)
df_contracts['length'] = df_contracts.apply(lambda row: len(row['tokens'].split()),axis=1)
df_contracts

Unnamed: 0,id,blockno,slotno,hex,tokens,length
0,000a5348883fb3c0b96936cb7a51787024b9b71b31de80...,628191,20681601,d8799fd8799f40ffd8799fa1d8799fd8799fd87980d879...,d8 79 9f d8 79 9f 40 ff d8 79 9f a1 d8 79 9f d...,1686
1,000fd04097339261e47db646ca0c597669e215846773ef...,457275,16637332,d8799fd8799f40ffd8799fa1d8799fd8799fd87980d879...,d8 79 9f d8 79 9f 40 ff d8 79 9f a1 d8 79 9f d...,221
2,0014afa55bcfa78e41c8312a34fab4261c98565a37f1a0...,355243,14401271,d8799fd8799f40ffd8799fa1d8799fd8799fd87980d879...,d8 79 9f d8 79 9f 40 ff d8 79 9f a1 d8 79 9f d...,402
3,0024f31fea21a4161cdb1d735fc9f11d595435ba2018fb...,1032781,30644824,d8799fd8799f40ffd8799fa1d8799fd8799fd87980d879...,d8 79 9f d8 79 9f 40 ff d8 79 9f a1 d8 79 9f d...,597
4,002b0898ba9ed01da644ca4cdba510106fdb7b92ad5e1a...,1096532,32152686,d8799fd8799f581c8bb3b343d8e404472337966a722150...,d8 79 9f d8 79 9f 58 1c 8b b3 b3 43 d8 e4 04 4...,318
...,...,...,...,...,...,...
7078,ffc4dbec4f18a042d1c5b11ee6767edb9250c3784f6503...,1358871,38540222,d8799fd8799f40ffd8799fa1d8799fd8799fd87980d879...,d8 79 9f d8 79 9f 40 ff d8 79 9f a1 d8 79 9f d...,264
7079,ffc6ba56d15e7cd0f67604da030a54fb26991a63611d92...,446028,16397028,d8799fd8799f40ffd8799fa1d8799fd8799fd87980d879...,d8 79 9f d8 79 9f 40 ff d8 79 9f a1 d8 79 9f d...,713
7080,ffccac42ba58c88fd5e96c93b97789902cb0028517a08f...,1044278,30911714,d8799fd8799f40ffd8799fa1d8799fd8799fd87980d879...,d8 79 9f d8 79 9f 40 ff d8 79 9f a1 d8 79 9f d...,1258
7081,ffe7b10f8bbb4f6a5bba759fa0d2c091c904356de5b3eb...,1033215,30654339,d8799fd8799f40ffd8799fa1d8799fd8799fd87980d879...,d8 79 9f d8 79 9f 40 ff d8 79 9f a1 d8 79 9f d...,597


In [9]:
from difflib import SequenceMatcher

def similar(a, b):
    return SequenceMatcher(None, a, b).ratio()

similar("Apple is good","Appel is good")


0.9230769230769231

In [10]:
similar(
    df_labels.iloc[0].tokens,
    df_contracts.iloc[0].tokens,
)

0.005162770686935322

In [17]:
def checkSimilarity(val, comparedTo):
    return similar(val, comparedTo)


df_contracts['compare_escrow'] = df_contracts.apply(lambda row: checkSimilarity(row['hex'], df_labels.iloc[0].hex),axis=1)
df_contracts['compare_zerocouponbond'] = df_contracts.apply(lambda row: checkSimilarity(row['hex'], df_labels.iloc[1].hex),axis=1)
df_contracts['compare_swap'] = df_contracts.apply(lambda row: checkSimilarity(row['hex'], df_labels.iloc[2].hex),axis=1)
df_contracts['compare_contractfordifferenceswithoracle'] = df_contracts.apply(lambda row: checkSimilarity(row['hex'], df_labels.iloc[3].hex),axis=1)
df_contracts['compare_contractfordifferences'] = df_contracts.apply(lambda row: checkSimilarity(row['hex'], df_labels.iloc[4].hex),axis=1)


In [18]:
df_contracts

Unnamed: 0,id,blockno,slotno,hex,tokens,length,compare_escrow,compare_zerocouponbond,compare_swap,compare_contractfordifferenceswithoracle,compare_contractfordifferences
0,000a5348883fb3c0b96936cb7a51787024b9b71b31de80...,628191,20681601,d8799fd8799f40ffd8799fa1d8799fd8799fd87980d879...,d8 79 9f d8 79 9f 40 ff d8 79 9f a1 d8 79 9f d...,1686,0.005161,0.009695,0.005605,0.004428,0.004295
1,000fd04097339261e47db646ca0c597669e215846773ef...,457275,16637332,d8799fd8799f40ffd8799fa1d8799fd8799fd87980d879...,d8 79 9f d8 79 9f 40 ff d8 79 9f a1 d8 79 9f d...,221,0.013953,0.035117,0.017751,0.009639,0.009029
2,0014afa55bcfa78e41c8312a34fab4261c98565a37f1a0...,355243,14401271,d8799fd8799f40ffd8799fa1d8799fd8799fd87980d879...,d8 79 9f d8 79 9f 40 ff d8 79 9f a1 d8 79 9f d...,402,0.011527,0.026958,0.014002,0.008415,0.007947
3,0024f31fea21a4161cdb1d735fc9f11d595435ba2018fb...,1032781,30644824,d8799fd8799f40ffd8799fa1d8799fd8799fd87980d879...,d8 79 9f d8 79 9f 40 ff d8 79 9f a1 d8 79 9f d...,597,0.009709,0.021561,0.011407,0.007403,0.007038
4,002b0898ba9ed01da644ca4cdba510106fdb7b92ad5e1a...,1096532,32152686,d8799fd8799f581c8bb3b343d8e404472337966a722150...,d8 79 9f d8 79 9f 58 1c 8b b3 b3 43 d8 e4 04 4...,318,0.016719,0.034532,0.020699,0.011923,0.011220
...,...,...,...,...,...,...,...,...,...,...,...
7078,ffc4dbec4f18a042d1c5b11ee6767edb9250c3784f6503...,1358871,38540222,d8799fd8799f40ffd8799fa1d8799fd8799fd87980d879...,d8 79 9f d8 79 9f 40 ff d8 79 9f a1 d8 79 9f d...,264,0.013289,0.031201,0.016690,0.009317,0.008746
7079,ffc6ba56d15e7cd0f67604da030a54fb26991a63611d92...,446028,16397028,d8799fd8799f40ffd8799fa1d8799fd8799fd87980d879...,d8 79 9f d8 79 9f 40 ff d8 79 9f a1 d8 79 9f d...,713,0.008876,0.018349,0.010274,0.006908,0.006590
7080,ffccac42ba58c88fd5e96c93b97789902cb0028517a08f...,1044278,30911714,d8799fd8799f40ffd8799fa1d8799fd8799fd87980d879...,d8 79 9f d8 79 9f 40 ff d8 79 9f a1 d8 79 9f d...,1258,0.006326,0.012844,0.007005,0.005259,0.005072
7081,ffe7b10f8bbb4f6a5bba759fa0d2c091c904356de5b3eb...,1033215,30654339,d8799fd8799f40ffd8799fa1d8799fd8799fd87980d879...,d8 79 9f d8 79 9f 40 ff d8 79 9f a1 d8 79 9f d...,597,0.009709,0.022587,0.011407,0.007403,0.007038


In [19]:
df_contracts.nlargest(n=5, columns=['compare_zerocouponbond'])


Unnamed: 0,id,blockno,slotno,hex,tokens,length,compare_escrow,compare_zerocouponbond,compare_swap,compare_contractfordifferenceswithoracle,compare_contractfordifferences
1509,368517bb48878442b7a58b4c4b5474cb4d83ed7af9d333...,1472874,41320711,d8799fd8799f581c5c7fcc954e1bd89e341923f2b9b671...,d8 79 9f d8 79 9f 58 1c 5c 7f cc 95 4e 1b d8 9...,377,0.015748,1.0,0.019231,0.01142,0.010774
4850,b0cb2a22dc4532af3c8c8403e2208cf2d4de15a2d3ed12...,1472814,41319333,d8799fd8799f581cbea98f0b224a3702e528928bd775bb...,d8 79 9f d8 79 9f 58 1c be a9 8f 0b 22 4a 37 0...,639,1.0,0.26378,0.014625,0.009621,0.009731
3008,6e90612f42613131939c2b7cb87b0c20202545422840e7...,1473035,41324557,d8799fd8799f581c3341fe984c724915cebac76ff65251...,d8 79 9f d8 79 9f 58 1c 33 41 fe 98 4c 72 49 1...,1024,0.009621,0.191292,0.010818,1.0,0.007505
3618,85f0e180a8aa01881d5e49529111785f3022578db9272e...,1303449,37207615,d8799fd8799f581c8bb3b343d8e404472337966a722150...,d8 79 9f d8 79 9f 58 1c 8b b3 b3 43 d8 e4 04 4...,77,0.022346,0.052863,0.030075,0.014532,0.013502
5789,d20f3c74397e4a7e978ac5fdf7c5eb20eb3b1d0dc512a8...,1303427,37207135,d8799fd8799f581c8bb3b343d8e404472337966a722150...,d8 79 9f d8 79 9f 58 1c 8b b3 b3 43 d8 e4 04 4...,77,0.022346,0.052863,0.030075,0.014532,0.013502
