In [53]:
import pandas as pd
import numpy as np

from elasticsearch_dsl import Search
from dotenv import load_dotenv
from sklearn.neighbors import NearestNeighbors

from config import elastic_search
from model import retrieve_data, expand_metadata, input_punk, update_punks, prediction_labels

In [54]:
# Load variables from a local .env file into the process environment.
load_dotenv()

# Connections
# elastic_search() is imported from the project's config module; presumably it
# returns an Elasticsearch client built from the environment — TODO confirm.
elastic = elastic_search()

In [162]:
# Load the punks table from punks.csv, using the first CSV column as the index.
df = pd.read_csv('punks.csv', index_col=0)

In [129]:
# Index value of the punk to query in the similarity cell.
# NOTE(review): this name shadows the builtin id(); a rename would have to be
# applied together with the cell that reads it.
id = 4269

In [141]:
# Clean the frame so it contains no NaN / inf values.
#
# Infinities are turned into NaN *first* so that the fills below remove them as
# well; doing the replace last would re-introduce NaN into an already-filled frame.
df = df.replace([np.inf, -np.inf], np.nan)

df['blockTimestamp'] = pd.to_datetime(df['blockTimestamp'])
# Rows without a timestamp fall back to the earliest timestamp in the data.
start_date = df['blockTimestamp'].min()

# Plain column assignment instead of `df[col].fillna(..., inplace=True)`: the
# chained in-place form acts on a temporary Series, is deprecated in pandas 2.x
# and has no effect under copy-on-write.
df['blockTimestamp'] = df['blockTimestamp'].fillna(start_date)

# Every remaining missing value (including the two price columns) becomes 0.0.
df = df.fillna(0.0)

In [142]:
# Count missing values per column to check the result of the cleaning step.
df.isnull().sum()

blockTimestamp          0
usdPrice                0
totalDecimalPrice       0
tags_3D Glasses         0
tags_Alien              0
                       ..
tags_Welding Goggles    0
tags_Wild Blonde        0
tags_Wild Hair          0
tags_Wild White Hair    0
tags_Zombie             0
Length: 95, dtype: int64

In [163]:
# Display the frame (pandas truncates the rendering of large frames).
df

Unnamed: 0_level_0,blockTimestamp,usdPrice,totalDecimalPrice,tags_3D Glasses,tags_Alien,tags_Ape,tags_Bandana,tags_Beanie,tags_Big Beard,tags_Big Shades,...,tags_Tiara,tags_Top Hat,tags_VR,tags_Vampire Hair,tags_Vape,tags_Welding Goggles,tags_Wild Blonde,tags_Wild Hair,tags_Wild White Hair,tags_Zombie
assetId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,2017-06-23 22:56:29+00:00,325.275529,0.980,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2022-05-15 02:31:59+00:00,39746.704800,19.540,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2017-06-23 21:05:06+00:00,0.000000,0.000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
3,2017-06-23 21:05:06+00:00,0.000000,0.000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
4,2017-06-23 21:05:06+00:00,0.000000,0.000,0.0,0.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9995,2017-06-23 21:05:06+00:00,0.000000,0.000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9996,2017-06-23 21:05:06+00:00,0.000000,0.000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9997,2020-09-27 00:50:37+00:00,19656.189250,60.475,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
9998,2021-08-11 04:05:28+00:00,347455.532940,110.000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0


In [172]:
from datetime import date

In [186]:
# Reference date used to compute `days_old` below.
# NOTE(review): results depend on the day the notebook is run.
today = date.today()

In [188]:
# Find the most recently traded punks among the nearest neighbours of the query punk.

# Sale fields are what we want to look up, not features to match on.
target = ['totalDecimalPrice', 'usdPrice', 'blockTimestamp']

# Feature matrix: every column except the sale fields (e.g. the tags_* columns).
df_data = df.drop(columns=target)

# Sale information, aligned row-for-row with df_data.
df_target = df[target]

# Brute-force cosine k-NN, 50 neighbours per query.
# leaf_size is only used by the tree-based algorithms, so it is not passed here.
nn = NearestNeighbors(algorithm='brute', metric='cosine', n_neighbors=50, n_jobs=-1)
nn.fit(df_data)

# Query point. NOTE(review): `id` is the notebook variable set in an earlier
# cell and shadows the builtin id().
input_index = id

# Feature vector of the query punk, selected by index label.
data_vect = df_data[df_data.index == input_index].values
if len(data_vect) == 0:
    # Fail with a clear message instead of an opaque sklearn error on an empty query.
    raise KeyError(f'{input_index!r} not found in the index of df')
neigh_dist, neigh_indices = nn.kneighbors(data_vect)

# Positional row numbers of the 25 closest rows (closest first).
indexs = neigh_indices.flat[0:25].tolist()

# Sale info for those neighbours; copy so the columns added below never touch df_target.
output = df_target.iloc[indexs].copy()

# Add an image url for each punk.
output['image_url'] = 'https://www.larvalabs.com/cryptopunks/cryptopunk' + output.index.astype(str) + '.png'
# Similarity rank: 0 is the closest row returned by the k-NN query.
output['ranking'] = np.arange(len(output))
output['blockTimestamp'] = pd.to_datetime(output['blockTimestamp']).dt.date
output['days_old'] = (today - output['blockTimestamp']).dt.days

# Keep the five most recent sales, excluding the query punk itself.
output = output.sort_values(by=['blockTimestamp'], ascending=False)
output = output.loc[~output.index.isin([input_index])]
output = output.iloc[:5].copy()

# Averages are taken before usdPrice is turned into a display string.
usd_mean = output['usdPrice'].mean()
eth_mean = output['totalDecimalPrice'].mean()

output['usdPrice'] = output['usdPrice'].apply(lambda x: "${:.1f}k".format((x/1000)))

# Build the presentation table by chaining on a fresh frame; in-place
# reset_index/rename on a slice of `output` raised SettingWithCopyWarning.
final = (
    output[['blockTimestamp', 'usdPrice', 'totalDecimalPrice', 'ranking']]
    .reset_index()
    .rename(columns={'assetId': 'ID', 'blockTimestamp': 'Date', 'usdPrice': 'USD',
                     'totalDecimalPrice': 'ETH', 'ranking': 'Ranking'})
)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [189]:
# Display the selected neighbours (usdPrice is already formatted as a string here).
output

Unnamed: 0_level_0,totalDecimalPrice,usdPrice,blockTimestamp,image_url,ranking,days_old
assetId,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
983,148.0,$481.1k,2021-08-28,https://www.larvalabs.com/cryptopunks/cryptopu...,0,260
9595,95.95,$311.9k,2021-08-28,https://www.larvalabs.com/cryptopunks/cryptopu...,11,260
6358,39.99,$98.8k,2021-07-31,https://www.larvalabs.com/cryptopunks/cryptopu...,14,288
9540,21.95,$41.6k,2021-07-17,https://www.larvalabs.com/cryptopunks/cryptopu...,23,302
1276,0.007563,$0.0k,2021-04-23,https://www.larvalabs.com/cryptopunks/cryptopu...,18,387


In [144]:
# Rebuild the punks table from the raw sales data.
table_name = 'nft_sales_cryptopunks_ͼ'
raw_data = retrieve_data(table_name)

# One row per asset: the last entry of each assetId group.
# NOTE(review): this is the latest trade only if raw_data is ordered
# chronologically — confirm against retrieve_data.
last_trade = raw_data.groupby(['assetId'], as_index=False).last()

# The third and fourth '_'-separated parts of the table name are used as
# project and ticker.
name_parts = table_name.split('_')
project = name_parts[2]
ticker = name_parts[3]

fixed_set = prediction_labels(project, ticker)

combined = pd.merge(fixed_set, last_trade, how='left', on=['assetId'], suffixes=(None, '_y'))
# Drop the right-hand duplicates created by the merge. Match the '_y' *suffix*
# only; a substring match would also drop unrelated columns that merely
# contain '_y' somewhere in their name.
combined = combined.loc[:, ~combined.columns.str.endswith('_y')]

expanded = expand_metadata(combined)

# Clean the frame. Infinities become NaN first so the fills below remove them
# too (replacing last would re-introduce NaN). replace() returns a new frame,
# so `expanded` is left untouched.
df = expanded.replace([np.inf, -np.inf], np.nan)
df['blockTimestamp'] = pd.to_datetime(df['blockTimestamp'])
# Rows without a timestamp fall back to the earliest timestamp in the data.
start_date = df['blockTimestamp'].min()
# Column assignment instead of chained `fillna(..., inplace=True)`, which is
# deprecated in pandas 2.x and has no effect under copy-on-write.
df['blockTimestamp'] = df['blockTimestamp'].fillna(start_date)
# Every remaining missing value (including the price columns) becomes 0.0.
df = df.fillna(0.0)

(3/6) Exploding meta data column into mutiple columns


In [145]:
def prep(df):
    """Return a trimmed copy of ``df`` indexed by ``assetId``.

    The bookkeeping columns listed below are removed, ``blockTimestamp`` is
    parsed with ``pd.to_datetime`` and ``assetId`` becomes the index.
    The input frame is not modified.

    Raises:
        KeyError: if any of the expected columns is missing from ``df``.
    """
    unused_columns = [
        'transactionHash', 'nftAddress', 'marketAddress', 'tokenAddress',
        'feeUSDPrice', 'feeTotalPrice', 'buyer_name', 'seller_name', 'wallets',
        'feeTotalDecimalPrice', 'tokenTicker', 'blockNumber',
        'feeCollectors', 'buyer', 'seller', 'blockchain', 'project', 'nftTicker',
        'saleType', 'totalPrice', 'image',
    ]

    trimmed = df.drop(columns=unused_columns)
    # assign() overwrites the existing column in place, keeping its position.
    parsed = trimmed.assign(blockTimestamp=pd.to_datetime(trimmed['blockTimestamp']))
    return parsed.set_index('assetId')

In [146]:
# Trim the cleaned frame and index it by assetId.
test = prep(df)

In [147]:
# Write the prepared table to punks.csv; the index is written as the first column.
test.to_csv('punks.csv')

In [None]:
# Reload the exported table. The first CSV column is the index written by
# to_csv, so restore it as the index; without index_col it would load as an
# ordinary column and end up in the k-NN feature matrix.
df = pd.read_csv('punks.csv', index_col=0)