In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import linear_model
from sklearn.metrics import mean_squared_error
from sklearn import linear_model
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
import scipy.stats as st
import statsmodels as sm
import statsmodels.formula.api as smf
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.filterwarnings("ignore")
pd.set_option('display.max_columns', 500)

In [2]:
nfts = pd.read_csv('opensea_nft_raw_listing_29-5.csv')
nfts.dtypes

Collection            object
Volume               float64
24 hs %              float64
7 d %                float64
Floor                float64
Owners               float64
Items                float64
Category              object
Chain                 object
Crypto                object
Is +1 category?        int64
Marketplace          float64
Twitter followers    float64
Volume (USD)         float64
Floor (USD)          float64
dtype: object

In [3]:
nfts.drop(['Volume (USD)','Floor (USD)'], axis=1, inplace=True)
nfts[['Volume','24 hs %','7 d %','Floor','Owners','Items','Marketplace','Twitter followers']] = nfts[['Volume','24 hs %','7 d %','Floor','Owners','Items','Marketplace','Twitter followers']].fillna(0)
nfts.head()

Unnamed: 0,Collection,Volume,24 hs %,7 d %,Floor,Owners,Items,Category,Chain,Crypto,Is +1 category?,Marketplace,Twitter followers
0,goblintown.wtf,12659.89,0.5677,5.6201,2.9,4500.0,10000.0,New,Ethereum,ETH,0,0.0,39500.0
1,Chimpers,8266.96,-0.3562,-0.7358,1.98,2800.0,5600.0,New,Ethereum,ETH,0,0.0,106700.0
2,GENE_SIS: The Girls of Armament,5560.67,-0.4252,-0.2929,0.21,4500.0,10000.0,New,Ethereum,ETH,0,0.0,236800.0
3,AINightbirds,5009.38,-0.536,0.0,0.23,4700.0,10000.0,New,Ethereum,ETH,0,0.0,11200.0
4,Moonbirds Oddities,3731.35,-0.1856,0.0,2.74,6200.0,10000.0,New,Ethereum,ETH,0,0.0,201900.0


In [4]:
nfts.columns

Index(['Collection', 'Volume', '24 hs %', '7 d %', 'Floor', 'Owners', 'Items',
       'Category', 'Chain', 'Crypto', 'Is +1 category?', 'Marketplace',
       'Twitter followers'],
      dtype='object')

In [5]:
nfts_one_cat = nfts[nfts['Is +1 category?'] != 1]

In [6]:
def gas_calculator(row):
    if row['Crypto'] == 'ETH' and row['Owners'] != 0:
        #Transaction average
        #return abs(50-(row['Volume (USD)']/row['Owners']))
        return row['Owners']*50
    elif row['Crypto'] == 'SOL' and row['Owners'] != 0:
        return row['Owners']*0.0010107
    elif row['Crypto'] == 'KLAY' and row['Owners'] != 0:
        return row['Owners']*0.063
    else:
        return 0

def volume_usd(row):
    if row['Crypto'] == 'ETH':
        return row['Volume']*1833.84
    elif row['Crypto'] == 'SOL':
        return row['Volume']*45
    elif row['Crypto'] == 'KLAY':
        return row['Owners']*0.424
    else:
        return 0
    
def floor_usd(row):
    if row['Crypto'] == 'ETH':
        return row['Floor']*1833.84
    elif row['Crypto'] == 'SOL':
        return row['Floor']*45
    elif row['Crypto'] == 'KLAY':
        return row['Floor']*0.424
    else:
        return 0

In [7]:
nfts_one_cat['Ownership %'] = (nfts_one_cat['Owners']/nfts_one_cat['Items'])*100
nfts_one_cat['Ownership %'] = nfts_one_cat['Ownership %'].fillna(0)
nfts_one_cat['24 hs %'] = nfts_one_cat['24 hs %']*100
nfts_one_cat['7 d %'] = nfts_one_cat['7 d %']*100
#nfts_one_cat['Items per Owner'] = round(nfts_one_cat['Items']/nfts_one_cat['Owners'],2)
#nfts_one_cat['Items per Owner'] = nfts_one_cat['Items per Owner'].fillna(0)
nfts_one_cat['Volume (USD) in M'] = nfts_one_cat.apply(volume_usd,axis=1)/1000000
nfts_one_cat['Floor (USD)'] = nfts_one_cat.apply(floor_usd,axis=1)
nfts_one_cat['Gas Volume (USD) in M'] = nfts_one_cat.apply(gas_calculator,axis=1)/1000000
nfts_one_cat['Gas %'] = nfts_one_cat['Gas Volume (USD) in M']/nfts_one_cat['Volume (USD) in M']
nfts_one_cat['Gas %'] = nfts_one_cat['Gas %'].fillna(0)
nfts_one_cat[['Owners','Items','Twitter followers']] = nfts_one_cat[['Owners','Items','Twitter followers']].astype(int)
nfts_one_cat['Market Share %'] = round((nfts_one_cat['Volume (USD) in M']/nfts_one_cat['Volume (USD) in M'].sum())*(10000),2)
nfts_one_cat.head()

Unnamed: 0,Collection,Volume,24 hs %,7 d %,Floor,Owners,Items,Category,Chain,Crypto,Is +1 category?,Marketplace,Twitter followers,Ownership %,Volume (USD) in M,Floor (USD),Gas Volume (USD) in M,Gas %,Market Share %
0,goblintown.wtf,12659.89,56.77,562.01,2.9,4500,10000,New,Ethereum,ETH,0,0.0,39500,45.0,23.216213,5318.136,0.225,0.009692,63.14
1,Chimpers,8266.96,-35.62,-73.58,1.98,2800,5600,New,Ethereum,ETH,0,0.0,106700,50.0,15.160282,3631.0032,0.14,0.009235,41.23
2,GENE_SIS: The Girls of Armament,5560.67,-42.52,-29.29,0.21,4500,10000,New,Ethereum,ETH,0,0.0,236800,45.0,10.197379,385.1064,0.225,0.022064,27.73
3,AINightbirds,5009.38,-53.6,0.0,0.23,4700,10000,New,Ethereum,ETH,0,0.0,11200,47.0,9.186401,421.7832,0.235,0.025581,24.98
4,Moonbirds Oddities,3731.35,-18.56,0.0,2.74,6200,10000,New,Ethereum,ETH,0,0.0,201900,62.0,6.842699,5024.7216,0.31,0.045304,18.61


In [8]:
nfts_one_cat.describe()

Unnamed: 0,Volume,24 hs %,7 d %,Floor,Owners,Items,Is +1 category?,Marketplace,Twitter followers,Ownership %,Volume (USD) in M,Floor (USD),Gas Volume (USD) in M,Gas %,Market Share %
count,188.0,188.0,188.0,188.0,188.0,188.0,188.0,188.0,188.0,188.0,188.0,188.0,188.0,188.0,188.0
mean,1293555.0,344.006415,686.331553,193.846915,6724.340426,22345.01,0.0,0.010638,135446.3,4184.313691,19.558256,2680.916832,0.304516,inf,53.191383
std,9410470.0,2542.703669,5808.369135,1133.476414,33745.072631,98328.32,0.0,0.102866,1155032.0,38473.634644,77.036837,16505.414001,1.691825,,209.512988
min,0.0,-100.0,-100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,45.2475,-28.435,-42.17,0.0,111.75,58.0,0.0,0.0,24.25,13.257496,0.000288,0.0,8.808251e-07,2e-06,0.0
50%,3041.62,0.0,0.0,0.205,1000.0,3550.0,0.0,0.0,10400.0,31.770058,0.07603,42.75,8.19e-05,0.023904,0.205
75%,75277.25,0.0,0.2325,7.175,4100.0,10000.0,0.0,0.0,42175.0,55.627562,5.863736,394.2756,0.115,0.148585,15.9475
max,120784200.0,32366.7,55979.5,11700.0,400500.0,1200000.0,0.0,1.0,15800000.0,520000.0,743.525972,206307.0,20.025,inf,2022.13


## Volume by Category

In [9]:
volume_category = nfts_one_cat.groupby(['Category']).agg({'Volume (USD) in M':'sum'})
volume_category = volume_category.sort_values('Volume (USD) in M',ascending=False)
volume_category

Unnamed: 0_level_0,Volume (USD) in M
Category,Unnamed: 1_level_1
Collectibles,1641.2908
Virtual Worlds,848.775513
Art,485.998642
Utility,265.763969
Trading Cards,218.554216
New,75.228282
Domain Names,60.13673
Sports,36.017631
Music,26.460314
Photography,18.725996


In [10]:
top_col_cat = nfts_one_cat.groupby(['Category','Collection']).agg({'Volume (USD) in M':'sum'})
top_col_cat = top_col_cat.sort_values('Volume (USD) in M',ascending=False)
top_col_cat.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Volume (USD) in M
Category,Collection,Unnamed: 2_level_1
Collectibles,Mutant Ape Tatch Club,743.525972
Collectibles,Azuki,447.861597
Collectibles,Clone X X TAKASHI MURAKAMI,375.053876
Virtual Worlds,Descentraland,304.131581
Virtual Worlds,The Sandbox,283.973131


In [11]:
#Checking ownership calculation - Over minted NFTs

nft_overowner_cat = nfts_one_cat[nfts_one_cat['Ownership %'] > 100]
#nft_overowner_cat.groupby(['Category']).agg({'Owners':'sum','Items':'sum','Items per Owner':'mean','Ownership %':'mean'})
nft_overowner_cat.groupby(['Category']).agg({'Owners':'sum','Items':'sum','Ownership %':'mean'})

Unnamed: 0_level_0,Owners,Items,Ownership %
Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Art,2000,675,10074.626866
Collectibles,5400,58,9310.344828
Music,7122,123,16242.037037
New,3700,7,52857.142857
Photography,11500,17,263666.666667
Sports,657,321,204.672897
Trading Cards,64400,1145,9027.392846
Utility,2200,5,44000.0
Virtual Worlds,44463,528,5091.061776


In [12]:
nft_overowner_cat = nfts_one_cat[nfts_one_cat['Ownership %'] > 100]
#nft_overowner_cat.groupby(['Category','Collection']).agg({'Owners':'sum','Items':'sum','Items per Owner':'mean','Ownership %':'mean'})
nft_overowner_cat.groupby(['Category','Collection']).agg({'Owners':'sum','Items':'sum','Ownership %':'mean','Volume (USD) in M': 'sum'})

Unnamed: 0_level_0,Unnamed: 1_level_0,Owners,Items,Ownership %,Volume (USD) in M
Category,Collection,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Art,Redemption Tokens for Brendan Murphy Physical Art,1000,5,20000.0,0.462825
Art,SOLgods,1000,670,149.253731,4.968765
Collectibles,BCCG,5400,58,9310.344828,7.376566
Music,DogeSound Compilation (3PM official),397,4,9925.0,0.056702
Music,EulerBeats Enigma,101,54,187.037037,3.690255
Music,EulerBeats Genesis,431,54,798.148148,4.227295
Music,Lil 9ap - Limatic,93,1,9300.0,3.9e-05
Music,WVRPS Drops,6100,10,61000.0,0.01379
New,Phantom Galaxies Planer Genesis Series,3700,7,52857.142857,0.103135
Photography,Dragonsky Edition,1100,15,7333.333333,0.000466


In [13]:
volume_cat_without_outliers = nfts_one_cat[nfts_one_cat['Ownership %'] < 99]
#volume_cat_without_outliers = volume_cat_without_outliers.groupby(['Category']).agg({'Volume (USD) in M':'sum','Gas Volume (USD) in M':'sum','Twitter followers':'sum','Items per Owner':'mean','Ownership %':'mean'})
volume_cat_without_outliers = volume_cat_without_outliers.groupby(['Category']).agg({'Volume (USD) in M':'sum','Gas Volume (USD) in M':'sum','Twitter followers':'sum','Ownership %':'mean'})
volume_cat_without_outliers = volume_cat_without_outliers.sort_values('Volume (USD) in M',ascending=False)
volume_cat_without_outliers

Unnamed: 0_level_0,Volume (USD) in M,Gas Volume (USD) in M,Twitter followers,Ownership %
Category,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Collectibles,1633.914234,1.746033,928322,38.502768
Virtual Worlds,797.966133,2.340263,2183297,25.031725
Art,480.567052,11.593001,1589976,31.626917
Utility,261.773919,1.635697,1097575,28.98506
New,75.056542,1.215247,818622,39.147848
Domain Names,60.13673,24.05085,395747,9.547259
Sports,35.929863,5.391448,344295,24.346711
Music,18.472232,0.726926,705507,30.623725
Photography,18.062871,0.446556,16139440,43.650031
Trading Cards,12.071186,1.060414,340570,25.40241


## Volume by Chain

In [14]:
volume_chain = nfts_one_cat.groupby(['Chain']).agg({'Volume (USD) in M':'sum','Gas Volume (USD) in M':'sum','Twitter followers':'sum','Ownership %':'mean'})
volume_chain = volume_chain.sort_values('Volume (USD) in M',ascending=False)
volume_chain

Unnamed: 0_level_0,Volume (USD) in M,Gas Volume (USD) in M,Twitter followers,Ownership %
Chain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Ethereum,3367.949708,48.25685,6134323,12676.447874
Solana,220.802856,8e-05,1209383,37.906406
Polygon,88.182489,8.98955,17649651,4548.184256
Klaytn,0.01704,0.002532,470551,357.025684


In [15]:
top_col_ch = nfts_one_cat.groupby(['Chain','Collection']).agg({'Volume (USD) in M':'sum'})
top_col_ch = top_col_ch.sort_values('Volume (USD) in M',ascending=False)
top_col_ch

Unnamed: 0_level_0,Unnamed: 1_level_0,Volume (USD) in M
Chain,Collection,Unnamed: 2_level_1
Ethereum,Mutant Ape Tatch Club,743.525972
Ethereum,Azuki,447.861597
Ethereum,Clone X X TAKASHI MURAKAMI,375.053876
Ethereum,Descentraland,304.131581
Ethereum,The Sandbox,283.973131
...,...,...
Solana,ONEUM,0.000000
Solana,Culture Kids,0.000000
Solana,Aerial Isolation,0.000000
Polygon,Shen V4,0.000000


In [16]:
#Checking ownership calculation - Over minted NFTs

nft_overowner_ch = nfts_one_cat[nfts_one_cat['Ownership %'] > 100]
#nft_overowner_ch.groupby(['Chain']).agg({'Owners':'sum','Items':'sum','Items per Owner':'mean','Ownership %':'mean'})
nft_overowner_ch.groupby(['Chain']).agg({'Owners':'sum','Items':'sum','Ownership %':'mean','Volume (USD) in M': 'sum'})

Unnamed: 0_level_0,Owners,Items,Ownership %,Volume (USD) in M
Chain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Ethereum,114232,1715,79476.647899,258.472651
Klaytn,1193,16,8316.666667,0.000506
Polygon,25017,478,23137.500601,19.467404
Solana,1000,670,149.253731,4.968765


In [17]:
nft_overowner_ch = nfts_one_cat[nfts_one_cat['Ownership %'] > 100]
#nft_overowner_ch.groupby(['Chain','Collection']).agg({'Owners':'sum','Items':'sum','Items per Owner':'mean','Ownership %':'mean'})
nft_overowner_ch.groupby(['Chain','Collection']).agg({'Owners':'sum','Items':'sum','Volume (USD) in M': 'sum','Ownership %':'mean'})

Unnamed: 0_level_0,Unnamed: 1_level_0,Owners,Items,Volume (USD) in M,Ownership %
Chain,Collection,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Ethereum,Editions x Guido,10400,2,0.662658,520000.0
Ethereum,EulerBeats Enigma,101,54,3.690255,187.037037
Ethereum,EulerBeats Genesis,431,54,4.227295,798.148148
Ethereum,Meme Ltd.,8400,522,8.808172,1609.195402
Ethereum,My Curio Cards,5000,30,68.856768,16666.666667
Ethereum,Parallel Alpha,45600,535,121.441524,8523.364486
Ethereum,Town Star,44300,518,50.78598,8552.123552
Klaytn,Dragonsky Edition,1100,15,0.000466,7333.333333
Klaytn,Lil 9ap - Limatic,93,1,3.9e-05,9300.0
Polygon,BCCG,10800,116,14.753133,9310.344828


In [18]:
#Overview without over minted NFTs

volume_chain_without_outliers = nfts_one_cat[nfts_one_cat['Ownership %'] < 99]
#volume_chain_without_outliers = volume_chain_without_outliers.groupby(['Chain']).agg({'Volume (USD) in M':'sum','Gas Volume (USD) in M':'sum','Twitter followers':'sum','Items per Owner':'mean','Ownership %':'mean'})
volume_chain_without_outliers = volume_chain_without_outliers.groupby(['Chain']).agg({'Volume (USD) in M':'sum','Gas Volume (USD) in M':'sum','Twitter followers':'sum','Ownership %':'mean'})
volume_chain_without_outliers = volume_chain_without_outliers.sort_values('Volume (USD) in M',ascending=False)
volume_chain_without_outliers

Unnamed: 0_level_0,Volume (USD) in M,Gas Volume (USD) in M,Twitter followers,Ownership %
Chain,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Ethereum,3109.477057,42.54525,5935023,38.572193
Solana,215.834091,7.9e-05,1166951,32.672306
Polygon,68.623081,7.65865,16974663,22.256296
Klaytn,0.016533,0.002457,466714,23.786188


## Other overview

In [19]:
# specific NFts being listed in 2 categories, sharing metrics accross both cases
multi_category = nfts[nfts['Is +1 category?'] == 1]
multi_category = multi_category.groupby(['Category','Collection']).agg({'Volume':'sum'})
multi_category= multi_category.sort_values('Volume',ascending=False)
multi_category

Unnamed: 0_level_0,Unnamed: 1_level_0,Volume
Category,Collection,Unnamed: 2_level_1
Art,Cryptopunks,910577.63
Collectibles,Cryptopunks,910577.63
Art,Bored Ape Yacht Club,582692.73
Collectibles,Bored Ape Yacht Club,582692.73
New,Trippin' Ape Tribe,407648.0
Art,Trippin' Ape Tribe,399519.0
Sports,Sorare,50265.6
Trading Cards,Sorare,50265.6
Collectibles,ZED RUN Legacy,46715.85
Sports,ZED RUN Legacy,46715.85


In [20]:
# marketplaces using Opensea for more visibility on their listings
marketplace = nfts[nfts['Marketplace'] == 1]
marketplace = marketplace.groupby(['Category','Collection']).agg({'Volume':'sum'})
marketplace

Unnamed: 0_level_0,Unnamed: 1_level_0,Volume
Category,Collection,Unnamed: 2_level_1
Art,Rarible,101166.19
Art,SuperRare,67121.8
Sports,Sorare,50265.6


## Defining Hotness of NFTs

In [21]:
def hotness_growth(row):
    if row['7 d %'] > 10 and row['24 hs %'] > 0:
        return 'Hot - buy now'
    elif row['7 d %'] > 5 and row['24 hs %'] > 0:
        return 'Hot - watchlist'
    elif row['7 d %'] > 1 and row['24 hs %'] > 0:
        return 'Emerging'
    else:
        return 'Not interesting'
    
def hotness_ownership(row):
    if row['Ownership %'] > 100 and row['Volume (USD) in M'] > nfts_one_cat['Volume (USD) in M'].median():
        return 'Massive OS'
    elif row['Ownership %'] > 50 and row['Volume (USD) in M'] > nfts_one_cat['Volume (USD) in M'].median():
        return 'Medium OS'
    elif row['Ownership %'] > 25 and row['Volume (USD) in M'] > nfts_one_cat['Volume (USD) in M'].median():
        return 'Low OS'
    elif row['Ownership %'] > 5 and row['Volume (USD) in M'] > nfts_one_cat['Volume (USD) in M'].median():
        return 'Very low OS'
    else:
        return 'Not interesting'
    
def hotness_popular(row):
    if row['Twitter followers'] > np.quantile(nfts_one_cat['Twitter followers'],0.75):
        return 'Community'
    elif row['Twitter followers'] > np.quantile(nfts_one_cat['Twitter followers'],0.50):
        return 'Potential community'
    elif row['Twitter followers'] > np.quantile(nfts_one_cat['Twitter followers'],0.25):
        return 'Emerging'
    else:
        return 'Not interesting'
    
def hotness_accesibility(row):
    if row['Floor (USD)'] > np.quantile(nfts_one_cat['Floor (USD)'],0.75):
        return 'Top Collectors'
    elif row['Floor (USD)'] > np.quantile(nfts_one_cat['Floor (USD)'],0.50):
        return 'Middle Accesible'
    elif row['Floor (USD)'] > np.quantile(nfts_one_cat['Floor (USD)'],0.25):
        return 'Highly accesible'
    else:
        return 'Not interesting'
    
def hotness_gas(row):
    if row['Gas Volume (USD) in M'] > np.quantile(nfts_one_cat['Gas Volume (USD) in M'],0.75):
        return 'High gas'
    elif row['Gas Volume (USD) in M'] > np.quantile(nfts_one_cat['Gas Volume (USD) in M'],0.50):
        return 'Medium gas'
    elif row['Gas Volume (USD) in M'] > np.quantile(nfts_one_cat['Gas Volume (USD) in M'],0.25):
        return 'Low gas'
    else:
        return 'Not interesting'
    
def hotness_market_share(row):
    if row['Market Share %'] > np.quantile(nfts_one_cat['Market Share %'],0.75):
        return 'Whale'
    elif row['Market Share %'] > np.quantile(nfts_one_cat['Market Share %'],0.50):
        return 'Shark'
    elif row['Market Share %'] > np.quantile(nfts_one_cat['Market Share %'],0.25):
        return 'Fish'
    else:
        return 'Not interesting'

In [22]:
nfts_one_cat['Growth'] = nfts_one_cat.apply(hotness_growth,axis=1)
nfts_one_cat['Ownership Status'] = nfts_one_cat.apply(hotness_ownership,axis=1)
nfts_one_cat['Popularity'] = nfts_one_cat.apply(hotness_popular,axis=1)
nfts_one_cat['Accesibility'] = nfts_one_cat.apply(hotness_accesibility,axis=1)
nfts_one_cat['Gas Status'] = nfts_one_cat.apply(hotness_gas,axis=1)
nfts_one_cat['Market Status'] = nfts_one_cat.apply(hotness_market_share,axis=1)

In [23]:
#display(nfts_one_cat['Growth'].value_counts())
#display(nfts_one_cat['Ownership Status'].value_counts())
#display(nfts_one_cat['Popularity'].value_counts())
#display(nfts_one_cat['Accesibility'].value_counts())
#display(nfts_one_cat['Gas Status'].value_counts())
#display(nfts_one_cat['Market Status'].value_counts())

## Preparation for Logistic R

In [24]:
nfts_one_cat_simpl = nfts_one_cat.drop(['Volume','Floor','Crypto','Marketplace','Is +1 category?'], 1)
nfts_one_cat_simpl.head(3)

Unnamed: 0,Collection,24 hs %,7 d %,Owners,Items,Category,Chain,Twitter followers,Ownership %,Volume (USD) in M,Floor (USD),Gas Volume (USD) in M,Gas %,Market Share %,Growth,Ownership Status,Popularity,Accesibility,Gas Status,Market Status
0,goblintown.wtf,56.77,562.01,4500,10000,New,Ethereum,39500,45.0,23.216213,5318.136,0.225,0.009692,63.14,Hot - buy now,Low OS,Potential community,Top Collectors,High gas,Whale
1,Chimpers,-35.62,-73.58,2800,5600,New,Ethereum,106700,50.0,15.160282,3631.0032,0.14,0.009235,41.23,Not interesting,Low OS,Community,Top Collectors,High gas,Whale
2,GENE_SIS: The Girls of Armament,-42.52,-29.29,4500,10000,New,Ethereum,236800,45.0,10.197379,385.1064,0.225,0.022064,27.73,Not interesting,Low OS,Community,Middle Accesible,High gas,Whale


In [25]:
# Random testing function with the columns selected on the parameters - Category & Chain -> Get collections with characteristics & generic insights of all chains

In [26]:
# Popularity Regression

X_popularity = nfts_one_cat_simpl[['Twitter followers','Ownership %','Floor (USD)','Volume (USD) in M','Market Share %']]
Y_popularity = pd.DataFrame(data=nfts_one_cat_simpl, columns=['Popularity'])
transformer = StandardScaler().fit(X_popularity)
scaled_x_popularity = pd.DataFrame(transformer.transform(X_popularity),columns = X_popularity.columns)
model = linear_model.LogisticRegression(random_state=0)
result = model.fit(scaled_x_popularity, nfts_one_cat_simpl['Popularity'])

new_collections = pd.DataFrame([{'Twitter followers':82000,'Ownership %':40,'Floor (USD)':1200,'Volume (USD) in M':0.0920,'Market Share %':62},
 {'Twitter followers':2000,'Ownership %':28,'Floor (USD)':324,'Volume (USD) in M':9.20,'Market Share %':32},
 {'Twitter followers':2000000,'Ownership %':62,'Floor (USD)':10987,'Volume (USD) in M':11.20,'Market Share %':12},
{'Twitter followers':20,'Ownership %':16,'Floor (USD)':18,'Volume (USD) in M':0.20,'Market Share %':82}])
 
new_collections_scaled = pd.DataFrame(transformer.transform(new_collections),columns = new_collections.columns)
display(result.predict(new_collections))
display(accuracy_score(result.predict(scaled_x_popularity),Y_popularity))

array(['Community', 'Community', 'Community', 'Community'], dtype=object)

0.4308510638297872

In [27]:
# Growth Regression

X_growth = nfts_one_cat_simpl[['Twitter followers','Ownership %','Floor (USD)','Volume (USD) in M','Market Share %']]
Y_growth = pd.DataFrame(data=nfts_one_cat_simpl, columns=['Growth'])
transformer = StandardScaler().fit(X_growth)
scaled_x_growth = pd.DataFrame(transformer.transform(X_growth),columns = X_growth.columns)
model = linear_model.LogisticRegression(random_state=0)
result = model.fit(scaled_x_growth, nfts_one_cat_simpl['Growth'])

new_collections = pd.DataFrame([{'Twitter followers':82000,'Ownership %':110,'Floor (USD)':345,'Volume (USD) in M':40,'Market Share %':62},
 {'Twitter followers':52000,'Ownership %':20,'Floor (USD)':18,'Volume (USD) in M':0.3,'Market Share %':32},
 {'Twitter followers':2000,'Ownership %':40,'Floor (USD)':192,'Volume (USD) in M':2,'Market Share %':22},
{'Twitter followers':12000,'Ownership %':140,'Floor (USD)':18275,'Volume (USD) in M':82,'Market Share %':12}])
 
new_collections_scaled = pd.DataFrame(transformer.transform(new_collections),columns = new_collections.columns)
display(result.predict(new_collections))
display(accuracy_score(result.predict(scaled_x_growth),Y_growth))

array(['Emerging', 'Emerging', 'Emerging', 'Not interesting'],
      dtype=object)

0.898936170212766

In [28]:
# Ownership Status Regression

X_ownership = nfts_one_cat_simpl[['Twitter followers','Ownership %','Floor (USD)','Volume (USD) in M','Market Share %']]
Y_ownership = pd.DataFrame(data=nfts_one_cat_simpl, columns=['Ownership Status'])
transformer = StandardScaler().fit(X_ownership)
scaled_x_ownership = pd.DataFrame(transformer.transform(X_ownership),columns = X_ownership.columns)
model = linear_model.LogisticRegression(random_state=0)
result = model.fit(scaled_x_ownership, nfts_one_cat_simpl['Ownership Status'])

new_collections = pd.DataFrame([{'Twitter followers':82000,'Ownership %':50,'Floor (USD)':3457,'Volume (USD) in M':8,'Market Share %':82},
 {'Twitter followers':22000,'Ownership %':70,'Floor (USD)':355,'Volume (USD) in M':12,'Market Share %':62},
 {'Twitter followers':12000,'Ownership %':80,'Floor (USD)':34,'Volume (USD) in M':24,'Market Share %':42},
{'Twitter followers':2000,'Ownership %':110,'Floor (USD)':12890,'Volume (USD) in M':32,'Market Share %':22}])
 
new_collections_scaled = pd.DataFrame(transformer.transform(new_collections),columns = new_collections.columns)
display(result.predict(new_collections))
display(accuracy_score(result.predict(scaled_x_ownership),Y_ownership))

array(['Medium OS', 'Not interesting', 'Not interesting', 'Medium OS'],
      dtype=object)

0.601063829787234

In [29]:
# Accesibility Regression

X_access = nfts_one_cat_simpl[['Twitter followers','Ownership %','Floor (USD)','Volume (USD) in M','Market Share %']]
Y_access = pd.DataFrame(data=nfts_one_cat_simpl, columns=['Accesibility'])
transformer = StandardScaler().fit(X_access)
scaled_x_access = pd.DataFrame(transformer.transform(X_access),columns = X_access.columns)
model = linear_model.LogisticRegression(random_state=0)
result = model.fit(scaled_x_access, nfts_one_cat_simpl['Accesibility'])

new_collections = pd.DataFrame([{'Twitter followers':82000,'Ownership %':80,'Floor (USD)':347,'Volume (USD) in M':8,'Market Share %':82},
 {'Twitter followers':32000,'Ownership %':60,'Floor (USD)':34,'Volume (USD) in M':12,'Market Share %':12},
 {'Twitter followers':2200,'Ownership %':50,'Floor (USD)':3457,'Volume (USD) in M':24,'Market Share %':52},
{'Twitter followers':12000,'Ownership %':10,'Floor (USD)':34457,'Volume (USD) in M':32,'Market Share %':32}])
 
new_collections_scaled = pd.DataFrame(transformer.transform(new_collections),columns = new_collections.columns)
display(result.predict(new_collections))
display(accuracy_score(result.predict(scaled_x_access),Y_access))

array(['Middle Accesible', 'Middle Accesible', 'Top Collectors',
       'Top Collectors'], dtype=object)

0.44680851063829785

In [30]:
# Market status Regression

X_market = nfts_one_cat_simpl[['Twitter followers','Ownership %','Floor (USD)','Volume (USD) in M','Market Share %']]
Y_market = pd.DataFrame(data=nfts_one_cat_simpl, columns=['Market Status'])
transformer = StandardScaler().fit(X_market)
scaled_x_market = pd.DataFrame(transformer.transform(X_market),columns = X_market.columns)
model = linear_model.LogisticRegression(random_state=0)
result = model.fit(scaled_x_market, nfts_one_cat_simpl['Market Status'])

new_collections = pd.DataFrame([{'Twitter followers':82000,'Ownership %':80,'Floor (USD)':347,'Volume (USD) in M':8,'Market Share %':82},
 {'Twitter followers':32000,'Ownership %':60,'Floor (USD)':34,'Volume (USD) in M':12,'Market Share %':12},
 {'Twitter followers':2200,'Ownership %':50,'Floor (USD)':3457,'Volume (USD) in M':24,'Market Share %':52},
{'Twitter followers':12000,'Ownership %':10,'Floor (USD)':34457,'Volume (USD) in M':32,'Market Share %':32}])
 
new_collections_scaled = pd.DataFrame(transformer.transform(new_collections),columns = new_collections.columns)
display(result.predict(new_collections))
display(accuracy_score(result.predict(scaled_x_market),Y_market))

array(['Fish', 'Fish', 'Whale', 'Whale'], dtype=object)

0.5425531914893617

## Preparation for Linear R

In [31]:
#Nothing very interesting when it comes to correlation
nfts_one_cat_simpl.corr()

Unnamed: 0,24 hs %,7 d %,Owners,Items,Twitter followers,Ownership %,Volume (USD) in M,Floor (USD),Gas Volume (USD) in M,Gas %,Market Share %
24 hs %,1.0,-0.019847,-0.004551,-0.003224,-0.003772,0.013956,-0.029169,-0.022579,-0.007889,0.038103,-0.02917
7 d %,-0.019847,1.0,-0.007362,-0.027462,-0.015122,0.013129,-0.021123,-0.02081,-0.005159,-0.017562,-0.021124
Owners,-0.004551,-0.007362,1.0,0.916876,0.001991,0.007521,0.152411,-0.021174,0.99931,-5.3e-05,0.152413
Items,-0.003224,-0.027462,0.916876,1.0,-0.000948,-0.024669,0.114945,-0.027371,0.917134,0.037512,0.114947
Twitter followers,-0.003772,-0.015122,0.001991,-0.000948,1.0,-0.007354,0.021892,-0.005614,0.003607,-0.000241,0.021893
Ownership %,0.013956,0.013129,0.007521,-0.024669,-0.007354,1.0,-0.02153,-0.016804,0.009493,0.039144,-0.02153
Volume (USD) in M,-0.029169,-0.021123,0.152411,0.114945,0.021892,-0.02153,1.0,0.150536,0.155051,-0.044239,1.0
Floor (USD),-0.022579,-0.02081,-0.021174,-0.027371,-0.005614,-0.016804,0.150536,1.0,-0.020404,-0.026822,0.150538
Gas Volume (USD) in M,-0.007889,-0.005159,0.99931,0.917134,0.003607,0.009493,0.155051,-0.020404,1.0,0.003064,0.155054
Gas %,0.038103,-0.017562,-5.3e-05,0.037512,-0.000241,0.039144,-0.044239,-0.026822,0.003064,1.0,-0.044238


## Top intervals

In [32]:
print('Twitter followers interval right:',round((st.norm.interval(0.99,loc=np.mean(nfts_one_cat_simpl['Twitter followers']),scale=np.std(nfts_one_cat_simpl['Twitter followers']))[1]),2))
print('Volume (USD) in M interval right:',round((st.norm.interval(0.99,loc=np.mean(nfts_one_cat_simpl['Volume (USD) in M']),scale=np.std(nfts_one_cat_simpl['Volume (USD) in M']))[1]),2))
print('Floor (USD) interval right:',round((st.norm.interval(0.99,loc=np.mean(nfts_one_cat_simpl['Floor (USD)']),scale=np.std(nfts_one_cat_simpl['Floor (USD)']))[1]),2))
print('Gas Volume (USD) in M interval right:',round((st.norm.interval(0.99,loc=np.mean(nfts_one_cat_simpl['Gas Volume (USD) in M']),scale=np.std(nfts_one_cat_simpl['Gas Volume (USD) in M']))[1]),2))
print('Owners interval right:',round((st.norm.interval(0.99,loc=np.mean(nfts_one_cat_simpl['Owners']),scale=np.std(nfts_one_cat_simpl['Owners']))[1]),2))

Twitter followers interval right: 3102689.43
Volume (USD) in M interval right: 217.46
Floor (USD) interval right: 45082.82
Gas Volume (USD) in M interval right: 4.65
Owners interval right: 93414.4


## How different are each Chains from each other?

In [33]:
def t_test_features(s1, s2, features=['24 hs %', '7 d %','Owners','Items','Twitter followers','Ownership %','Volume (USD) in M','Floor (USD)','Gas Volume (USD) in M','Gas %','Market Share %']):
    results = {k:st.ttest_ind(s1[k], s2[k])[1] for k in features}   
    return results

In [34]:
ethereum = nfts_one_cat_simpl[nfts_one_cat_simpl['Chain'] == 'Ethereum']
solana = nfts_one_cat_simpl[nfts_one_cat_simpl['Chain'] == 'Solana']
polygon = nfts_one_cat_simpl[nfts_one_cat_simpl['Chain'] == 'Polygon']
klaytn = nfts_one_cat_simpl[nfts_one_cat_simpl['Chain'] == 'Klaytn']

In [35]:
#Chains 

#list(set(nfts_one_cat_simpl['Chain']))
#display("ETH VS POLYGON:",t_test_features(ethereum,polygon))
display("ETH VS SOLANA:",t_test_features(ethereum,solana)) #Ownership
#display("ETH VS KLAYTN:",t_test_features(ethereum,klaytn)) #Gas%
#display("POLYGON VS SOLANA:",t_test_features(polygon,solana)) #Owners - Market Share % - Twitter followers
#display("POLYGON VS KLAYTN:",t_test_features(polygon,klaytn)) # Floor (USD) - Twitter followers
#display("KLAYTN VS SOLANA:",t_test_features(klaytn,solana)) # Gas Volume (USD) in M - Ownership %

'ETH VS SOLANA:'

{'24 hs %': 0.20742723853927458,
 '7 d %': 0.24045584258133348,
 'Owners': 0.03974856435553517,
 'Items': 0.03752772488083805,
 'Twitter followers': 0.0003994083097955009,
 'Ownership %': 0.26629321402757833,
 'Volume (USD) in M': 0.0009882671923604925,
 'Floor (USD)': 0.11088445507198948,
 'Gas Volume (USD) in M': 0.026437065329574315,
 'Gas %': nan,
 'Market Share %': 0.0009882044295969714}

## How different are each Categories from each other?

In [36]:
art = nfts_one_cat_simpl[nfts_one_cat_simpl['Category'] == 'Art']
collect = nfts_one_cat_simpl[nfts_one_cat_simpl['Category'] == 'Collectibles']
photo = nfts_one_cat_simpl[nfts_one_cat_simpl['Category'] == 'Photography']
cards = nfts_one_cat_simpl[nfts_one_cat_simpl['Category'] == 'Trading Cards']
worlds = nfts_one_cat_simpl[nfts_one_cat_simpl['Category'] == 'Virtual Worlds']
music = nfts_one_cat_simpl[nfts_one_cat_simpl['Category'] == 'Music']
utility = nfts_one_cat_simpl[nfts_one_cat_simpl['Category'] == 'Utility']
new = nfts_one_cat_simpl[nfts_one_cat_simpl['Category'] == 'New']
domains = nfts_one_cat_simpl[nfts_one_cat_simpl['Category'] == 'Domain Names']
sports = nfts_one_cat_simpl[nfts_one_cat_simpl['Category'] == 'Sports']

In [37]:
#Categories 

import itertools
        
categories_specs = [art,collect,photo,cards,worlds,music,utility,new,domains,sports]
#categories = list(set(nfts_one_cat_simpl['Category']))
#for x,y in itertools.combinations(categories_specs, 2):
#    display(t_test_features(x,y))

In [38]:
display('Art vs Collectibles',t_test_features(art,collect))
#display('Art vs Photo',t_test_features(art,photo))
#display('Art vs Trading Cards',t_test_features(art,cards))
#display('Art vs Virtual Worlds',t_test_features(art,worlds))
#display('Art vs Music',t_test_features(art,music))
#display('Art vs Utility',t_test_features(art,utility))
#display('Art vs New',t_test_features(art,new))
#display('Art vs Domain Names',t_test_features(art,domains))
#display('Art vs Sports',t_test_features(art,sports))

'Art vs Collectibles'

{'24 hs %': 0.8817901849837633,
 '7 d %': 0.30718672345845666,
 'Owners': 0.44582601119568754,
 'Items': 0.48722904386557175,
 'Twitter followers': 0.37251157636865406,
 'Ownership %': 0.658911025991015,
 'Volume (USD) in M': 0.1886529128209009,
 'Floor (USD)': 0.029454360827220416,
 'Gas Volume (USD) in M': 0.39537184845306395,
 'Gas %': 0.6087868588613543,
 'Market Share %': 0.1886539366147292}

In [39]:
display('Collectibles vs Art',t_test_features(art,collect))
#display('Collectibles vs Photo',t_test_features(collect,photo))
#display('Collectibles vs Trading Cards',t_test_features(collect,cards))
#display('Collectibles vs Virtual Worlds',t_test_features(collect,worlds))
#display('Collectibles vs Music',t_test_features(collect,music))
#display('Collectibles vs Utility',t_test_features(collect,utility))
#display('Collectibles vs New',t_test_features(collect,new))
#display('Collectibles vs Domain Names',t_test_features(collect,domains))
#display('Collectibles vs Sports',t_test_features(collect,sports))

'Collectibles vs Art'

{'24 hs %': 0.8817901849837633,
 '7 d %': 0.30718672345845666,
 'Owners': 0.44582601119568754,
 'Items': 0.48722904386557175,
 'Twitter followers': 0.37251157636865406,
 'Ownership %': 0.658911025991015,
 'Volume (USD) in M': 0.1886529128209009,
 'Floor (USD)': 0.029454360827220416,
 'Gas Volume (USD) in M': 0.39537184845306395,
 'Gas %': 0.6087868588613543,
 'Market Share %': 0.1886539366147292}

In [40]:
display('Photo vs Art',t_test_features(art,photo))
#display('Photo vs Collectibles',t_test_features(collect,photo))
#display('Photo vs Trading Cards',t_test_features(photo,cards))
#display('Photo vs Virtual Worlds',t_test_features(photo,worlds))
#display('Photo vs Music',t_test_features(photo,music))
#display('Photo vs Utility',t_test_features(photo,utility))
#display('Photo vs New',t_test_features(photo,new))
#display('Photo vs Domain Names',t_test_features(photo,domains))
#display('Photo vs Sports',t_test_features(photo,sports))

'Photo vs Art'

{'24 hs %': 0.4541162649253636,
 '7 d %': 0.42339423628742123,
 'Owners': 0.24473827051807284,
 'Items': 0.15953556985304793,
 'Twitter followers': 0.41358521299466766,
 'Ownership %': 0.3787374302038138,
 'Volume (USD) in M': 0.029398880755468787,
 'Floor (USD)': 0.2580104467214223,
 'Gas Volume (USD) in M': 0.2776967042042439,
 'Gas %': nan,
 'Market Share %': 0.02940136230709611}

In [41]:
display('Trading Cards vs Art',t_test_features(art,cards))
#display('Trading Cards vs Collectibles',t_test_features(collect,cards))
#display('Trading Cards vs Photography',t_test_features(photo,cards))
#display('Trading Cards vs Trading Cards',t_test_features(cards,worlds))
#display('Trading Cards vs Music',t_test_features(cards,music))
#display('Trading Cards vs Utility',t_test_features(cards,utility))
#display('Trading Cards vs New',t_test_features(cards,new))
#display('Trading Cards vs Domain Names',t_test_features(cards,domains))
#display('Trading Cards vs Sports',t_test_features(cards,sports))

'Trading Cards vs Art'

{'24 hs %': 0.34207011494434836,
 '7 d %': 0.23776893051752213,
 'Owners': 0.4536829802913448,
 'Items': 0.7824636868610071,
 'Twitter followers': 0.06055284076220596,
 'Ownership %': 0.6154194604907329,
 'Volume (USD) in M': 0.2796795231554114,
 'Floor (USD)': 0.13721483102952076,
 'Gas Volume (USD) in M': 0.47762721995141066,
 'Gas %': 0.41982396339078987,
 'Market Share %': 0.279685595325448}

In [42]:
display('Virtual Worlds vs Art',t_test_features(art,worlds))
#display('Virtual Worlds vs Collectibles',t_test_features(collect,worlds))
#display('Virtual Worlds vs Photography',t_test_features(photo,worlds))
#display('Virtual Worlds vs Trading Cards',t_test_features(cards,worlds))
#display('Virtual Worlds vs Music',t_test_features(worlds,music))
#display('Virtual Worlds vs Utility',t_test_features(worlds,utility))
#display('Virtual Worlds vs New',t_test_features(worlds,new))
#display('Virtual Worlds vs Domain Names',t_test_features(worlds,domains))
#display('Virtual Worlds vs Sports',t_test_features(worlds,sports))

'Virtual Worlds vs Art'

{'24 hs %': 0.3457293033436464,
 '7 d %': 0.41911678890831294,
 'Owners': 0.42641744746556,
 'Items': 0.7691073069736132,
 'Twitter followers': 0.8703065969539139,
 'Ownership %': 0.5682142891271207,
 'Volume (USD) in M': 0.5851713942368864,
 'Floor (USD)': 0.4197260771989091,
 'Gas Volume (USD) in M': 0.4449799450130263,
 'Gas %': nan,
 'Market Share %': 0.5851635466403311}

In [43]:
display('Music vs Art',t_test_features(art,music))
#display('Music vs Collectibles',t_test_features(collect,music))
#display('Music vs Photography',t_test_features(photo,music))
#display('Music vs Trading Cards',t_test_features(cards,music))
#display('Music vs Virtual Worlds',t_test_features(worlds,music))
#display('Music vs Utility',t_test_features(music,utility))
#display('Music vs New',t_test_features(music,new))
#display('Music vs Domain Names',t_test_features(music,domains))
#display('Music vs Sports',t_test_features(music,sports))

'Music vs Art'

{'24 hs %': 0.1425924182888139,
 '7 d %': 0.06770822990963242,
 'Owners': 0.2508624771338539,
 'Items': 0.2942241920180355,
 'Twitter followers': 0.21839546737016097,
 'Ownership %': 0.4170234144566034,
 'Volume (USD) in M': 0.03162839367726653,
 'Floor (USD)': 0.14234516450673287,
 'Gas Volume (USD) in M': 0.28145221756441235,
 'Gas %': nan,
 'Market Share %': 0.031630476273641746}

In [44]:
display('Utility vs Art',t_test_features(art,utility))
#display('Utility vs Collectibles',t_test_features(collect,utility))
#display('Utility vs Photography',t_test_features(photo,utility))
#display('Utility vs Trading Cards',t_test_features(cards,utility))
#display('Utility vs Virtual Worlds',t_test_features(worlds,utility))
#display('Utility vs Music',t_test_features(music,utility))
#display('Utility vs New',t_test_features(utility,new))
#display('Utility vs Domain Names',t_test_features(utility,domains))
#display('Utility vs Sports',t_test_features(utility,sports))

'Utility vs Art'

{'24 hs %': 0.1687429022212281,
 '7 d %': 0.2678664575042043,
 'Owners': 0.3021977011266213,
 'Items': 0.5027072007009215,
 'Twitter followers': 0.6775087898560874,
 'Ownership %': 0.701238117440746,
 'Volume (USD) in M': 0.24135780255183278,
 'Floor (USD)': 0.6882059700352163,
 'Gas Volume (USD) in M': 0.307514200019678,
 'Gas %': 0.3697744778841294,
 'Market Share %': 0.2413722932576389}

In [45]:
display('New vs Art',t_test_features(art,new))
#display('New vs Collectibles',t_test_features(collect,new))
#display('New vs Photography',t_test_features(photo,new))
#display('New vs Trading Cards',t_test_features(cards,new))
#display('New vs Virtual Worlds',t_test_features(worlds,new))
#display('New vs Music',t_test_features(music,new))
#display('New vs Utility',t_test_features(utility,new))
#display('New vs Domain Names',t_test_features(new,domains))
#display('New vs Sports',t_test_features(new,sports))

'New vs Art'

{'24 hs %': 0.4709013875129686,
 '7 d %': 0.2813765511059947,
 'Owners': 0.3139281419172414,
 'Items': 0.27034812602770486,
 'Twitter followers': 0.21958583581169636,
 'Ownership %': 0.6122316253865021,
 'Volume (USD) in M': 0.05854529755292367,
 'Floor (USD)': 0.6543604146121201,
 'Gas Volume (USD) in M': 0.31275388013354466,
 'Gas %': 0.07691056420394914,
 'Market Share %': 0.05854643197545437}

In [46]:
display('Domain Names vs Art',t_test_features(art,domains))
#display('Domain Names vs Collectibles',t_test_features(collect,domains))
#display('Domain Names vs Photography',t_test_features(photo,domains))
#display('Domain Names vs Trading Cards',t_test_features(cards,domains))
#display('Domain Names vs Virtual Worlds',t_test_features(worlds,domains))
#display('Domain Names vs Music',t_test_features(music,domains))
#display('Domain Names vs Utility',t_test_features(utility,domains))
#display('Domain Names vs New',t_test_features(new,domains))
#display('Domain Names vs Sports',t_test_features(domains,sports))

'Domain Names vs Art'

{'24 hs %': 0.30138014292628096,
 '7 d %': 0.10947368666492113,
 'Owners': 0.7052413819012541,
 'Items': 0.44614257651594713,
 'Twitter followers': 0.041375590225445,
 'Ownership %': 0.2726106828477233,
 'Volume (USD) in M': 0.04890893265727333,
 'Floor (USD)': 0.39223233786343503,
 'Gas Volume (USD) in M': 0.6774348392716556,
 'Gas %': 0.2819631434751691,
 'Market Share %': 0.04891406794818281}

In [47]:
display('Sports vs Art',t_test_features(art,sports))
#display('Sports vs Collectibles',t_test_features(collect,sports))
#display('Sports vs Photography',t_test_features(photo,sports))
#display('Sports vs Trading Cards',t_test_features(cards,sports))
#display('Sports vs Virtual Worlds',t_test_features(worlds,sports))
#display('Sports vs Music',t_test_features(music,sports))
#display('Sports vs Utility',t_test_features(utility,sports))
#display('Sports vs New',t_test_features(new,sports))
#display('Sports vs Domain Names',t_test_features(domains,sports))

'Sports vs Art'

{'24 hs %': 0.014800320308018106,
 '7 d %': 0.04638663940810472,
 'Owners': 0.5120835098431165,
 'Items': 0.5527762039456134,
 'Twitter followers': 0.041419468719651874,
 'Ownership %': 0.3087109283809533,
 'Volume (USD) in M': 0.046661889833310745,
 'Floor (USD)': 0.07981612221009068,
 'Gas Volume (USD) in M': 0.5501493193986406,
 'Gas %': 0.24699102943202428,
 'Market Share %': 0.046667432643649885}

## Final output with manual recommendations

In [48]:
nfts_recos = pd.read_csv('nft_final_recommendations.csv')
nfts_recos.head()

Unnamed: 0,Collection,24 hs %,7 d %,Owners,Items,Category,Chain,Twitter followers,Ownership %,Volume (USD) in M,Floor (USD),Gas Volume (USD) in M,Gas %,Market Share %,Growth,Ownership Status,Popularity,Accesibility,Gas Status,Market Status,Recommendation
0,goblintown.wtf,56.77,562.01,4500,10000,New,Ethereum,39500,45.0,23.216213,5318.136,0.225,0.009692,63.14,Hot - buy now,Low OS,Potential community,Top Collectors,High gas,Whale,Highly recommended
1,Chimpers,-35.62,-73.58,2800,5600,New,Ethereum,106700,50.0,15.160282,3631.0032,0.14,0.009235,41.23,Not interesting,Low OS,Community,Top Collectors,High gas,Whale,Sligthly recommended
2,GENE_SIS: The Girls of Armament,-42.52,-29.29,4500,10000,New,Ethereum,236800,45.0,10.197379,385.1064,0.225,0.022064,27.73,Not interesting,Low OS,Community,Middle Accesible,High gas,Whale,Give it a chance
3,AINightbirds,-53.6,0.0,4700,10000,New,Ethereum,11200,47.0,9.186401,421.7832,0.235,0.025581,24.98,Not interesting,Low OS,Potential community,Top Collectors,High gas,Whale,Sligthly recommended
4,Moonbirds Oddities,-18.56,0.0,6200,10000,New,Ethereum,201900,62.0,6.842699,5024.7216,0.31,0.045304,18.61,Not interesting,Medium OS,Community,Top Collectors,High gas,Whale,Sligthly recommended


In [49]:
nfts_recos['Recommendation'].value_counts()

Not recommended         61
Sligthly recommended    38
Highly recommended      37
Give it a chance        32
Name: Recommendation, dtype: int64

## Wrap up flows
Overall functions for Flask.

In [50]:
# Selecciona category y chain con input de Flask forms - Descriptive analysis generalista 


def define_filter(chain,category):
    return nfts_recos[(nfts_recos['Chain'] == chain)&(nfts_recos['Category'] == category)]

def chain_category_summary():

  return nfts_recos.groupby(['Chain','Category']).agg({'24 hs %':'mean',
                                 '7 d %':'mean',
                                 'Twitter followers':'mean',
                                 'Ownership %':'mean',
                                 'Volume (USD) in M':'mean',
                                 'Floor (USD)':'mean',
                                 'Market Share %':'mean'})

In [55]:
def full_func():
    
    categories = ['Art','Collectibles','Sports','Music','New','Photography','Virtual Worlds','Domain Names','Trading Cards','Utility']
    chains = ['Ethereum','Solana','Polygon','Klaytn']
    category = str(input("""What category do you want to analyze? Write Art - Collectibles - Sports - Music - New - Photography - Virtual Worlds - Domain Names - Trading Cards - Utility  """))  
    while category not in categories:
        print("Please, choose one of the options below.")
        category = str(input("""What category do you want to analyze? Write Art - Collectibles - Sports - Music - New - Photography - Virtual Worlds - Domain Names - Trading Cards - Utility  """))  
    
    chain = str(input("""What chain do you want to analyze? Write Ethereum - Solana - Polygon - Klaytn""")) 
    while chain not in chains:
        print("Please, choose one of the options below.")
        chain = str(input("""What chain do you want to analyze? Write Ethereum - Solana - Polygon - Klaytn""")) 

    display(define_filter(chain,category))

In [56]:
# Compare 2 chains to the others

def t_test_features(s1, s2, features=['24 hs %', '7 d %','Owners','Items','Twitter followers','Ownership %','Volume (USD) in M','Floor (USD)','Gas Volume (USD) in M','Gas %','Market Share %']):
    results = {k:st.ttest_ind(s1[k], s2[k])[1] for k in features}   
    return results

def compare_chains():
    #chains = ['Ethereum','Solana','Polygon','Klaytn']
    
    ch1 = str(input("""Which 1st chain do you want to compare? Write Ethereum, Polygon, Solana or Klaytn"""))
    ch1_filter = nfts_recos[nfts_recos['Chain'] == ch1]
    #while ch1 not in chains:
    #    print("Please write one of the options below.")
    #    ch1 = str(input("""Which 1st chain do you want to compare? Write Ethereum, Polygon, Polygon or Klaytn"""))
    
    ch2 = str(input("""Which 2nd chain do you want to compare? Write Ethereum, Polygon, Solana or Klaytn"""))
    ch2_filter = nfts_recos[nfts_recos['Chain'] == ch2]
    #while ch2 not in chains:
    #    print("Please write one of the options below.")
    #    ch2 = str(input("""Which 2nd chain do you want to compare? Write Ethereum, Polygon, Polygon or Klaytn"""))
    
    #if ch1 == ch2:
    #        print("You can't compare the same chain")
    #        ch1 = str(input("""Which 1st chain do you want to compare? Write Ethereum, Polygon, Polygon or Klaytn"""))
    #        while ch1 not in chains:
    #            print("Please write one of the options below.")
    #            ch1 = str(input("""Which 1st chain do you want to compare? Write Ethereum, Polygon, Polygon or Klaytn"""))
    #        ch2 = str(input("""Which 2nd chain do you want to compare? Write Ethereum, Polygon, Polygon or Klaytn"""))
    #        while ch2 not in chains:
    #            print("Please write one of the options below.")
    #           ch2 = str(input("""Which 2nd chain do you want to compare? Write Ethereum, Polygon, Polygon or Klaytn"""))
    #else: 
    return f'{ch1} vs {ch2}:',t_test_features(ch1_filter,ch2_filter)

In [57]:
# Compare 2 categories to the others

def t_test_features(s1, s2, features=['24 hs %', '7 d %','Owners','Items','Twitter followers','Ownership %','Volume (USD) in M','Floor (USD)','Gas Volume (USD) in M','Gas %','Market Share %']):
    results = {k:st.ttest_ind(s1[k], s2[k])[1] for k in features}   
    return results

def compare_categories():
    
    cat1 = str(input("""What 1st category do you want to analyze? Write Art - Collectibles - Sports - Music - New - Photography - Virtual Worlds - Domain Names - Trading Cards - Utility  """))
    cat1_filter = nfts_recos[nfts_recos['Category'] == cat1]

    cat2 = str(input("""What 2nd category do you want to analyze? Write Art - Collectibles - Sports - Music - New - Photography - Virtual Worlds - Domain Names - Trading Cards - Utility  """))
    cat2_filter = nfts_recos[nfts_recos['Category'] == cat2]

    return f'{cat1} vs {cat2}:',t_test_features(cat1_filter,cat2_filter)

In [58]:
# Logistic Regression function
import random

# Feature to predict 
def feature_lr():
    feature = input("What feature do you want to predict? Write Popularity, Accesibility, Growth, Ownership Status, Market Status or Recommendation")

    X = nfts_recos[['7 d %','Twitter followers','Ownership %','Floor (USD)','Volume (USD) in M','Market Share %']]
    Y = pd.DataFrame(data=nfts_recos, columns=[feature])
    transformer = StandardScaler().fit(X)
    scaled_x = pd.DataFrame(transformer.transform(X),columns = X.columns)
    model = linear_model.LogisticRegression(random_state=0)
    result = model.fit(scaled_x, nfts_recos[feature])
    
    # Dictionary comprehension 

    new_collections = pd.DataFrame([{'7 d %':random.randint(int(nfts_recos['7 d %'].min()), int(nfts_recos['7 d %'].max())),
                                     'Twitter followers':random.randint(int(nfts_recos['Twitter followers'].min()), int(nfts_recos['Twitter followers'].max())),
                                     'Ownership %':random.randint(int(nfts_recos['Ownership %'].min()), int(nfts_recos['Ownership %'].max())),
                                     'Floor (USD)':random.randint(int(nfts_recos['Floor (USD)'].min()), int(nfts_recos['Floor (USD)'].max())),
                                     'Volume (USD) in M':random.randint(int(nfts_recos['Volume (USD) in M'].min()), int(nfts_recos['Volume (USD) in M'].max())),
                                     'Market Share %':random.randint(int(nfts_recos['Market Share %'].min()), int(nfts_recos['Market Share %'].max()))},
                                     {'7 d %':random.randint(int(nfts_recos['7 d %'].min()), int(nfts_recos['7 d %'].max())),
                                     'Twitter followers':random.randint(int(nfts_recos['Twitter followers'].min()), int(nfts_recos['Twitter followers'].max())),
                                     'Ownership %':random.randint(int(nfts_recos['Ownership %'].min()), int(nfts_recos['Ownership %'].max())),
                                     'Floor (USD)':random.randint(int(nfts_recos['Floor (USD)'].min()), int(nfts_recos['Floor (USD)'].max())),
                                     'Volume (USD) in M':random.randint(int(nfts_recos['Volume (USD) in M'].min()), int(nfts_recos['Volume (USD) in M'].max())),
                                     'Market Share %':random.randint(int(nfts_recos['Market Share %'].min()), int(nfts_recos['Market Share %'].max()))},
                                     {'7 d %':random.randint(int(nfts_recos['7 d %'].min()), int(nfts_recos['7 d %'].max())),
                                     'Twitter followers':random.randint(int(nfts_recos['Twitter followers'].min()), int(nfts_recos['Twitter followers'].max())),
                                     'Ownership %':random.randint(int(nfts_recos['Ownership %'].min()), int(nfts_recos['Ownership %'].max())),
                                     'Floor (USD)':random.randint(int(nfts_recos['Floor (USD)'].min()), int(nfts_recos['Floor (USD)'].max())),
                                     'Volume (USD) in M':random.randint(int(nfts_recos['Volume (USD) in M'].min()), int(nfts_recos['Volume (USD) in M'].max())),
                                     'Market Share %':random.randint(int(nfts_recos['Market Share %'].min()), int(nfts_recos['Market Share %'].max()))},
                                    {'7 d %':random.randint(int(nfts_recos['7 d %'].min()), int(nfts_recos['7 d %'].max())),
                                     'Twitter followers':random.randint(int(nfts_recos['Twitter followers'].min()), int(nfts_recos['Twitter followers'].max())),
                                     'Ownership %':random.randint(int(nfts_recos['Ownership %'].min()), int(nfts_recos['Ownership %'].max())),
                                     'Floor (USD)':random.randint(int(nfts_recos['Floor (USD)'].min()), int(nfts_recos['Floor (USD)'].max())),
                                     'Volume (USD) in M':random.randint(int(nfts_recos['Volume (USD) in M'].min()), int(nfts_recos['Volume (USD) in M'].max())),
                                     'Market Share %':random.randint(int(nfts_recos['Market Share %'].min()), int(nfts_recos['Market Share %'].max()))},
                                   {'7 d %':random.randint(int(nfts_recos['7 d %'].min()), int(nfts_recos['7 d %'].max())),
                                     'Twitter followers':random.randint(int(nfts_recos['Twitter followers'].min()), int(nfts_recos['Twitter followers'].max())),
                                     'Ownership %':random.randint(int(nfts_recos['Ownership %'].min()), int(nfts_recos['Ownership %'].max())),
                                     'Floor (USD)':random.randint(int(nfts_recos['Floor (USD)'].min()), int(nfts_recos['Floor (USD)'].max())),
                                     'Volume (USD) in M':random.randint(int(nfts_recos['Volume (USD) in M'].min()), int(nfts_recos['Volume (USD) in M'].max())),
                                     'Market Share %':random.randint(int(nfts_recos['Market Share %'].min()), int(nfts_recos['Market Share %'].max()))},
                                   {'7 d %':random.randint(int(nfts_recos['7 d %'].min()), int(nfts_recos['7 d %'].max())),
                                     'Twitter followers':random.randint(int(nfts_recos['Twitter followers'].min()), int(nfts_recos['Twitter followers'].max())),
                                     'Ownership %':random.randint(int(nfts_recos['Ownership %'].min()), int(nfts_recos['Ownership %'].max())),
                                     'Floor (USD)':random.randint(int(nfts_recos['Floor (USD)'].min()), int(nfts_recos['Floor (USD)'].max())),
                                     'Volume (USD) in M':random.randint(int(nfts_recos['Volume (USD) in M'].min()), int(nfts_recos['Volume (USD) in M'].max())),
                                     'Market Share %':random.randint(int(nfts_recos['Market Share %'].min()), int(nfts_recos['Market Share %'].max()))},
                                   {'7 d %':random.randint(int(nfts_recos['7 d %'].min()), int(nfts_recos['7 d %'].max())),
                                     'Twitter followers':random.randint(int(nfts_recos['Twitter followers'].min()), int(nfts_recos['Twitter followers'].max())),
                                     'Ownership %':random.randint(int(nfts_recos['Ownership %'].min()), int(nfts_recos['Ownership %'].max())),
                                     'Floor (USD)':random.randint(int(nfts_recos['Floor (USD)'].min()), int(nfts_recos['Floor (USD)'].max())),
                                     'Volume (USD) in M':random.randint(int(nfts_recos['Volume (USD) in M'].min()), int(nfts_recos['Volume (USD) in M'].max())),
                                     'Market Share %':random.randint(int(nfts_recos['Market Share %'].min()), int(nfts_recos['Market Share %'].max()))},
                                   {'7 d %':random.randint(int(nfts_recos['7 d %'].min()), int(nfts_recos['7 d %'].max())),
                                     'Twitter followers':random.randint(int(nfts_recos['Twitter followers'].min()), int(nfts_recos['Twitter followers'].max())),
                                     'Ownership %':random.randint(int(nfts_recos['Ownership %'].min()), int(nfts_recos['Ownership %'].max())),
                                     'Floor (USD)':random.randint(int(nfts_recos['Floor (USD)'].min()), int(nfts_recos['Floor (USD)'].max())),
                                     'Volume (USD) in M':random.randint(int(nfts_recos['Volume (USD) in M'].min()), int(nfts_recos['Volume (USD) in M'].max())),
                                     'Market Share %':random.randint(int(nfts_recos['Market Share %'].min()), int(nfts_recos['Market Share %'].max()))},
                                   {'7 d %':random.randint(int(nfts_recos['7 d %'].min()), int(nfts_recos['7 d %'].max())),
                                     'Twitter followers':random.randint(int(nfts_recos['Twitter followers'].min()), int(nfts_recos['Twitter followers'].max())),
                                     'Ownership %':random.randint(int(nfts_recos['Ownership %'].min()), int(nfts_recos['Ownership %'].max())),
                                     'Floor (USD)':random.randint(int(nfts_recos['Floor (USD)'].min()), int(nfts_recos['Floor (USD)'].max())),
                                     'Volume (USD) in M':random.randint(int(nfts_recos['Volume (USD) in M'].min()), int(nfts_recos['Volume (USD) in M'].max())),
                                     'Market Share %':random.randint(int(nfts_recos['Market Share %'].min()), int(nfts_recos['Market Share %'].max()))},
                                   {'7 d %':random.randint(int(nfts_recos['7 d %'].min()), int(nfts_recos['7 d %'].max())),
                                     'Twitter followers':random.randint(int(nfts_recos['Twitter followers'].min()), int(nfts_recos['Twitter followers'].max())),
                                     'Ownership %':random.randint(int(nfts_recos['Ownership %'].min()), int(nfts_recos['Ownership %'].max())),
                                     'Floor (USD)':random.randint(int(nfts_recos['Floor (USD)'].min()), int(nfts_recos['Floor (USD)'].max())),
                                     'Volume (USD) in M':random.randint(int(nfts_recos['Volume (USD) in M'].min()), int(nfts_recos['Volume (USD) in M'].max())),
                                     'Market Share %':random.randint(int(nfts_recos['Market Share %'].min()), int(nfts_recos['Market Share %'].max()))}])

    new_collections_scaled = pd.DataFrame(transformer.transform(new_collections),columns = new_collections.columns)
    print("Running 50 random tests...")
    print("Accuracy score is: " , (round(accuracy_score(result.predict(scaled_x),Y),2)*100),"%")
    if (round(accuracy_score(result.predict(scaled_x),Y),2)*100) > 75:
        print("Likely to be right")
    elif (round(accuracy_score(result.predict(scaled_x),Y),2)*100) > 65:
        print("More likely to be right, but not very trustable yet")
    elif (round(accuracy_score(result.predict(scaled_x),Y),2)*100) > 50:
        print("Not enough values to make a good prediction, need more sampling")
    else:
        print("Not enough values to make a good prediction. Try another feature")
    print("Results are...")
    return list(result.predict(new_collections))