In [1]:
import warnings
warnings.filterwarnings('ignore')

In [2]:
import numpy as np
import pandas as pd
from pathlib import Path
from collections import Counter

In [3]:
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix
from imblearn.metrics import classification_report_imbalanced

In [4]:
from config import database, username, password
import sqlalchemy

In [5]:
#Connecting to database and importing table
engine= sqlalchemy.create_engine(f'postgresql://{username}:{password}@localhost/{database}')
con= engine.connect()

nft_table= pd.read_sql("SELECT * FROM final_nft", con=engine)
nft_table

Unnamed: 0,title,name_of_work,creator,art_series,price,type_of_nft,likes,nsfw,total_units,year_create,rights
0,30 min Drawings,Giant Frog,kristyglas,kristyglas_30-min-drawings_giant-frog,50.0,PHOTO,2,False,30,2020,1
1,Experimental Video,Biospecimens,juliakponsford,juliakponsford_experimental-video_biospecimens,500.0,VIDEO,0,False,1,2020,1
2,Sexy Art,long legs,badsexy,badsexy_sexy-art_long-legs,10.0,PHOTO,0,True,2,2021,1
3,Dream World,A Guide in my Dreams,yoslehz,yoslehz_dream-world_a-guide-in-my-dreams,20.0,PHOTO,1,False,2,2020,1
4,Dream World,Silent Observer,yoslehz,yoslehz_dream-world_silent-observer,20.0,GIF,0,False,2,2020,1
...,...,...,...,...,...,...,...,...,...,...,...
4170,Abstract Erotica,Taste of Heaven,tntdabomb,tntdabomb_abstract-erotica_taste-of-heaven,50.0,PHOTO,0,False,10,2020,3
4171,GIF art,crypto thinkers,elgeko,elgeko_gif-art_crypto-thinkers,99.0,GIF,0,False,5,2020,1
4172,Eye,Eye Of The Beholder,rubenalexander,rubenalexander_eye_eye-of-the-beholder,50.0,PHOTO,0,False,3,2020,1
4173,HIVE ART,online art - Cryptoverse,elgeko,elgeko_hive-art_online-art-cryptoverse,99.0,GIF,0,False,7,2020,1


In [7]:
#Drop unnecessary columns
NFT_df_clean = nft_table.drop(columns=["art_series","rights"])
NFT_df_clean.head()

Unnamed: 0,title,name_of_work,creator,price,type_of_nft,likes,nsfw,total_units,year_create
0,30 min Drawings,Giant Frog,kristyglas,50.0,PHOTO,2,False,30,2020
1,Experimental Video,Biospecimens,juliakponsford,500.0,VIDEO,0,False,1,2020
2,Sexy Art,long legs,badsexy,10.0,PHOTO,0,True,2,2021
3,Dream World,A Guide in my Dreams,yoslehz,20.0,PHOTO,1,False,2,2020
4,Dream World,Silent Observer,yoslehz,20.0,GIF,0,False,2,2020


In [8]:
# Remove rows that have at least 1 null value.
NFT_df_clean.dropna(inplace = True)
NFT_df_clean.shape

(4175, 9)

In [9]:
NFT_df_clean.dtypes

title            object
name_of_work     object
creator          object
price           float64
type_of_nft      object
likes             int64
nsfw               bool
total_units       int64
year_create       int64
dtype: object

In [10]:
NFT_df_clean['price'] = NFT_df_clean['price'].astype(int)

In [11]:
NFT_df_clean.dtypes

title           object
name_of_work    object
creator         object
price            int32
type_of_nft     object
likes            int64
nsfw              bool
total_units      int64
year_create      int64
dtype: object

In [12]:
NFT_df_clean = NFT_df_clean[NFT_df_clean['price'] <= 1000000]
NFT_df_clean.shape

(4174, 9)

In [13]:
# Create a new DataFrame that holds only the likes.
likes_df = pd.DataFrame(NFT_df_clean["likes"])
likes_df.head()

Unnamed: 0,likes
0,2
1,0
2,0
3,1
4,0


In [12]:
# Drop the 'likes' column since it's not going to be used on the clustering algorithm.
NFT_df_clean = NFT_df_clean.drop(columns = "likes")
NFT_df_clean.head()

Unnamed: 0,title,name,creator,price,type,nsfw,tokens,year
0,30 min Drawings,Giant Frog,kristyglas,50,PHOTO,False,30,2020
1,Experimental Video,Biospecimens,juliakponsford,500,VIDEO,False,1,2020
2,Sexy Art,long legs,badsexy,10,PHOTO,True,2,2021
3,Dream World,A Guide in my Dreams,yoslehz,20,PHOTO,False,2,2020
4,Dream World,Silent Observer,yoslehz,20,GIF,False,2,2020


In [14]:
# Use get_dummies() to create variables for text features.
NFT_df_encoded = pd.get_dummies(NFT_df_clean, columns = ["title","name","creator","type","nsfw"])
NFT_df_encoded.columns

Index(['price', 'tokens', 'year', 'title_  short horror stories in a poetry',
       'title_ Celebrities Art', 'title_ Changes in the frequency of the aura',
       'title_ DeviantArt Fennec Fox Sketch', 'title_ Devices',
       'title_ Distortions',
       'title_ Heart transfer. Moon Guardian and Demon Hanwallu.',
       ...
       'creator_zord189', 'creator_zullyscott', 'creator_zuly63',
       'creator_zuppaman', 'creator_zvx', 'type_GIF', 'type_PHOTO',
       'type_VIDEO', 'nsfw_False', 'nsfw_True'],
      dtype='object', length=6316)