In [1]:
# Initial imports
import pandas as pd
import hvplot.pandas
from path import Path
import plotly.express as px
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from config import database, username, password
import sqlalchemy

In [2]:
# Build the engine from the credentials imported from the local config module.
engine = sqlalchemy.create_engine(f'postgresql://{username}:{password}@localhost/{database}')

# Run the query inside a context manager so the connection is released as soon
# as the read finishes. Previously a connection was opened, never used by
# read_sql (which was given the engine), and never closed.
with engine.connect() as con:
    nft_table = pd.read_sql("SELECT * FROM final_nft", con=con)

nft_table

Unnamed: 0,title,name_of_work,creator,art_series,price,type_of_nft,likes,nsfw,total_units,year_create,rights
0,30 min Drawings,Giant Frog,kristyglas,kristyglas_30-min-drawings_giant-frog,50.0,PHOTO,2,False,30,2020,1
1,Experimental Video,Biospecimens,juliakponsford,juliakponsford_experimental-video_biospecimens,500.0,VIDEO,0,False,1,2020,1
2,Sexy Art,long legs,badsexy,badsexy_sexy-art_long-legs,10.0,PHOTO,0,True,2,2021,1
3,Dream World,A Guide in my Dreams,yoslehz,yoslehz_dream-world_a-guide-in-my-dreams,20.0,PHOTO,1,False,2,2020,1
4,Dream World,Silent Observer,yoslehz,yoslehz_dream-world_silent-observer,20.0,GIF,0,False,2,2020,1
...,...,...,...,...,...,...,...,...,...,...,...
4170,Abstract Erotica,Taste of Heaven,tntdabomb,tntdabomb_abstract-erotica_taste-of-heaven,50.0,PHOTO,0,False,10,2020,3
4171,GIF art,crypto thinkers,elgeko,elgeko_gif-art_crypto-thinkers,99.0,GIF,0,False,5,2020,1
4172,Eye,Eye Of The Beholder,rubenalexander,rubenalexander_eye_eye-of-the-beholder,50.0,PHOTO,0,False,3,2020,1
4173,HIVE ART,online art - Cryptoverse,elgeko,elgeko_hive-art_online-art-cryptoverse,99.0,GIF,0,False,7,2020,1


In [3]:
# "art_series" and "rights" are not used in this analysis; keep every other column.
columns_to_discard = ["art_series", "rights"]
NFT_df_clean = nft_table.drop(columns_to_discard, axis=1)
NFT_df_clean.head()

Unnamed: 0,title,name_of_work,creator,price,type_of_nft,likes,nsfw,total_units,year_create
0,30 min Drawings,Giant Frog,kristyglas,50.0,PHOTO,2,False,30,2020
1,Experimental Video,Biospecimens,juliakponsford,500.0,VIDEO,0,False,1,2020
2,Sexy Art,long legs,badsexy,10.0,PHOTO,0,True,2,2021
3,Dream World,A Guide in my Dreams,yoslehz,20.0,PHOTO,1,False,2,2020
4,Dream World,Silent Observer,yoslehz,20.0,GIF,0,False,2,2020


In [4]:
# Keep only the rows that have no missing values, then report (rows, columns).
NFT_df_clean = NFT_df_clean.dropna()
NFT_df_clean.shape

(4175, 9)

In [5]:
# Store prices as integers (the cast discards any fractional part).
NFT_df_clean = NFT_df_clean.assign(price=NFT_df_clean['price'].astype(int))

In [6]:
# Keep only the listings priced at 750000 or less.
within_price_cap = NFT_df_clean['price'] <= 750000
NFT_df_clean = NFT_df_clean.loc[within_price_cap]

In [7]:
# Adding column showing how old each NFT is, measured from REFERENCE_YEAR,
# and dropping the creation year it was derived from.
# .assign() returns a new frame instead of writing a column into the filtered
# frame produced by the previous cell, which avoids pandas'
# SettingWithCopyWarning.
REFERENCE_YEAR = 2022
NFT_df_clean = (
    NFT_df_clean
    .assign(NFT_age_in_years=lambda df: REFERENCE_YEAR - df['year_create'])
    .drop(columns='year_create')
)

In [8]:
# Set the NFT ages aside in their own single-column DataFrame.
nft_age_df = NFT_df_clean["NFT_age_in_years"].to_frame()
nft_age_df.head()

Unnamed: 0,NFT_age_in_years
0,2
1,2
2,1
3,2
4,2


In [9]:
# The age column is kept out of the clustering features, so remove it here.
NFT_df_clean = NFT_df_clean.drop("NFT_age_in_years", axis=1)
NFT_df_clean.head()

Unnamed: 0,title,name_of_work,creator,price,type_of_nft,likes,nsfw,total_units
0,30 min Drawings,Giant Frog,kristyglas,50,PHOTO,2,False,30
1,Experimental Video,Biospecimens,juliakponsford,500,VIDEO,0,False,1
2,Sexy Art,long legs,badsexy,10,PHOTO,0,True,2
3,Dream World,A Guide in my Dreams,yoslehz,20,PHOTO,1,False,2
4,Dream World,Silent Observer,yoslehz,20,GIF,0,False,2


In [10]:
# One-hot encode the text and boolean features with get_dummies().
# NOTE(review): "title", "name_of_work" and "creator" are free-text columns, so
# the encoded frame ends up thousands of columns wide — confirm this is intended.
categorical_columns = ["title", "name_of_work", "creator", "type_of_nft", "nsfw"]
X_df = pd.get_dummies(NFT_df_clean, columns=categorical_columns)

In [11]:
# (rows, columns) of the one-hot encoded feature frame.
X_df.shape

(4173, 6295)

In [12]:
# Preview the first ten rows of the encoded feature frame.
X_df.head(10)

Unnamed: 0,price,likes,total_units,title_ short horror stories in a poetry,title_ Celebrities Art,title_ Changes in the frequency of the aura,title_ DeviantArt Fennec Fox Sketch,title_ Devices,title_ Distortions,title_ Heart transfer. Moon Guardian and Demon Hanwallu.,...,creator_zord189,creator_zullyscott,creator_zuly63,creator_zuppaman,creator_zvx,type_of_nft_GIF,type_of_nft_PHOTO,type_of_nft_VIDEO,nsfw_False,nsfw_True
0,50,2,30,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,1,0
1,500,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,1,0
2,10,0,2,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,0,1
3,20,1,2,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,1,0
4,20,0,2,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,1,0
5,20,0,2,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,1,0
6,20,0,2,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,1,0
7,19,2,6,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,1
8,64,1,3,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,1,0
9,35,3,5,0,0,0,0,0,0,0,...,0,0,0,0,0,0,1,0,1,0


In [13]:
# Standardize every feature column with StandardScaler().
feature_scaler = StandardScaler()
X_scaled = feature_scaler.fit_transform(X_df)

In [14]:
# Inspect the standardized feature array.
X_scaled

array([[-0.09235921,  1.6229651 ,  2.76719216, ..., -0.34636367,
         0.18493443, -0.18493443],
       [-0.00965237, -0.51384934, -0.33950679, ...,  2.8871388 ,
         0.18493443, -0.18493443],
       [-0.09971093, -0.51384934, -0.23237924, ..., -0.34636367,
        -5.40732193,  5.40732193],
       ...,
       [-0.09235921, -0.51384934, -0.12525169, ..., -0.34636367,
         0.18493443, -0.18493443],
       [-0.08335336, -0.51384934,  0.30325851, ..., -0.34636367,
         0.18493443, -0.18493443],
       [ 0.02710623, -0.51384934, -0.33950679, ..., -0.34636367,
         0.18493443, -0.18493443]])

In [15]:
# Initialize PCA model with two components.
# random_state is pinned so a fresh run reproduces the same components: with
# the default svd_solver="auto", scikit-learn may pick the randomized solver
# for a large, wide matrix reduced to only a few components.
pca = PCA(n_components=2, random_state=0)

In [16]:
# Get two principal components for the data.
# Fits the PCA model on the standardized features and returns each row's
# coordinates on the two components.
X_pca = pca.fit_transform(X_scaled)

In [17]:
# Fraction of the total variance captured by each of the two components.
# NOTE(review): the recorded output shows both ratios below 0.001, so the 2-D
# projection keeps very little of the feature variance — read the clusters
# built on it with that in mind.
pca.explained_variance_ratio_

array([0.00067956, 0.00063078])

In [18]:
# Wrap the two principal components in a DataFrame that shares NFT_df_clean's index.
NFT_df_pca = pd.DataFrame(X_pca, index=NFT_df_clean.index, columns=["PC 1", "PC 2"])
NFT_df_pca.head(10)

Unnamed: 0,PC 1,PC 2
0,0.07393,-1.314662
1,-1.336734,2.16322
2,9.955393,3.696914
3,0.275528,-0.37297
4,-1.103282,1.390097
5,-1.103315,1.39005
6,0.215993,-0.405621
7,4.062991,7.047635
8,-1.841533,4.068975
9,-0.017623,-0.571181


In [19]:
# Elbow-curve setup: the candidate cluster counts 1 through 10, and an empty
# list that will collect the inertia measured for each of them.
k = [*range(1, 11)]
inertia = []

In [20]:
# Looking for the best K: fit one KMeans model per candidate k and record its inertia.
# - The list is rebuilt from scratch, so re-running this cell cannot append a
#   second set of values and break the length match with k.
# - Only the two PC columns are used, because a later cell adds a "Class"
#   column to NFT_df_pca that must not leak into the features on a re-run.
# - n_init is explicit so the number of restarts does not depend on the
#   installed scikit-learn version's default.
pca_features = NFT_df_pca[["PC 1", "PC 2"]]
inertia = [
    KMeans(n_clusters=i, random_state=0, n_init=10).fit(pca_features).inertia_
    for i in k
]

In [21]:
# Pair each candidate k with its inertia in a DataFrame, then draw the elbow curve with hvPlot.
elbow_data = dict(k=k, inertia=inertia)
df_elbow = pd.DataFrame.from_dict(elbow_data)
df_elbow.hvplot.line(x="k", y="inertia", title="Elbow Curve", xticks=k)

In [22]:
# Initializing model with K = 3 (based off elbow curve graph).
# n_init is explicit so the number of restarts does not depend on the
# installed scikit-learn version's default.
model = KMeans(n_clusters=3, random_state=5, n_init=10)

# Fit on the two PC columns only. This cell adds a "Class" column to
# NFT_df_pca below, so selecting the features explicitly keeps a re-run from
# clustering on the previous run's labels.
pca_features = NFT_df_pca[["PC 1", "PC 2"]]
model.fit(pca_features)

# Predict clusters for the same rows the model was fitted on.
predictions = model.predict(pca_features)

# Add the predicted class column to the dataframe
NFT_df_pca["Class"] = model.labels_
NFT_df_pca.head()

Unnamed: 0,PC 1,PC 2,Class
0,0.07393,-1.314662,1
1,-1.336734,2.16322,0
2,9.955393,3.696914,2
3,0.275528,-0.37297,1
4,-1.103282,1.390097,0


In [23]:
# Cluster label predicted for each row.
predictions

array([1, 0, 2, ..., 1, 0, 1])

In [24]:
# Assemble the final table with index-aligned inner joins, in this order:
#   1. the cleaned NFT columns,
#   2. the two principal components,
#   3. the NFT age that was set aside earlier,
#   4. the cluster label ("Class") assigned by the fitted KMeans model.
principal_components = NFT_df_pca[["PC 1", "PC 2"]]
clustered_df = (
    NFT_df_clean
    .join(principal_components, how='inner')
    .join(nft_age_df, how='inner')
    .join(NFT_df_pca["Class"], how='inner')
)

# Print the shape of the clustered_df
print(clustered_df.shape)
clustered_df.head(10)

(4173, 12)


Unnamed: 0,title,name_of_work,creator,price,type_of_nft,likes,nsfw,total_units,PC 1,PC 2,NFT_age_in_years,Class
0,30 min Drawings,Giant Frog,kristyglas,50,PHOTO,2,False,30,0.07393,-1.314662,2,1
1,Experimental Video,Biospecimens,juliakponsford,500,VIDEO,0,False,1,-1.336734,2.16322,2,0
2,Sexy Art,long legs,badsexy,10,PHOTO,0,True,2,9.955393,3.696914,1,2
3,Dream World,A Guide in my Dreams,yoslehz,20,PHOTO,1,False,2,0.275528,-0.37297,2,1
4,Dream World,Silent Observer,yoslehz,20,GIF,0,False,2,-1.103282,1.390097,2,0
5,Dream World,Joy and Chaos,yoslehz,20,GIF,0,False,2,-1.103315,1.39005,2,0
6,Dream World,I found colors in my sky,yoslehz,20,PHOTO,0,False,2,0.215993,-0.405621,2,1
7,"nothing is real, all is real",Isn´t she lovely,solymi,19,GIF,2,True,6,4.062991,7.047635,1,2
8,trippy_emmeline,Emmeline on LSD,solymi,64,GIF,1,False,3,-1.841533,4.068975,1,0
9,experimental,Angela 👼,elias15g,35,PHOTO,3,False,5,-0.017623,-0.571181,2,1


In [26]:
# Creating a 3D-Scatter with the PCA data and the clusters.
# Plotly Express maps a numeric color column to a continuous color scale, so
# the integer cluster labels are converted to strings to get one discrete
# color (and one legend entry) per cluster. category_orders keeps the legend
# sorted by label rather than by order of first appearance.
plot_df = clustered_df.assign(Class=clustered_df["Class"].astype(str))
fig = px.scatter_3d(
    plot_df,
    x="PC 1",
    y="PC 2",
    z="price",
    color="Class",
    symbol="Class",
    category_orders={"Class": sorted(plot_df["Class"].unique())},
    width=800,
    hover_name="NFT_age_in_years",
    hover_data=["title"],
)
# Anchor the legend in the top-left corner of the figure.
fig.update_layout(legend=dict(x=0, y=1))
fig.show()

In [27]:
# Keep only the price and age columns of clustered_df; these are the two
# columns that get rescaled for the final scatter plot.
columns_to_scale = ['price', 'NFT_age_in_years']
nft_age_data_df = clustered_df[columns_to_scale]
nft_age_data_df.head()

Unnamed: 0,price,NFT_age_in_years
0,50,2
1,500,2
2,10,1
3,20,2
4,20,2


In [28]:
# Rescale both columns to the [0, 1] range with MinMaxScaler.
scaler = MinMaxScaler().fit(nft_age_data_df)
nft_age_data_scaled = scaler.transform(nft_age_data_df)
nft_age_data_scaled

array([[2.66666667e-04, 4.34782609e-02],
       [2.76666667e-03, 4.34782609e-02],
       [4.44444444e-05, 0.00000000e+00],
       ...,
       [2.66666667e-04, 4.34782609e-02],
       [5.38888889e-04, 4.34782609e-02],
       [3.87777778e-03, 4.34782609e-02]])

In [29]:
# Put the scaled array back into a DataFrame indexed like clustered_df, and
# attach the "title" and "Class" columns from clustered_df alongside it.
nft_scaled_df = pd.DataFrame(nft_age_data_scaled, index=clustered_df.index).assign(
    title=clustered_df["title"],
    Class=clustered_df["Class"],
)

nft_scaled_df.head(10)

Unnamed: 0,0,1,title,Class
0,0.000267,0.043478,30 min Drawings,1
1,0.002767,0.043478,Experimental Video,0
2,4.4e-05,0.0,Sexy Art,2
3,0.0001,0.043478,Dream World,1
4,0.0001,0.043478,Dream World,0
5,0.0001,0.043478,Dream World,0
6,0.0001,0.043478,Dream World,1
7,9.4e-05,0.0,"nothing is real, all is real",2
8,0.000344,0.0,trippy_emmeline,0
9,0.000183,0.043478,experimental,1


In [30]:
# Give the two scaled columns (labelled 0 and 1) their original names back.
scaled_column_names = {0: "price", 1: "NFT_age_in_years"}
nft_scaled_df = nft_scaled_df.rename(scaled_column_names, axis="columns")
nft_scaled_df.head()

Unnamed: 0,price,NFT_age_in_years,title,Class
0,0.000267,0.043478,30 min Drawings,1
1,0.002767,0.043478,Experimental Video,0
2,4.4e-05,0.0,Sexy Art,2
3,0.0001,0.043478,Dream World,1
4,0.0001,0.043478,Dream World,0


In [31]:
# Create a hvplot.scatter plot of scaled price (x) against scaled NFT age (y),
# with one group per cluster and the NFT title shown on hover.
nft_scaled_df.hvplot.scatter(
    x="price",
    y="NFT_age_in_years",
    by="Class",
    hover_cols=["title"],  # pass a list of column names, the documented form
)