# Volatility Analysis


In [None]:
import pandas as pd
import os
# os.getenv
from dotenv import load_dotenv
import hvplot.pandas
import requests
from utils import *
from collect_contracts import *
import seaborn as sns
import numpy as np

### Importing the data

In [None]:
# Load variables from a local .env file into the environment so the API key
# does not have to be hard-coded in the notebook.
load_dotenv()

rarify_api_key = os.getenv("RARIFY_API_KEY")
# Show only the type of the value, never the key itself; NoneType here means
# RARIFY_API_KEY was not found in the environment.
display(type(rarify_api_key))

In [None]:
# Rarify contracts endpoint; the query string requests the "insights" include
# and sorts on "-unique_buyers".
top_collections_baseurl = (
    "https://api.rarify.tech/data/contracts"
    "?include=insights&sort=-unique_buyers"
)

In [None]:
# Top collections return is a dictionary holding the collection's contract id
# as the key and the network name as its value.
top_collections_return = fetch_top_collections(top_collections_baseurl, rarify_api_key)

# In my analysis these four contracts proved problematic because they skewed
# the data, so they are removed here to avoid issues down the line.
excluded_contract_ids = (
    'a310425046661c523d98344f7e9d66b32195365d',
    '495f947276749ce646f68ac8c248420045cb7b5e',
    '57f1887a8bf19b14fc0df6fd9b2acc9af147ea85',
    'c36442b4a4522e871399cd717abdd847ab11fe88',
)
for contract_id in excluded_contract_ids:
    # pop() with a default instead of `del`: the API result is live data, so a
    # contract that is no longer returned must not abort the notebook with a
    # KeyError.
    top_collections_return.pop(contract_id, None)
top_collections_return

In [None]:
# Fetch the data for every contract id in top_collections_return using the
# project helper (the API key is passed through to it).
collection_df = fetch_top_50_collections_data_api(top_collections_return, rarify_api_key)
# collection_df = pd.DataFrame(collection_df)
# Keep a separate copy of the fetched data.
# NOTE(review): presumably a DataFrame — confirm the helper's return type.
coll_df = collection_df.copy()

In [None]:
collection_df

In [None]:
# replace with append_column_names(df, contract_ids)
# append_column_names(coll_df, top_collections_return)

# One "<contract_id>_<metric>" label per (collection, metric) pair, grouped by
# collection in dictionary order and by metric in the order listed in `cols`.
cols = ["avg_price", "max_price", "min_price", "trades", "unique_buyers", "volume"]
new_cols = [
    f"{contract_id}_{metric}"
    for contract_id in top_collections_return
    for metric in cols
]

In [None]:
# Work on a copy so collection_df keeps its original column labels.
top_collection_df = collection_df.copy()
# Relabel every column as "<contract_id>_<metric>".
# NOTE(review): assumes the fetched frame has exactly len(new_cols) columns in
# the same collection/metric order as new_cols — confirm against the fetch helper.
top_collection_df.columns = new_cols
top_collection_df.head()

In [None]:
# Derive one object per statistic from the relabelled frame. The find_* helpers
# come from the star imports at the top of the notebook; each receives the full
# frame plus the contract-id dictionary.
# NOTE(review): presumably each helper selects the "<contract_id>_<metric>"
# columns for its metric — verify in the helper module.
avg_price_df = find_avg_price_df(top_collection_df, top_collections_return)
min_price_df = find_min_price_df(top_collection_df, top_collections_return)
max_price_df = find_max_price_df(top_collection_df, top_collections_return)
volume_df = find_volume_df(top_collection_df, top_collections_return)
pct_chg_df = find_pct_change_df(top_collection_df, top_collections_return)

In [None]:
volume_df = volume_df.mean()

In [None]:
std_dev_df = find_std_dev_df(pct_chg_df, top_collections_return)

In [None]:
std_dev_df = std_dev_df.mean()

In [None]:
avg_price_df = avg_price_df.mean()
min_price_df = min_price_df.mean()
max_price_df = max_price_df.mean()
pct_chg_df = pct_chg_df.mean()



In [None]:
# Keep only the first len(top_collections_return) entries of the averaged
# percent changes.
# NOTE(review): an integer slice like this is positional when the index holds
# column labels — confirm the index type if this ever returns the wrong rows.
pct_chg_df = pct_chg_df[0: len(top_collections_return)]
pct_chg_df

In [None]:
# Pair every statistic with its column label, then place them side by side so
# that each statistic becomes one column of the summary frame.
labelled_stats = [
    ('avg_price', avg_price_df),
    ('min_price', min_price_df),
    ('max_price', max_price_df),
    ('volume', volume_df),
    ('pct_chg', pct_chg_df),
    ('std_dev', std_dev_df),
]
keys = [label for label, _ in labelled_stats]
df_list = [stat for _, stat in labelled_stats]
sum_df = pd.concat(df_list, axis=1, keys=keys)

In [None]:
sum_df

In [None]:
# sum_df.to_csv('top_collections_data.csv')

In [None]:
corr_df = sum_df[keys].corr()

In [None]:
corr_df

In [None]:
sns.heatmap(corr_df)

In [None]:
# This function appends pct_chg columns to the end of the current working dataframe
pct_chg_pls_df = find_pct_change(top_collection_df, top_collections_return)

In [None]:
# Keep only the trailing len(top_collections_return) columns, i.e. the percent
# change columns that find_pct_change appended to the end of the frame.
# .copy() makes this an independent frame: a later cell assigns a new column to
# it, which on a bare iloc slice triggers pandas' SettingWithCopyWarning and may
# not behave as intended.
top_collections_pct_chg = pct_chg_pls_df.iloc[:, -(len(top_collections_return)):].copy()

In [None]:
# Add a "basket" series: for every row, the mean of the pct_chg values across
# all top-collection columns.
basket_pct_chg = top_collections_pct_chg.mean(axis=1)
top_collections_pct_chg['top_collections_basket_pct_chg'] = basket_pct_chg
# Discard every row that still contains a missing value.
top_collections_pct_chg = top_collections_pct_chg.dropna()
top_collections_pct_chg.head()
top_collections_pct_chg.columns

In [None]:
# Average percent change per column (every collection plus the basket).
pct_chgs = top_collections_pct_chg.mean()
pct_chgs

In [None]:
# Here we find the standard deviation of the percent changes for every column of
# the top collections dataframe and return only those standard deviation values.
def find_std_devs(df, contract_ids):
    """Return the standard deviation of each percent-change column of ``df``.

    For every source column ``<col>`` a constant column ``<col>_std_dev``
    holding that column's standard deviation is also written into ``df``.
    This in-place addition is kept from the original implementation because
    the same frame is handed to later cells.

    Args:
        df: DataFrame of percent-change columns. Modified in place.
        contract_ids: Contract ids of the collections. Accepted for call
            compatibility; the result no longer depends on its length.

    Returns:
        pandas.Series indexed by ``<col>_std_dev``, one entry per source
        column, in the column order of ``df``.
    """
    suffix = "_std_dev"
    # Columns produced by an earlier call are skipped, so re-running the
    # notebook cell does not create "<col>_std_dev_std_dev" columns.
    source_cols = [col for col in df.columns if not str(col).endswith(suffix)]
    std_by_col = {f"{col}{suffix}": df[col].std() for col in source_cols}
    for label, value in std_by_col.items():
        df[label] = value
    # Select by name rather than by "the last len(contract_ids) + 1 columns":
    # the positional slice returned means of raw columns whenever the frame
    # width did not match the number of contract ids.
    return pd.Series(std_by_col, dtype="float64")
std_devs = find_std_devs(top_collections_pct_chg, top_collections_return)

In [None]:
std_devs

In [None]:
# Standard deviation is an indication of the variability of the price changes of the data.  A collection with a higher standard deviation we would evaluate as more volatile.
std_devs.hvplot.bar()

In [None]:
# The betas for each collection are derived by comparing it to the whole set of data. 
# Ideally we would use a much broader index for this analysis which would take into account hundreds of NFT collections
# For the time being, these figures can tell us how risky one collection is compared to the range of collections.
# For fluctuations in the market, which collections are going to be able to withstand price changes in order to serve as safe and stable collateral

betas = find_beta(top_collections_pct_chg, top_collections_return)
betas_series = pd.Series(betas)
betas_series

In [None]:
# This is a preprocessing step that I used to prototype a linear regression model that I ended up not using. This can be ignored for now. 
# Nine hard-coded positions, 0 through 8.
# NOTE(review): presumably one per collection plus the basket — confirm if the
# set of collections changes.
x_values = [0,1,2,3,4,5,6,7,8]
# Order matters: positions 0-3 are renamed to index / beta / std_dev / pct_chg
# when the analysis dataframe is built in a later cell.
analysis_list = [x_values, betas_series, std_devs, pct_chgs]


In [None]:
# Build the analysis table: one row per entry (a collection, or the basket at
# position 8) and one column per statistic, with reader-friendly column names
# in place of the positional 0-3 labels.
index_y = {0: "index", 1: "beta", 2: "std_dev", 3: "pct_chg"}
analysis_df = pd.DataFrame(analysis_list).T.rename(columns=index_y)
# analysis_df = analysis_df.drop(8, axis=0)
copy_analysis_df = analysis_df.copy()

In [None]:
analysis_df['beta'].hvplot.bar()

In [None]:
analysis_df.hvplot.scatter(x="std_dev", y="beta")

In [None]:
# Each series in our dataframe is turned into a list to prepare it for a
# correlation coefficient function.
# The slice length follows the number of betas instead of a hard-coded 8, so
# the three lists keep the same length (np.corrcoef requires that) and the
# trailing basket entry of std_devs / pct_chgs is still left out when the set
# of collections changes.
collection_count = len(betas)
std_lst = list(std_devs.iloc[:collection_count].values)
beta_lst = list(betas.values())
pct_chg_lst = list(pct_chgs.iloc[:collection_count].values)


### For each statistic in our dataframe...
We run a correlation function to find the correlation between that statistic and our Beta value. 
In doing so we can see how much each statistical measure (e.g. avg_pct_chg, avg_std_dev) is correlated with our Beta value. 
This information may be obvious (i.e., standard deviation is used to calculate Beta, so it likely has a high correlation with Beta, and standard deviation is derived from percent change, so percent change likely has a high correlation with Beta); however, I believe that it is important to see the correlational decay as we abstract to more fundamental observations. It may be beneficial to use a rolling percent change as a proxy for Beta and save time and computational resources, or it may be beneficial to take the extra step of deriving standard deviation. A correlational analysis will help shed light on that.

It will also be useful going forward to include other variables in our correlational approach, as they will all help paint a broader picture of what goes into determining the riskiness and volatility of an asset. In the following cells I also run a correlation function on average price and Beta. In that example I find that average price is weakly negatively correlated with Beta. This fact should not be surprising, and I expect this result to be reinforced with more data. To explain this: as a product has a higher value, it becomes more sought after in the market and is subject to greater market scrutiny, in addition to becoming more desirable. This helps to establish the true market value of the product with decreasing deviation in market price. A highly stable asset such as a piece of real estate is highly stable because we can compare it to the value of thousands if not millions of others of the same class of asset in similar regions, or having similar specifications. We know that a million dollar house in San Francisco will not be worth 200,000 dollars tomorrow. Similarly, we can be more certain (evaluate as having less risk) that a CryptoPunk will not lose 75% of its value overnight. We may not be able to say the same of an emerging asset with relatively low trading volume.

This analysis is all done with an unspoken (until now) assumption that Beta is the holy grail of risk analysis and volatility relative to the market. For the time being, I believe this to be the case; however, that notion could very easily be disrupted. An additional factor that we will invariably have to consider is how to determine the risk of an NFT asset versus an asset of another class. An analysis of this type will help to determine a baseline collateralization discount factor to apply to NFTs of almost any type. Perhaps NFTs as a whole are so vulnerable to volatility that we apply an additional 10% discount to their value when compared to something like a car. We also must keep in mind the duration of the loan being provided when making this decision. 


In [None]:
std_beta_corr = np.corrcoef(std_lst, beta_lst)
std_beta_corr

In [None]:
# Here we see the decay of Beta correlation as we abstract to higher fundamental observations
# Percent change of the average price of the collection correlates 86% to the Beta value
pct_chg_beta_corr = np.corrcoef(pct_chg_lst, beta_lst)
pct_chg_beta_corr

In [None]:
# Second family of helpers (no "_df" suffix), again fed the relabelled frame and
# the contract-id dictionary; only the first row of each result is used in the
# cells that follow.
# NOTE(review): presumably these return per-collection aggregates — verify in
# the helper module.
avg_prices_df = find_avg_price(top_collection_df, top_collections_return)
max_prices_df = find_max_price(top_collection_df, top_collections_return)
min_prices_df = find_min_price(top_collection_df, top_collections_return)
# Rebinds volume_df, which earlier cells assigned from find_volume_df(...) and
# then from its .mean().
volume_df = find_volume(top_collection_df, top_collections_return)

In [None]:
# First row of the frame returned by find_avg_price.
avg_prices = avg_prices_df.iloc[0]
# Trim to the number of betas rather than a hard-coded 8 so the list always has
# the same length as beta_lst, which np.corrcoef requires.
avg_prices_lst = list(avg_prices.iloc[:len(beta_lst)].values)

In [None]:
# First row of the frame returned by find_max_price.
max_prices = max_prices_df.iloc[0]
# Trim to the number of betas rather than a hard-coded 8 so the list always has
# the same length as beta_lst, which np.corrcoef requires.
max_prices_lst = list(max_prices.iloc[:len(beta_lst)].values)

In [None]:
# First row of the frame returned by find_min_price.
min_prices = min_prices_df.iloc[0]
# Trim to the number of betas rather than a hard-coded 8 so the list always has
# the same length as beta_lst, which np.corrcoef requires.
min_prices_lst = list(min_prices.iloc[:len(beta_lst)].values)

In [None]:
# First row of the frame returned by find_volume.
vol_avg = volume_df.iloc[0]
# Trim to the number of betas rather than a hard-coded 8 so the list always has
# the same length as beta_lst, which np.corrcoef requires.
volume_lst = list(vol_avg.iloc[:len(beta_lst)].values)

In [None]:
# Here, average prices weakly, negatively correlates to beta
avg_prices_beta_corr = np.corrcoef(avg_prices_lst, beta_lst)
avg_prices_beta_corr 

In [None]:
max_prices_beta_corr = np.corrcoef(max_prices_lst, beta_lst)
max_prices_beta_corr

In [None]:
min_prices_beta_corr = np.corrcoef(min_prices_lst, beta_lst)
min_prices_beta_corr

In [None]:
vol_beta_corr = np.corrcoef(volume_lst, beta_lst)
vol_beta_corr

### Results
The results of correlating average price with Beta reveal that average price could be used as a weak indicator of the stability of a collection, but it certainly isn't a silver-bullet method. This is promising because it opens the door for smaller collections to be used as a stable enough asset for collateralization. Our initial hypothesis is generally true that we can have more faith in the top collections to perform well as collateral, but viewing top collections as the only viable asset is an error. 

## Future Applications

* community size (twitter followers, etc.)
* time in the market
* unique buyers
* whale holders