In [1]:
import numpy as np
import pandas as pd
from scipy.stats import norm
import scipy.stats as si

## QUESTIONS
### 1. Determine the correlation between the energy used to seal the Filecoin data and the price of its $FIL token (5 pts)
### 2. Determine the correlation between the capacity of storage added per day in the Filecoin network and the price of its $FIL token (5 pts)
### 3. Determine the correlation between the energy consumption rate of the Filecoin network and its $FIL token price (5 pts)
### 4. What observations can you deduce from these 3 correlations? (5 pts)


# IMPORTING DATA

In [2]:
# Load the Filecoin Green energy-consumption export, parsing "Date" on read.
consum = pd.read_csv(
    "FilecoinGreenEnergyConsumption-FilecoinGreenEnergyConsumption.csv",
    parse_dates=["Date"],
)

# Drop the tzinfo from every timestamp (the clock value itself is kept) for plotting.
# NOTE(review): this presumably also lets "Date" match the price data's dates
# when the two frames are merged later -- confirm.
consum["Date"] = consum["Date"].apply(lambda stamp: stamp.replace(tzinfo=None))

In [3]:
# FIL-USD price history; "Date" is parsed into datetimes on read.
filusd = pd.read_csv("FIL-USD-FIL-USD.csv", parse_dates=["Date"])

# Log return of each row relative to the previous row: ln(P_t / P_{t-1}).
# The first row has no predecessor, so its log return is NaN.
filusd["Log Returns"] = np.log(
    filusd["Adj Close"].div(filusd["Adj Close"].shift(1))
)

# Keep only the columns used downstream and expose the adjusted close as "Price".
filusd_relevant = filusd[["Date", "Adj Close", "Log Returns"]].rename(
    columns={"Adj Close": "Price"}
)

In [4]:
# Join the price & returns dataframe onto the consumption data by "Date".
# This is an inner join (the pandas default): only dates present in both frames survive.
merged = consum.merge(right=filusd_relevant, how="inner", on="Date")

## VANILLA CORRELATION

In [5]:
# Pairwise Pearson correlation matrix (the DataFrame.corr default method)
# of the three network metrics and the token price.
corr_matrix = merged[
    [
        "Data storage capacity added per day",
        "Energy consumption rate estimate",
        "Energy used to seal data estimate",
        "Price",
    ]
].corr(method="pearson")
corr_matrix

Unnamed: 0,Data storage capacity added per day,Energy consumption rate estimate,Energy used to seal data estimate,Price
Data storage capacity added per day,1.0,0.635297,1.0,0.583667
Energy consumption rate estimate,0.635297,1.0,0.635297,0.083658
Energy used to seal data estimate,1.0,0.635297,1.0,0.583667
Price,0.583667,0.083658,0.583667,1.0


In [6]:
# Q1: Pearson correlation coefficient and its p-value for
# energy used to seal the Filecoin data vs. the $FIL token price.
corr_pair1 = si.pearsonr(
    merged["Energy used to seal data estimate"],
    merged["Price"],
)
corr_pair1

(0.5836671865903876, 9.075065379224059e-73)

In [7]:
# Q2: Pearson correlation coefficient and its p-value for
# storage capacity added per day in the Filecoin network vs. the $FIL token price.
corr_pair2 = si.pearsonr(
    merged["Data storage capacity added per day"],
    merged["Price"],
)
corr_pair2

(0.5836671865917777, 9.075065370468663e-73)

In [8]:
# Q3: Pearson correlation coefficient and its p-value for
# the Filecoin network's energy consumption rate vs. the $FIL token price.
corr_pair3 = si.pearsonr(
    merged["Energy consumption rate estimate"],
    merged["Price"],
)
corr_pair3

(0.08365776171487421, 0.019140267126659852)

In [9]:
# One summary line per question: coefficient (index 0), p-value (index 1),
# and whether the p-value falls below the 5% significance threshold.
print(
    *(
        f'{label}. Correlation: {pair[0]}, p-value: {pair[1]}. Statistically Significant (significance lvl = 5%)?: {pair[1] < 0.05} '
        for label, pair in (
            ("Q1", corr_pair1),
            ("Q2", corr_pair2),
            ("Q3", corr_pair3),
        )
    ),
    sep="\n",
)


Q1. Correlation: 0.5836671865903876, p-value: 9.075065379224059e-73. Statistically Significant (significance lvl = 5%)?: True 
Q2. Correlation: 0.5836671865917777, p-value: 9.075065370468663e-73. Statistically Significant (significance lvl = 5%)?: True 
Q3. Correlation: 0.08365776171487421, p-value: 0.019140267126659852. Statistically Significant (significance lvl = 5%)?: True 


### Q1. 
- Correlation: 0.5836671865903876 
- p-value: 9.075065379224059e-73
- Statistically Significant (significance lvl = 5%)?: True
### Q2. 
- Correlation: 0.5836671865917777
- p-value: 9.075065370468663e-73
- Statistically Significant (significance lvl = 5%)?: True 
### Q3. 
- Correlation: 0.08365776171487421
- p-value: 0.019140267126659852 
- Statistically Significant (significance lvl = 5%)?: True


### Q4. What observations can you deduce from these 3 correlations?
- All three correlations are statistically significant at the 5% level of significance.
- Moderate positive correlation between prices of FIL token and energy used to seal the Filecoin data. (0.5836671865903876)
- Moderate positive correlation between prices of FIL token and capacity of storage added per day in the Filecoin network. (0.5836671865917777). 
        -> With r ≈ 0.58 (r² ≈ 0.34), roughly a third of the variance of the capacity of storage added per day is linearly associated with (i.e. can be partially "explained" by) the price of the FIL token. Correlation alone does not establish which variable drives the other.

- The correlations of the FIL price with each of these 2 variables are almost equal. This is because the 2 variables are themselves perfectly correlated (corr = 1), since "Energy use due to sealing is estimated by multiplying the increase in storage capacity over a given time period by a constant value". 

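- Low (almost negligible) positive correlation between prices of FIL token and energy consumption rate of the Filecoin network. (0.08365776171487421)
- Although this correlation is statistically significant at the 5% level (p ≈ 0.019), its magnitude is tiny (r² ≈ 0.007), so the energy consumption rate of the Filecoin network is practically uncorrelated with the price of the FIL token.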