# Power Laws

## Get Data

In [1]:
import pandas as pd
from coinmetrics.api_client import CoinMetricsClient
import logging
from scipy.stats import linregress, lognorm
import matplotlib.pyplot as plt

# Configure logging
logging.basicConfig(
    format='%(asctime)s %(levelname)-8s %(message)s',
    level=logging.INFO,
    datefmt='%Y-%m-%d %H:%M:%S'
)

# Initialize Coin Metrics API client
client = CoinMetricsClient()

# Define the asset, metric, and time range
asset = 'btc'
metric = 'ReferenceRate'
start_time = '2010-09-01'
end_time = '2024-08-31'
frequency = '1d'

# Fetch the metric data for the specified asset and time range
logging.info("Fetching BTC ReferenceRate...")
df = client.get_asset_metrics(
    assets=asset,
    metrics=[metric],
    frequency=frequency,
    start_time=start_time,
    end_time=end_time
).to_dataframe()

# Rename the 'ReferenceRate' column to 'Close'
df = df.rename(columns={metric: 'Close'})

# Set 'time' as the index
df['time'] = pd.to_datetime(df['time'])
df.set_index('time', inplace=True)

# Display the first few rows
df.head()

2024-08-31 17:12:30 INFO     Fetching BTC ReferenceRate...
2024-08-31 17:12:32 INFO     Sleeping for a rate limit window because 429 (too many requests) error was returned. Pleasesee Coin Metrics APIV4 documentation for more information: https://docs.coinmetrics.io/api/v4/#tag/Rate-limits
2024-08-31 17:12:40 INFO     Sleeping for a rate limit window because 429 (too many requests) error was returned. Pleasesee Coin Metrics APIV4 documentation for more information: https://docs.coinmetrics.io/api/v4/#tag/Rate-limits
2024-08-31 17:12:48 INFO     Sleeping for a rate limit window because 429 (too many requests) error was returned. Pleasesee Coin Metrics APIV4 documentation for more information: https://docs.coinmetrics.io/api/v4/#tag/Rate-limits


Unnamed: 0_level_0,asset,Close
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2010-09-01 00:00:00+00:00,btc,0.06
2010-09-02 00:00:00+00:00,btc,0.062025
2010-09-03 00:00:00+00:00,btc,0.0634
2010-09-04 00:00:00+00:00,btc,0.06085
2010-09-05 00:00:00+00:00,btc,0.062178


## Create Days

In [2]:
# Ensure 'start_date' is timezone-naive
start_date = pd.Timestamp(start_time).tz_localize(None)

# Ensure the index of df is timezone-naive
df.index = df.index.tz_localize(None)

# Create 'Days' column
df['Days'] = (df.index - start_date).days
df = df.tail(-1)

df = df.drop(columns=['asset'])
# Convert the 'Close' and 'Days' columns to numeric types, coercing errors to NaN
df['Close'] = pd.to_numeric(df['Close'], errors='coerce')
df['Days'] = pd.to_numeric(df['Days'], errors='coerce')

df = df.dropna(subset=['Close', 'Days'])
df

Unnamed: 0_level_0,Close,Days
time,Unnamed: 1_level_1,Unnamed: 2_level_1
2010-09-02,0.062025,1
2010-09-03,0.0634,2
2010-09-04,0.06085,3
2010-09-05,0.062178,4
2010-09-06,0.06165,5
...,...,...
2024-08-27,62983.442773,5109
2024-08-28,59548.007622,5110
2024-08-29,59100.774755,5111
2024-08-30,59339.790995,5112


## Create log_days and log_close

In [3]:
import numpy as np
import pandas as pd
from scipy.stats import linregress

# Convert 'Days' to Float64
df['Days'] = pd.to_numeric(df['Days'], errors='coerce').astype('float64')
df['Close'] = pd.to_numeric(df['Close'], errors='coerce').astype('float64')

df['log_days'] = np.log(df['Days'])
df['log_close'] = np.log(df['Close'])

# Convert 'log_days' to Float64
df['log_days'] = pd.to_numeric(df['log_days'], errors='coerce').astype('float64')
df['log_close'] = pd.to_numeric(df['log_close'], errors='coerce').astype('float64')

df

Unnamed: 0_level_0,Close,Days,log_days,log_close
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2010-09-02,0.062025,1.0,0.000000,-2.780221
2010-09-03,0.063400,2.0,0.693147,-2.758291
2010-09-04,0.060850,3.0,1.098612,-2.799343
2010-09-05,0.062178,4.0,1.386294,-2.777750
2010-09-06,0.061650,5.0,1.609438,-2.786282
...,...,...,...,...
2024-08-27,62983.442773,5109.0,8.538759,11.050627
2024-08-28,59548.007622,5110.0,8.538955,10.994538
2024-08-29,59100.774755,5111.0,8.539150,10.986999
2024-08-30,59339.790995,5112.0,8.539346,10.991035


## Fitting

### Linear Fitting v1

### Linear Fitting v2

## Calculate upper and lower bounds

In [5]:
# Calculate residuals
df['residuals'] = df['log_close'] - df['log_fitted']

# Shift residuals to make them all positive for log-normal fitting
shifted_residuals = df['residuals'] - df['residuals'].min() + 1e-9  # Shift by the minimum value plus a small epsilon

# Fit to a log-normal distribution
shape, loc, scale = lognorm.fit(shifted_residuals, floc=0)

# Calculate deviation levels with adjusted scaling
deviation_scale = 1.5  # Adjust this scale to control how wide the bounds are

df['upper_bound'] = np.exp(df['log_fitted'] + deviation_scale * lognorm.ppf(0.34, shape, loc=loc, scale=scale))
df['lower_bound'] = np.exp(df['log_fitted'] - deviation_scale * lognorm.ppf(0.34, shape, loc=loc, scale=scale))
df

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['residuals'] = df['log_close'] - df['log_fitted']
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['upper_bound'] = np.exp(df['log_fitted'] + deviation_scale * lognorm.ppf(0.34, shape, loc=loc, scale=scale))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['lower_bound'] = np.exp(df['log_fitte

Unnamed: 0_level_0,Close,Days,log_days,log_close,log_fitted,fitted,residuals,upper_bound,lower_bound
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2011-09-02,8.234282,366.0,5.902633,2.108306,1.290595,3.634949,0.817711,8.906500,1.483507
2011-09-03,8.621424,367.0,5.905362,2.154250,1.297158,3.658885,0.857092,8.965147,1.493276
2011-09-04,8.449653,368.0,5.908083,2.134125,1.303710,3.682936,0.830415,9.024079,1.503092
2011-09-05,8.203471,369.0,5.910797,2.104557,1.310251,3.707104,0.794306,9.083296,1.512955
2011-09-06,7.597333,370.0,5.913503,2.027797,1.316781,3.731389,0.711017,9.142800,1.522866
...,...,...,...,...,...,...,...,...,...
2024-08-27,62983.442773,5109.0,8.538759,11.050627,11.187657,72233.352383,-0.137030,176989.076471,29480.108606
2024-08-28,59548.007622,5110.0,8.538955,10.994538,11.188692,72308.133440,-0.194154,177172.308036,29510.628492
2024-08-29,59100.774755,5111.0,8.539150,10.986999,11.189726,72382.980866,-0.202727,177355.702221,29541.175465
2024-08-30,59339.790995,5112.0,8.539346,10.991035,11.190761,72457.894710,-0.199726,177539.259148,29571.749545


## Create future dates

In [6]:
# Project the future (let's assume 365 days into the future)
future_days = np.arange(df['Days'].max() + 1, df['Days'].max() + 366)
future_log_days = np.log(future_days)
future_log_fitted = intercept + slope * future_log_days
future_fitted = np.exp(future_log_fitted)

# Generate future dates starting from one day after the last available date
future_dates = pd.date_range(start=df.index[-1] + pd.Timedelta(days=1), periods=365, freq='D')
future_dates

NameError: name 'intercept' is not defined

## Plot

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import RANSACRegressor, LinearRegression
from sklearn.metrics import r2_score
from pandas.tseries.offsets import DateOffset

# Assume df, slope, intercept, etc., are already defined

# Define the number of days into the future for projections
future_days = 365

# Create a range of future dates
last_date = df.index[-1]
future_dates = pd.date_range(start=last_date, periods=future_days + 1, freq='D')[1:]

# Create future projections DataFrame
future_df = pd.DataFrame(index=future_dates)
future_df['Days'] = np.arange(df['Days'].max() + 1, df['Days'].max() + 1 + future_days)

# Log transformation for future days
future_df['log_days'] = np.log(future_df['Days'])

# Compute future fitted values
future_df['log_fitted'] = intercept + slope * future_df['log_days']
future_df['fitted'] = np.exp(future_df['log_fitted'])

# Compute upper and lower bounds for future projections
future_df['upper_bound'] = np.exp(future_df['log_fitted'] + deviation_scale * lognorm.ppf(0.34, shape, loc=loc, scale=scale))
future_df['lower_bound'] = np.exp(future_df['log_fitted'] - deviation_scale * lognorm.ppf(0.34, shape, loc=loc, scale=scale))

# Plotting historical data
plt.figure(figsize=(14, 8))
plt.plot(df.index, df['Close'], label='BTC-USD Close', color='blue')
plt.plot(df.index, df['fitted'], label='Power Law Fit', color='red')
plt.plot(df.index, df['upper_bound'], label='Upper Bound', linestyle='--', color='green')
plt.plot(df.index, df['lower_bound'], label='Lower Bound', linestyle='--', color='green')

# Plot future projections
plt.plot(future_df.index, future_df['fitted'], label='Future Projection', color='orange')
plt.plot(future_df.index, future_df['upper_bound'], label='Future Upper Bound', linestyle='--', color='green')
plt.plot(future_df.index, future_df['lower_bound'], label='Future Lower Bound', linestyle='--', color='green')

plt.legend()
plt.yscale('log')
plt.xlabel('Date')
plt.ylabel('BTC-USD Price (Log Scale)')
plt.title('BTC Power Law Model with Deviation Levels')
plt.grid(True, which='both', linestyle='--', linewidth=0.5)
plt.show()

# Output R-squared value
print(f"R-squared value of the fit: {r_value:.4f}")


In [None]:
future_df