In [2]:
import pandas as pd
from prophet import Prophet
import matplotlib.pyplot as plt
#import time_series_functions as tsf
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np


In [3]:
# Import data from csv file
df = pd.read_csv('/Users/jeanstibel/Documents/AI Classes/Projects/02 Project/Project 02/Resources/df_top_10.csv')
df.head()

Unnamed: 0,date,crypto_id,open,close,high,low,volume,number_of_trades,diff_oc,return,volatility,vol_change,ma_5,ma_10,price_movement
0,2020-09-22,AVAX,0.85,5.3193,7.0,0.85,31441883.16,375884,4.4693,525.8,723.529412,0.0,0.0,0.0,1
1,2020-09-23,AVAX,5.3279,3.535,5.36,3.4,12620608.01,168336,-1.7929,-33.651157,36.787477,-59.860521,0.0,0.0,0
2,2020-09-24,AVAX,3.5305,4.6411,4.8873,3.4033,7107843.86,104806,1.1106,31.457301,42.033706,-43.680654,0.0,0.0,1
3,2020-09-25,AVAX,4.6366,4.7134,5.0234,4.2001,3886096.71,78381,0.0768,1.656386,17.756546,-45.326645,0.0,0.0,1
4,2020-09-26,AVAX,4.7164,4.52,4.82,4.3294,1814932.97,34016,-0.1964,-4.164193,10.402002,-53.296763,4.54576,0.0,0


In [4]:
# Get list of'crypto_id's
crypto_ids = df['crypto_id'].unique()
crypto_ids

array(['AVAX', 'BTC', 'CRV', 'DEXE', 'DOGE', 'ENA', 'ENJ', 'FIL', 'GRT',
       'ICP', 'IOTA', 'MANA', 'QTUM', 'SAND', 'SHIB', 'SOL', 'TFUEL',
       'UNI'], dtype=object)

In statistics, the standard deviation is a measure of the amount of variation of the values of a variable about its mean. A low standard deviation indicates that the values tend to be close to the mean of the set, while a high standard deviation indicates that the values are spread out over a wider range.

In [5]:
# Calculate the standard deviation of the 'close' prices for each 'crypto_id'
crypto_volatility = df.groupby('crypto_id')['volatility'].std().reset_index()

# get volatility in order ascending
crypto_volatility = crypto_volatility.sort_values(by='volatility', ascending=True)

# get the 10 lest volatile crypto
crypto_volatility = crypto_volatility.head(10)

crypto_volatility

Unnamed: 0,crypto_id,volatility
1,BTC,3.96726
10,IOTA,6.093208
12,QTUM,6.271547
15,SOL,6.688596
6,ENJ,7.229954
16,TFUEL,9.290398
11,MANA,10.042802
14,SHIB,10.472944
2,CRV,11.629367
5,ENA,12.743082


In [6]:
def prepare_data_for_prophet(df, crypto_id):
    # Filter the data for the specified cryptocurrency
    df_crypto = df[df['crypto_id'] == crypto_id]

    # Prepare the data for Prophet
    df_crypto = df_crypto[['date', 'volatility']]
    df_crypto.columns = ['ds', 'y']

    # Convert the 'ds' column to datetime
    df_crypto['ds'] = pd.to_datetime(df_crypto['ds'])

    # Aggregate data to daily level (if necessary)
    df_crypto = df_crypto.groupby('ds').mean().reset_index()

    return df_crypto

# Prepare data for the top 10 least volatile crypto_ids
top_10_low_volatile_ids = crypto_volatility['crypto_id'].unique()
prepared_data_top_10_low_volatile = {crypto_id:
    prepare_data_for_prophet(df, crypto_id) for crypto_id in top_10_low_volatile_ids}


In [7]:
# Loop through each dataframe in the dictionary and fit the model
for crypto_id, df_prepared in prepared_data_top_10_low_volatile.items():
    
	# Initialize the Prophet model
	model = Prophet()
	
	# Fit the model on the data
	model.fit(df_prepared)
	
	# Create a dataframe to hold predictions
	future = model.make_future_dataframe(periods=30)
	
	# Make predictions
	forecast = model.predict(future)
	
	# Print the forecast for the current cryptocurrency
	display(f'Forecast for {crypto_id}:')
	display(forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper']].head())

21:52:49 - cmdstanpy - INFO - Chain [1] start processing
21:52:49 - cmdstanpy - INFO - Chain [1] done processing


'Forecast for BTC:'

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
0,2017-08-17,10.560776,5.500805,14.654532
1,2017-08-18,10.335265,5.772437,14.764564
2,2017-08-19,8.511822,3.420278,13.226597
3,2017-08-20,8.980217,4.526449,13.489805
4,2017-08-21,10.354268,6.071852,14.474091


21:52:49 - cmdstanpy - INFO - Chain [1] start processing
21:52:50 - cmdstanpy - INFO - Chain [1] done processing


'Forecast for IOTA:'

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
0,2018-05-31,13.587636,6.490932,21.335232
1,2018-06-01,12.873579,5.688215,19.633032
2,2018-06-02,11.398683,4.652891,18.60348
3,2018-06-03,11.444629,4.150083,18.339532
4,2018-06-04,13.21185,6.268616,19.70085


21:52:50 - cmdstanpy - INFO - Chain [1] start processing
21:52:50 - cmdstanpy - INFO - Chain [1] done processing


'Forecast for QTUM:'

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
0,2018-03-19,12.280536,4.704358,19.528435
1,2018-03-20,11.418273,3.746069,18.398536
2,2018-03-21,11.954352,5.084305,19.555212
3,2018-03-22,12.017772,4.781628,19.720511
4,2018-03-23,11.570257,4.6098,19.117957


21:52:50 - cmdstanpy - INFO - Chain [1] start processing
21:52:50 - cmdstanpy - INFO - Chain [1] done processing


'Forecast for SOL:'

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
0,2020-08-11,15.969535,8.546114,23.27632
1,2020-08-12,16.935885,9.36812,24.684499
2,2020-08-13,16.182777,8.412007,23.786919
3,2020-08-14,16.266803,8.62175,23.937397
4,2020-08-15,14.211073,6.910086,21.247868


21:52:50 - cmdstanpy - INFO - Chain [1] start processing
21:52:50 - cmdstanpy - INFO - Chain [1] done processing


'Forecast for ENJ:'

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
0,2019-04-18,9.945097,1.454436,17.744025
1,2019-04-19,9.168362,0.963255,17.210244
2,2019-04-20,7.469439,-0.526892,16.098899
3,2019-04-21,8.047673,-0.021089,16.720879
4,2019-04-22,9.595891,0.797587,17.618023


21:52:50 - cmdstanpy - INFO - Chain [1] start processing
21:52:50 - cmdstanpy - INFO - Chain [1] done processing


'Forecast for TFUEL:'

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
0,2019-05-24,19.266589,7.969289,29.80263
1,2019-05-25,17.345775,6.745646,28.261788
2,2019-05-26,17.930145,6.809059,29.269964
3,2019-05-27,18.440376,6.651216,30.142624
4,2019-05-28,17.786114,6.397109,28.462391


21:52:51 - cmdstanpy - INFO - Chain [1] start processing
21:52:51 - cmdstanpy - INFO - Chain [1] done processing


'Forecast for MANA:'

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
0,2020-08-06,16.648314,5.136132,28.254453
1,2020-08-07,16.773872,4.384781,28.960465
2,2020-08-08,16.473339,4.441712,27.922139
3,2020-08-09,15.191308,4.487028,27.278911
4,2020-08-10,16.032694,4.50722,26.993652


21:52:51 - cmdstanpy - INFO - Chain [1] start processing
21:52:51 - cmdstanpy - INFO - Chain [1] done processing


'Forecast for SHIB:'

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
0,2021-05-10,34.075977,22.043609,45.921499
1,2021-05-11,32.155135,19.994184,43.505314
2,2021-05-12,32.483828,20.787159,43.826295
3,2021-05-13,31.435707,18.95038,43.595789
4,2021-05-14,30.854931,18.460047,42.928346


21:52:51 - cmdstanpy - INFO - Chain [1] start processing
21:52:51 - cmdstanpy - INFO - Chain [1] done processing


'Forecast for CRV:'

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
0,2020-08-15,36.28822,22.828796,49.782248
1,2020-08-16,34.271047,21.475171,47.097143
2,2020-08-17,36.108889,23.833479,49.083639
3,2020-08-18,35.528294,22.198013,48.859952
4,2020-08-19,35.35278,22.11983,47.62328


21:52:51 - cmdstanpy - INFO - Chain [1] start processing
21:52:51 - cmdstanpy - INFO - Chain [1] done processing


'Forecast for ENA:'

Unnamed: 0,ds,yhat,yhat_lower,yhat_upper
0,2024-04-02,34.273726,19.702603,49.026785
1,2024-04-03,32.362769,17.292132,46.803448
2,2024-04-04,29.19472,14.999626,43.479834
3,2024-04-05,30.470192,16.305813,44.711373
4,2024-04-06,26.745514,10.375803,41.791211


In [8]:
# Create a dataframe to hold predictions
future = model.make_future_dataframe(periods=30)

# Make predictions
forecast = model.predict(future)

In [9]:
# Print the forecast for the current cryptocurrency and save the results in the file 'Forecast'

for crypto_id in top_10_low_volatile_ids:
    df_prepared = prepared_data_top_10_low_volatile[crypto_id]
    
    # Initialize the Prophet model
    model = Prophet()
    
    # Fit the model on the data
    model.fit(df_prepared)
    
    # Create a dataframe to hold predictions
    future = model.make_future_dataframe(periods=30)
    
    # Make predictions
    forecast = model.predict(future)
    
    # Plot the forecast
    fig1 = model.plot(forecast)
    plt.title(f'{crypto_id} Volatility Price Forecast')
    plt.xlabel('Date')
    plt.ylabel('Volatility Price')
    plt.savefig(f'/Users/jeanstibel/Documents/AI Classes/Projects/project_02/Forecast/{crypto_id}_forecast.png')
    plt.close(fig1)
    
    # Plot the forecast components
    fig2 = model.plot_components(forecast)
    plt.suptitle(f'{crypto_id} Forecast Components', fontsize=16)
    plt.savefig(f'/Users/jeanstibel/Documents/AI Classes/Projects/project_02/Forecast/{crypto_id}_forecast_components.png')
    plt.close(fig2)



21:52:51 - cmdstanpy - INFO - Chain [1] start processing
21:52:51 - cmdstanpy - INFO - Chain [1] done processing
21:52:52 - cmdstanpy - INFO - Chain [1] start processing
21:52:52 - cmdstanpy - INFO - Chain [1] done processing
21:52:52 - cmdstanpy - INFO - Chain [1] start processing
21:52:52 - cmdstanpy - INFO - Chain [1] done processing
21:52:53 - cmdstanpy - INFO - Chain [1] start processing
21:52:53 - cmdstanpy - INFO - Chain [1] done processing
21:52:53 - cmdstanpy - INFO - Chain [1] start processing
21:52:53 - cmdstanpy - INFO - Chain [1] done processing
21:52:53 - cmdstanpy - INFO - Chain [1] start processing
21:52:53 - cmdstanpy - INFO - Chain [1] done processing
21:52:54 - cmdstanpy - INFO - Chain [1] start processing
21:52:54 - cmdstanpy - INFO - Chain [1] done processing
21:52:54 - cmdstanpy - INFO - Chain [1] start processing
21:52:54 - cmdstanpy - INFO - Chain [1] done processing
21:52:54 - cmdstanpy - INFO - Chain [1] start processing
21:52:54 - cmdstanpy - INFO - Chain [1]

In [10]:
from sklearn.metrics import mean_absolute_error

# Calculate MAE for each of the top 10 low volatile cryptocurrencies
for crypto_id in top_10_low_volatile_ids:
    df_prepared = prepared_data_top_10_low_volatile[crypto_id]
    
    # Ensure the lengths of the dataframes are the same
    min_length = min(len(df_prepared), len(forecast))
    df_prepared = df_prepared.iloc[:min_length]
    forecast_trimmed = forecast.iloc[:min_length]
    
    mae = mean_absolute_error(df_prepared['y'], forecast_trimmed['yhat'])
    print(f'Mean Absolute Error for {crypto_id}: {mae}')

Mean Absolute Error for BTC: 7.251466790400175
Mean Absolute Error for IOTA: 6.887395242898888
Mean Absolute Error for QTUM: 6.321945385292588
Mean Absolute Error for SOL: 6.169704230539761
Mean Absolute Error for ENJ: 6.292023338446587
Mean Absolute Error for TFUEL: 6.856254433020692
Mean Absolute Error for MANA: 7.0504115843508774
Mean Absolute Error for SHIB: 7.930981903032593
Mean Absolute Error for CRV: 8.342744254696001
Mean Absolute Error for ENA: 4.914553630511177
