In [None]:
!pip install fitter
!pip install cryptocmd

In [3]:
import os
from functools import reduce
import random

# Data Processing
import numpy as np
import pandas as pd
import datetime as dt

import seaborn as sns
import matplotlib.pyplot as plt
# !pip install fitter
from fitter import Fitter, get_common_distributions, get_distributions

# api for web scraping coinmarketcap
from cryptocmd import CmcScraper

# crypto api
from requests import Request, Session
from requests.exceptions import ConnectionError, Timeout, TooManyRedirects
import json

# get distributions from scipy
from scipy.stats import cauchy,chi2,expon,exponpow,gamma,lognorm,norm,powerlaw,rayleigh,uniform

In [None]:
def plot_distributions(df):
  df_visual = df.replace(np.nan,0)

  if np.isinf(df).values.sum() > 0:
    df_visual.replace([np.inf, -np.inf], 0, inplace = True)

  fig, axes = plt.subplots(9,2, figsize = (20,50))
  row_num = 0

  for col in df.drop(columns = 'date').columns:
    sns.histplot(data = df_visual, x = col, ax = axes[row_num][0])
    sns.lineplot(data = df_visual, x = 'date', y = col, ax = axes[row_num][1])
    row_num += 1

In [6]:
def find_best_distribution(df):
  best_dist = {}

  for col in df.drop('date',axis = 1).columns:
    f = Fitter(
        df[col].dropna().values,
        distributions = get_common_distributions(),
        bins = 200
    )

    f.fit()
    best_dist[col] = f.get_best(method= 'sumsquare_error')

  return best_dist

In [7]:
def get_name_parameters(column):
  return next(iter(best_dist[column])), next(iter(best_dist[column].values()))

In [9]:
'''cauchy,
 chi2,
 expon,
 exponpow,
 gamma,
 lognorm,
 norm,
 powerlaw,
 rayleigh,
 uniform'''

def cauchy_pdf(x, parameters):
  return cauchy.pdf(x = x, loc = parameters['loc'], scale = parameters['scale'])

def chi2_pdf(x, parameters, data):
  df = (data.shape[0]-1) * (data.shape[1]-1)
  return chi2.pdf(x = x, df = df, loc = parameters['loc'], scale = parameters['scale'])

def expon_pdf(x ,parameters):
  return expon.pdf(x = x, loc = parameters['loc'], scale = parameters['scale'])

def exponpow_pdf(x, parameters):
  return exponpow.pdf(x = x, b = parameters['b'], loc = parameters['loc'], scale = parameters['scale'])

def gamma_pdf(x, parameters):
  return gamma.pdf(x = x, a = parameters['a'], loc = parameters['loc'], scale = parameters['scale'])

def lognorm_pdf(x, parameters):
  return lognorm.pdf(x = x, s = parameters['s'], loc = parameters['loc'], scale = parameters['scale'])

def norm_pdf(x, parameters):
  return norm.pdf(x = x, loc = parameters['loc'], scale = parameters['scale'])

def powerlaw_pdf(x, parameters):
  return powerlaw.pdf(x = x, a = parameters['a'], loc = parameters['loc'], scale = parameters['scale'])

def rayleigh_pdf(x, parameters):
  return rayleigh.pdf(x = x, loc= parameters['loc'], scale = parameters['scale'])

def uniform_pdf(x, parameters):
  return uniform.pdf(x = x, loc = parameters['loc'], scale = parameters['scale'])

get_pdfs = {
    'cauchy':cauchy_pdf,
    'chi2':chi2_pdf,
    'expon':expon_pdf,
    'exponpow':exponpow_pdf,
    'gamma':gamma_pdf,
    'lognorm':lognorm_pdf,
    'norm':norm_pdf,
    'powerlaw':powerlaw_pdf,
    'rayleigh':rayleigh_pdf,
    'uniform':uniform_pdf
}


In [10]:
url = 'https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest'
parameters = {
  'start':'1',
  'limit':'5000',
  'convert':'USD',
  'sort':'market_cap',
  'sort_dir':'desc',
  'circulating_supply_max':'100000000000000000',
}
headers = {
  'Accepts': 'application/json',
  'X-CMC_PRO_API_KEY': '114eccc4-72b1-4540-8d65-dd998d202503',
}

session = Session()
session.headers.update(headers)

try:
  response = session.get(url, params=parameters)
  data = json.loads(response.text)
  #print(data)
except (ConnectionError, Timeout, TooManyRedirects) as e:
  print(e)

In [11]:
coin_list = {}
for i in range(5000):
    name = data['data'][i]['name']

    coin_list[name] = {}

    coin_list[name]["symbol"] = data['data'][i]['symbol']
    coin_list[name]["id"] = data['data'][i]['id']
    coin_list[name]["circulating_supply"] = data['data'][i]['circulating_supply']
    coin_list[name]["max_supply"] = data['data'][i]['max_supply']
    coin_list[name]["total_supply"] = data['data'][i]['total_supply']
    coin_list[name]["market_pairs"] = data['data'][i]['num_market_pairs']
    coin_list[name]["market_cap"] = data['data'][i]['quote']['USD']['market_cap']
    coin_list[name]["dominance"] = data['data'][i]['quote']['USD']['market_cap_dominance']
    coin_list[name]["price"] = data['data'][i]['quote']['USD']['price']
    coin_list[name]["pc_1h"] = data['data'][i]['quote']['USD']['percent_change_1h']
    coin_list[name]["pc_24h"] = data['data'][i]['quote']['USD']['percent_change_24h']
    coin_list[name]["pc_7d"] = data['data'][i]['quote']['USD']['percent_change_7d']
    coin_list[name]["pc_30d"] = data['data'][i]['quote']['USD']['percent_change_30d']
    coin_list[name]["pc_60d"] = data['data'][i]['quote']['USD']['percent_change_60d']
    coin_list[name]["pc_90d"] = data['data'][i]['quote']['USD']['percent_change_90d']
    coin_list[name]["volume"] = data['data'][i]['quote']['USD']['volume_24h']
    coin_list[name]["volume_change"] = data['data'][i]['quote']['USD']['volume_change_24h']

In [13]:
coin_list

{'Bitcoin': {'symbol': 'BTC',
  'id': 1,
  'circulating_supply': 19008481,
  'max_supply': 21000000,
  'total_supply': 19008481,
  'market_pairs': 9319,
  'market_cap': 801184955585.21,
  'dominance': 41.2763,
  'price': 42148.81534117376,
  'pc_1h': 0.27382713,
  'pc_24h': -1.4531326,
  'pc_7d': -8.33064086,
  'pc_30d': 8.0273663,
  'pc_60d': -3.92164472,
  'pc_90d': -0.07090078,
  'volume': 20734185197.437717,
  'volume_change': 38.362},
 'Ethereum': {'symbol': 'ETH',
  'id': 1027,
  'circulating_supply': 120329613.5615,
  'max_supply': None,
  'total_supply': 120329613.5615,
  'market_pairs': 5637,
  'market_cap': 381801525366.3,
  'dominance': 19.669,
  'price': 3172.9639451651915,
  'pc_1h': 0.15647517,
  'pc_24h': -2.52749973,
  'pc_7d': -9.10094402,
  'pc_30d': 23.10165303,
  'pc_60d': -0.43917428,
  'pc_90d': 1.86520779,
  'volume': 12071273996.590193,
  'volume_change': 35.0177},
 'Tether': {'symbol': 'USDT',
  'id': 825,
  'circulating_supply': 82534930596.50717,
  'max_suppl

In [None]:
random_coin_name = random.sample(list(coin_name), 1)
random_coin_symbol = coin_name[random_coin_name[0]]
print(random_coin_name, random_coin_symbol)

In [4]:
# initialise scraper without time interval
scraper = CmcScraper('BTC')

# get raw data as list of list
headers, data = scraper.get_data()

# get data in a json format
xrp_json_data = scraper.get_data("json")

# # export the data as csv file, you can also pass optional `name` parameter
# scraper.export("csv", name="bit_all_time")

# Pandas dataFrame for the same data
df = scraper.get_dataframe()

In [6]:
df.head()

Unnamed: 0,Date,Open,High,Low,Close,Volume,Market Cap
0,2022-04-10,42781.090463,43376.38066,42021.206802,42207.670964,17654480000.0,802293400000.0
1,2022-04-09,42282.079395,42786.817837,42183.253908,42782.135317,16050770000.0,813175800000.0
2,2022-04-08,43505.136803,43903.019565,42183.287121,42287.663512,27216000000.0,803736500000.0
3,2022-04-07,43207.49887,43860.697559,42899.906609,43503.849166,26101970000.0,826814800000.0
4,2022-04-06,45544.35562,45544.35562,43193.954001,43206.737673,39393400000.0,821131600000.0


In [7]:
df.drop(['Open','High','Low'], axis = 1, inplace = True)

df['close_24h'] = df['Close'].shift(periods = -1)
df['close_7d'] = df['Close'].shift(periods = -7)
df['close_30d'] = df['Close'].shift(periods = -30)
df['close_60d'] = df['Close'].shift(periods = -60)
df['close_90d'] = df['Close'].shift(periods = -90)
df['volume_24h'] = df['Volume'].shift(periods = -1)

df['%price_change_24h'] =   (  df['Close'] / df['close_24h'] - 1 ) * 100
df['%price_change_7d'] =  (  df['Close'] /  df['close_7d'] - 1 ) * 100
df['%price_change_30d'] =   (  df['Close'] / df['close_30d'] - 1 ) * 100
df['%price_change_60d'] =   (  df['Close'] / df['close_60d'] - 1 ) * 100
df['%price_change_90d'] =   (  df['Close'] / df['close_90d'] - 1 ) * 100
df['%volume_change_24h'] =   (df['Volume'] / df['volume_24h'] - 1 ) * 100

df.drop(['close_24h','close_7d','close_30d','close_60d','close_90d','volume_24h'], 
        axis = 1, inplace = True)
df.rename({'Close':'price'}, axis = 1, inplace = True)
df.columns = df.columns.str.lower()
df.fillna(0, inplace = True)

In [8]:
df.head()

Unnamed: 0,date,price,volume,market cap,%price_change_24h,%price_change_7d,%price_change_30d,%price_change_60d,%price_change_90d,%volume_change_24h
0,2022-04-10,42207.670964,17654480000.0,802293400000.0,-1.342767,-9.140086,8.796753,-4.80646,0.923951,9.991439
1,2022-04-09,42782.135317,16050770000.0,813175800000.0,1.169305,-6.729637,8.480961,-3.028917,2.077071,-41.024488
2,2022-04-08,42287.663512,27216000000.0,803736500000.0,-2.795582,-8.629729,0.72586,-3.541544,1.326787,4.267962
3,2022-04-07,43503.849166,26101970000.0,826814800000.0,0.687651,-4.468351,12.304896,2.573337,4.682498,-33.740231
4,2022-04-06,43206.737673,39393400000.0,821131600000.0,-5.156853,-8.193175,13.51661,4.260432,0.106136,32.903485


In [None]:
print('inf',np.isinf(df).values.sum())
print('null',df.isnull().sum())

In [None]:
plot_distributions(df)
plt.show()

In [None]:
if np.isinf(df).values.sum() > 0:
  df_find = df.replace([np.inf, -np.inf], np.nan)
  best_dist = find_best_distribution(df_find)

else:
  best_dist = find_best_distribution(df)

In [None]:
best_dist

In [None]:
name1, params1 = get_name_parameters('%price_change_60d')

In [None]:
# x = latest information from API
x = 0
result1 = get_pdfs[name1](x, params1)
result1

# get range of distribution
# normalize to 0-100 scale

In [None]:
# score_result = math.pow(result1,score)
# weight_result = math.pow(result1, weight)
# #average = (score_result+weight_result)/2

# final1 = score1...score6 = 0.5^6 = 0.016
# # final2 = weight1...weight6 = 
# # average = (final1+final2)/2
# # normalize average
# risk = 1 - average =0.984 98%

In [None]:
for col in df.drop(['date','volume','price','market cap'], axis = 1).columns:

  dist_name, dist_params = get_name_parameters(col)

  result = get_pdfs[dist_name](x, dist_params)

  print(col)
  print(result)
  print(get_max_density(df, col))
  print(result/get_max_density(df, col) * 100,'\n')
  final_result += weight* result / get_max_density(df,col)
  normed_final_result = final_result/6 * 100
  risk = 1 - normed_final_result