# SETUP

We will gather both market capitalization data and opening and closing prices through the Binance API.

Then, we will process this data to build the base files.

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

# progress bar
from tqdm import tqdm, tqdm_notebook
# instantiate
tqdm.pandas(desc='Progress')

%config IPCompleter.greedy=True

#display format
pd.options.display.float_format = '{:15.2f}'.format

## DATA LOADING

### List of Assets with the Highest Market Capitalization

We will create a list of assets with the highest market capitalization. To do this, we need to retrieve the circulating supply and the current market price, then multiply them.

#### Retrieving Current Supply Data

In [2]:
from binance.client import Client
from datetime import datetime, timedelta

In [3]:
# No API key/secret needed for this type of call
client = Client()

In [4]:
products = client.get_products()

In [5]:
pairs=pd.DataFrame(products['data'])[['s','b','cs']]
pairs

Unnamed: 0,s,b,cs
0,BNBBTC,BNB,144010819
1,NULSBTC,NULS,110578049
2,NEOBTC,NEO,70538831
3,LINKBTC,LINK,626849970
4,IOTABTC,IOTA,3516571099
...,...,...,...
1260,NEIROUSDC,NEIRO,420679538754
1261,PNUTBTC,PNUT,999854908
1262,PNUTFDUSD,PNUT,999854908
1263,PNUTUSDC,PNUT,999854908


In [6]:
pairs.columns=['pair','asset','circulating_supply']
pairs

Unnamed: 0,pair,asset,circulating_supply
0,BNBBTC,BNB,144010819
1,NULSBTC,NULS,110578049
2,NEOBTC,NEO,70538831
3,LINKBTC,LINK,626849970
4,IOTABTC,IOTA,3516571099
...,...,...,...
1260,NEIROUSDC,NEIRO,420679538754
1261,PNUTBTC,PNUT,999854908
1262,PNUTFDUSD,PNUT,999854908
1263,PNUTUSDC,PNUT,999854908


We group the circulating capital by asset.

In [7]:
assets=pairs.groupby('asset')[['circulating_supply']].sum().reset_index()
assets

Unnamed: 0,asset,circulating_supply
0,1000SATS,8400000000000
1,1INCH,2557569526
2,1MBABYDOGE,459621329676
3,AAVE,89872014
4,ACA,3416249982
...,...,...
382,ZEN,46996050
383,ZIL,76704179608
384,ZK,18375000000
385,ZRO,550000000


#### Calculating Market Capitalization

We calculate the market capitalization for each asset by multiplying the volume by the price in USDT (obtained via the Binance API).

In [8]:
def calculate_market_cap(row):
    
    #built the pair
    currency=row.asset
    pair=currency + 'USDT'
    
    #calculate the marketcap and manage the nulls
    if pair in pairs.pair.to_list():
    
        avg_price = round(float(client.get_avg_price(symbol=pair)['price']),2)
        market_capitalization= row.circulating_supply* avg_price
    else:
        market_capitalization=-999
    
    return market_capitalization
    

In [9]:
assets['market_cap']=assets.progress_apply(lambda x: calculate_market_cap(x),axis=1)
assets

Progress: 100%|██████████████████████████████████████████████████████████████████████| 387/387 [01:44<00:00,  3.71it/s]


Unnamed: 0,asset,circulating_supply,market_cap
0,1000SATS,8400000000000,0.00
1,1INCH,2557569526,869573638.84
2,1MBABYDOGE,459621329676,0.00
3,AAVE,89872014,14781250142.58
4,ACA,3416249982,273299998.56
...,...,...,...
382,ZEN,46996050,434713462.50
383,ZIL,76704179608,1534083592.16
384,ZK,18375000000,2756250000.00
385,ZRO,550000000,1996500000.00


We remove assets with a market capitalization of zero and nulls

In [10]:
assets=assets[assets.market_cap>0]

### Creating the Historical Market Price Data

#### Ranking the Top 10 Assets by Market Capitalization

In [11]:
assets=assets.sort_values(by='market_cap', ascending=False).iloc[:10]
assets

Unnamed: 0,asset,circulating_supply,market_cap
71,BTC,316549392,28969590563279.52
127,ETH,1685953976,5238022969875.36
309,SOL,5220239013,1248628969519.47
64,BNB,1584119009,972316406534.11
375,XRP,683174906088,737828898575.04
111,DOGE,1321676067456,515453666307.84
9,ADA,350337767870,255746570545.1
339,TRX,690957467944,138191493588.8
45,AVAX,3681320760,125385785085.6
320,SUI,25611756264,92714557675.68


#### Retrieving Historical Market Data for These Assets

We retrieve the data through the API and convert it to a dataframe.

In [12]:
def days(df):
    number_of_days=df.shape[0]
    
    #calculate begining and end
    begining=datetime.today()-timedelta(days=number_of_days-1)
    end=datetime.today()
    
    #create the dates
    dates = [(begining + timedelta(days=d)).strftime("%Y-%m-%d") for d in range((end - begining).days+1)]
    
    return dates

In [13]:
def historical_price_data(currency):

    #create the pair
    pair = currency + 'USDT'
    
    #set de column names
    final_columns = [
        'open_time', 'open', 'high', 'low', 'close', 'volume',
        'close_time', 'quote_asset_volume', 'number_of_trades',
        'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume',
        'ignore'
    ]

    #get the historical data
    klines = client.get_historical_klines(pair, Client.KLINE_INTERVAL_1DAY,'1 Jul, 2017')

    price_data=pd.DataFrame(klines,columns=final_columns)
    
    #add dates and the currency
    dates= days(price_data)
    
    price_data.insert(0,'date',dates)
    
    price_data.insert(0,'asset',currency)
    
    return price_data

In [14]:
columns= [
        'asset','date','open_time', 'open', 'high', 'low', 'close', 'volume',
        'close_time', 'quote_asset_volume', 'number_of_trades',
        'taker_buy_base_asset_volume', 'taker_buy_quote_asset_volume',
        'ignore'
        ]

historical_data_assets= pd.DataFrame()

for asset in assets.asset.to_list():
    
    historical_asset= historical_price_data(asset)
    
    historical_data_assets= pd.concat([historical_data_assets,historical_asset],axis='index',ignore_index=True )
    
    

historical_data_assets

Unnamed: 0,asset,date,open_time,open,high,low,close,volume,close_time,quote_asset_volume,number_of_trades,taker_buy_base_asset_volume,taker_buy_quote_asset_volume,ignore
0,BTC,2017-08-17,1502928000000,4261.48000000,4485.39000000,4200.74000000,4285.08000000,795.15037700,1503014399999,3454770.05073206,3427,616.24854100,2678216.40060401,0
1,BTC,2017-08-18,1503014400000,4285.08000000,4371.52000000,3938.77000000,4108.37000000,1199.88826400,1503100799999,5086958.30617151,5233,972.86871000,4129123.31651808,0
2,BTC,2017-08-19,1503100800000,4108.37000000,4184.69000000,3850.00000000,4139.98000000,381.30976300,1503187199999,1549483.73542151,2153,274.33604200,1118001.87008735,0
3,BTC,2017-08-20,1503187200000,4120.98000000,4211.08000000,4032.62000000,4086.29000000,467.08302200,1503273599999,1930364.39032646,2321,376.79594700,1557401.33373730,0
4,BTC,2017-08-21,1503273600000,4069.13000000,4119.62000000,3911.79000000,4016.00000000,691.74306000,1503359999999,2797231.71402728,3972,557.35610700,2255662.55315837,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20639,SUI,2024-11-15,1731628800000,3.35320000,3.67960000,3.10000000,3.61070000,116034766.30000000,1731715199999,390563508.02717000,843956,57315200.40000000,193090645.78848000,0
20640,SUI,2024-11-16,1731715200000,3.61080000,3.93600000,3.57920000,3.76070000,108161146.40000000,1731801599999,407119125.87105000,865346,54740169.00000000,205981693.01529000,0
20641,SUI,2024-11-17,1731801600000,3.76100000,3.94090000,3.57320000,3.78390000,111441563.60000000,1731887999999,418296601.94244000,910716,56465271.30000000,212003036.51975000,0
20642,SUI,2024-11-18,1731888000000,3.78460000,3.90980000,3.60570000,3.69920000,83318827.70000000,1731974399999,311301569.21275000,642774,42195814.00000000,157753860.38124000,0


We order the data by ascending date.

In [15]:
historical_data_assets=historical_data_assets.sort_values(['asset','date'], ignore_index=True)

## SAVING THE DATA AS SOURCE FILE

In [16]:
pickle_file_name='raw_historical_data.pickle'
csv_file_name='raw_historical_data.csv'

path_pickle='../../02_Data/01_Originals/' + pickle_file_name
path_csv= '../../02_Data/01_Originals/' + csv_file_name

historical_data_assets.to_pickle(path_pickle)
historical_data_assets.to_csv(path_csv)