Importing Libraries 

In [None]:
import requests
import numpy as np
import pandas as pd
from datetime import datetime
from dateutil.relativedelta import relativedelta

List of 19 cryptocurrencies

In [None]:
# Ticker symbols of the coins whose price history will be downloaded.
coin_list = [
    'BTC', 'ETH', 'XRP', 'ADA', 'USDT',
    'DOGE', 'XLM', 'DOT', 'UNI', 'LINK',
    'USDC', 'BCH', 'LTC', 'GRT', 'ETC',
    'FIL', 'AAVE', 'ALGO', 'EOS',
]

Defining the dataframe in which we will store our web-scraped data

In [None]:
# Empty frame that will hold the combined price data of every coin.
main_df = pd.DataFrame(data=None)

Creating dataframe and Defining start date and end date

In [None]:
# Spacing between two consecutive one-minute candles in the API's 'Timestamp' values.
MINUTE_MS = 60000
# Seconds to wait for the API before a request is abandoned.
REQUEST_TIMEOUT_S = 30
# Column names given to the five fields of every entry returned by the API.
OHLC_COLUMNS = ['Timestamp', 'Open', 'High', 'Low', 'Close']


def _fetch_ohlc(coin, datetime_start, datetime_end):
    """Download one window of OHLC entries for `coin` from the CoinDesk API.

    Parameters
    ----------
    coin : str
        Ticker symbol placed in the request path.
    datetime_start, datetime_end : datetime
        Window bounds, sent as the `start_date` / `end_date` query parameters.

    Returns
    -------
    pandas.DataFrame
        One row per returned entry with the columns in OHLC_COLUMNS. The frame
        is empty (but still has those columns) when the API returns no entries.

    Raises
    ------
    requests.HTTPError
        If the API answers with an error status.
    requests.Timeout
        If no answer arrives within REQUEST_TIMEOUT_S seconds.
    """
    url = ('https://production.api.coindesk.com/v2/price/values/' + coin
           + '?start_date=' + datetime_start.strftime("%Y-%m-%dT%H:%M")
           + '&end_date=' + datetime_end.strftime("%Y-%m-%dT%H:%M")
           + '&ohlc=true')
    response = requests.get(url, timeout=REQUEST_TIMEOUT_S)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    entries = response.json()['data']['entries']
    # Passing the names here (rather than assigning df.columns afterwards)
    # keeps an empty response from raising a length-mismatch error.
    return pd.DataFrame(entries, columns=OHLC_COLUMNS)


def _fill_minute_gaps(candles):
    """Fill missing minutes by repeating the last candle seen before each gap.

    CoinDesk does not always return every minute. Consecutive rows are
    expected to be MINUTE_MS apart in 'Timestamp'; a larger step means minutes
    are missing. Each missing minute is imputed hot-deck style: the row just
    before the gap is copied and given the missing timestamp.

    Steps that are irregular but do not span a whole missing minute (smaller
    than 2 * MINUTE_MS, including duplicated or out-of-order timestamps) are
    counted in the printed message but left untouched, because there is
    nothing to insert for them.

    Parameters
    ----------
    candles : pandas.DataFrame
        Frame with a numeric 'Timestamp' column, ordered as returned by the API.

    Returns
    -------
    pandas.DataFrame
        The input rows plus the imputed rows, with a fresh 0..n-1 index. The
        input frame itself is returned when every step is regular.
    """
    stamps = candles['Timestamp'].to_numpy()
    steps = np.diff(stamps)
    mismatched = np.flatnonzero(steps != MINUTE_MS)
    if len(mismatched) == 0:
        return candles

    print(str(len(mismatched)) + ' mismatched.')

    pieces = []
    next_unused = 0
    for pos in mismatched:
        n_missing = int(steps[pos] // MINUTE_MS) - 1
        if n_missing < 1:
            # Nothing to insert; skipping avoids a negative repeat count.
            continue
        pieces.append(candles.iloc[next_unused:pos + 1])
        filler = candles.iloc[np.repeat(pos, n_missing)].assign(
            Timestamp=stamps[pos] + MINUTE_MS * np.arange(1, n_missing + 1)
        )
        pieces.append(filler)
        next_unused = pos + 1
    pieces.append(candles.iloc[next_unused:])
    return pd.concat(pieces, ignore_index=True)


# Earliest instant to download; windows are walked backwards until it is reached.
datetime_check = datetime(2021, 7, 1, 0, 0)

# Frames to stack at the end; any rows already in main_df are kept in front.
coin_frames = [] if main_df.empty else [main_df]

for coin in coin_list:
    # Latest instant to download; moved back by one window per request.
    datetime_end = datetime(2021, 7, 2, 0, 0)
    windows = []

    while datetime_end > datetime_check:
        datetime_start = datetime_end - relativedelta(hours=12)

        df = _fetch_ohlc(coin, datetime_start, datetime_end)
        if df.empty:
            # The API has nothing for this window; stop going further back.
            break

        df = _fill_minute_gaps(df).drop(columns=['Timestamp'])

        # Label the rows minute by minute so the last row is one minute
        # before the window end.
        # NOTE(review): labels are assigned by row position, not converted
        # from 'Timestamp'; this assumes the window holds exactly the minutes
        # in [start, end) - confirm against the API response.
        df['Datetime'] = pd.date_range(
            end=datetime_end - relativedelta(minutes=1),
            periods=len(df),
            freq='min',
        )

        windows.append(df)
        datetime_end = datetime_start

    if not windows:
        print('No data returned for ' + coin + '; skipping.')
        continue

    # Windows were collected newest-first; reverse them into chronological order.
    coin_df = pd.concat(windows[::-1])
    coin_df['Symbol'] = coin
    coin_frames.append(coin_df)

# A single concat replaces the per-iteration DataFrame.append calls, which
# were removed in pandas 2.0 and copied the whole frame on every call.
if coin_frames:
    main_df = pd.concat(coin_frames)

1 mismatched.
1 mismatched.
1 mismatched.
1 mismatched.
1 mismatched.
1 mismatched.
1 mismatched.
1 mismatched.
1 mismatched.
1 mismatched.
1 mismatched.
1 mismatched.
1 mismatched.
1 mismatched.
1 mismatched.
1 mismatched.
1 mismatched.
1 mismatched.
1 mismatched.


In [None]:
# Put the identifying columns first and swap the old row labels for a fresh
# 0..n-1 index, then display the result.
main_df = (
    main_df
    .loc[:, ['Datetime', 'Symbol', 'Open', 'High', 'Low', 'Close']]
    .reset_index(drop=True)
)
main_df

Unnamed: 0,Datetime,Symbol,Open,High,Low,Close
0,2021-07-01 00:00:00,BTC,35049.045484,35056.817222,34991.326658,34993.994267
1,2021-07-01 00:01:00,BTC,34995.349202,35030.583041,34989.167695,34991.266464
2,2021-07-01 00:02:00,BTC,34991.950939,34994.431719,34929.671801,34952.450112
3,2021-07-01 00:03:00,BTC,34954.642324,34980.657519,34954.642324,34961.649826
4,2021-07-01 00:04:00,BTC,34960.350436,34996.245435,34960.350436,34976.586395
...,...,...,...,...,...,...
27355,2021-07-01 23:55:00,EOS,3.945295,3.945295,3.936671,3.939916
27356,2021-07-01 23:56:00,EOS,3.939917,3.940801,3.933735,3.933735
27357,2021-07-01 23:57:00,EOS,3.933920,3.934184,3.928797,3.933092
27358,2021-07-01 23:58:00,EOS,3.933122,3.939846,3.933122,3.939811


In [None]:
# Save the combined table as CSV; the row index is not written to the file.
main_df.to_csv(path_or_buf='main_df.csv', index=False)

Since the data holds the price lists of all the cryptocurrencies stacked one after another, anyone who wants to analyse a single currency can use the following code to extract a separate dataframe containing only that currency's prices.

In [None]:
# Boolean-mask selection: keep only the rows whose Symbol is 'BTC', then display them.
bitcoin_df = main_df[main_df['Symbol'].eq('BTC')]
bitcoin_df

Unnamed: 0,Datetime,Symbol,Open,High,Low,Close
0,2021-07-01 00:00:00,BTC,35049.045484,35056.817222,34991.326658,34993.994267
1,2021-07-01 00:01:00,BTC,34995.349202,35030.583041,34989.167695,34991.266464
2,2021-07-01 00:02:00,BTC,34991.950939,34994.431719,34929.671801,34952.450112
3,2021-07-01 00:03:00,BTC,34954.642324,34980.657519,34954.642324,34961.649826
4,2021-07-01 00:04:00,BTC,34960.350436,34996.245435,34960.350436,34976.586395
...,...,...,...,...,...,...
1435,2021-07-01 23:55:00,BTC,33594.646151,33594.646151,33576.699353,33578.785074
1436,2021-07-01 23:56:00,BTC,33578.768127,33578.790001,33549.962990,33550.708295
1437,2021-07-01 23:57:00,BTC,33551.565942,33552.572040,33495.794536,33521.282687
1438,2021-07-01 23:58:00,BTC,33522.751773,33573.796139,33522.751773,33572.839461


So using this we can extract all the data for bitcoin 

*Note: We can also use the Yahoo Finance API (via the `yfinance` package) to scrape cryptocurrency data. I have shown that in the time-series forecasting part of my project, where I collect more than 7 years of bitcoin prices.*