## 1. CoinGecko

In [1]:
from pycoingecko import CoinGeckoAPI
import pandas as pd
import time

In [2]:
cg = CoinGeckoAPI()

### 1.1 `cg.get_coin_market_chart_by_id()` API


- **:param `id`:**          default is Bitcoin "bitcoin"
- **:param `vs_currency`:** default is USD "usd"
- **:param `days`:**        the range of days to retireve data for "1", "30", "max", etc.
- **:param `interval`:**    the data interval "minutely", "hourly", "daily"

In [3]:
data = cg.get_coin_market_chart_by_id(id = "bitcoin",
                                      vs_currency = "usd",
                                      days = "5",
                                      interval = "daily")

###
# :param id:          default is Bitcoin "bitcoin"
# :param vs_currency: default is USD "usd"
# :param days:        the range of days to retireve data for "1", "30", "max", etc.
# :param interval:    the data interval "minutely", "hourly", "daily"
###

In [4]:
type(data)

dict

In [5]:
#data: a dictionary with 3 keys: prices, market_caps and total_volumes

#each key associates with a list, in which each element is also a list of two elements: timestamp and value of the key

data

{'prices': [[1729555200000, 67394.85775735618],
  [1729641600000, 67351.04793512498],
  [1729728000000, 66683.96806542306],
  [1729814400000, 68214.05205157402],
  [1729900800000, 66585.66535501445],
  [1729911829000, 66650.69451353882]],
 'market_caps': [[1729555200000, 1332326858574.4448],
  [1729641600000, 1331426858508.4004],
  [1729728000000, 1318285437546.2786],
  [1729814400000, 1347888109010.5889],
  [1729900800000, 1313975091942.221],
  [1729911829000, 1320159363921.0457]],
 'total_volumes': [[1729555200000, 40485496091.95008],
  [1729641600000, 31398389967.38113],
  [1729728000000, 32676918488.059],
  [1729814400000, 35892606570.03549],
  [1729900800000, 48426924850.053406],
  [1729911829000, 50304110341.034874]]}

In [6]:
data["prices"]

[[1729555200000, 67394.85775735618],
 [1729641600000, 67351.04793512498],
 [1729728000000, 66683.96806542306],
 [1729814400000, 68214.05205157402],
 [1729900800000, 66585.66535501445],
 [1729911829000, 66650.69451353882]]

In [7]:
df = pd.DataFrame(data["prices"], columns = ["Timestamp", "Price"])
df.head()

Unnamed: 0,Timestamp,Price
0,1729555200000,67394.857757
1,1729641600000,67351.047935
2,1729728000000,66683.968065
3,1729814400000,68214.052052
4,1729900800000,66585.665355


#### Convert Unix timestamp (in milliseconds) into standard date-time format

**`pd.to_datetime()`**

In [8]:
df["Date"] = pd.to_datetime(df["Timestamp"], unit="ms")

In [9]:
df.head()

Unnamed: 0,Timestamp,Price,Date
0,1729555200000,67394.857757,2024-10-22
1,1729641600000,67351.047935,2024-10-23
2,1729728000000,66683.968065,2024-10-24
3,1729814400000,68214.052052,2024-10-25
4,1729900800000,66585.665355,2024-10-26


#### Set the "Date” (date-time values) as the DataFrame's index

Many Pandas time-based operations (like `resample()`) work based on this index

In [10]:
df.set_index("Date", inplace = True)  # inplace=Ture: modifies the original df directly rather than returning a new df
df.head()

Unnamed: 0_level_0,Timestamp,Price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-10-22,1729555200000,67394.857757
2024-10-23,1729641600000,67351.047935
2024-10-24,1729728000000,66683.968065
2024-10-25,1729814400000,68214.052052
2024-10-26,1729900800000,66585.665355


### 1.2 `cg.get_coin_market_chart_range_by_id()` API

- **:param `id`:** default is Bitcoin "bitcoin"
- **:param `vs_currency`:** default is USD "usd"
- **:param `from_timestamp`:** the Unix timestamp
- **:param `to_timestamp`:** the Unix timestamp

In [11]:
start_date = "2024-01-01"
end_date = "2024-02-01"

#### Convert start and end dates to Unix timestamps

In [12]:
pd.to_datetime(start_date)

Timestamp('2024-01-01 00:00:00')

In [13]:
pd.to_datetime(start_date).timetuple()

time.struct_time(tm_year=2024, tm_mon=1, tm_mday=1, tm_hour=0, tm_min=0, tm_sec=0, tm_wday=0, tm_yday=1, tm_isdst=-1)

In [14]:
time.mktime(pd.to_datetime(start_date).timetuple())

1704085200.0

In [15]:
int(time.mktime(pd.to_datetime(start_date).timetuple()))

1704085200

In [16]:
from_timestamp = int(time.mktime(pd.to_datetime(start_date).timetuple()))
to_timestamp = int(time.mktime(pd.to_datetime(end_date).timetuple()))

#### Get the data

In [17]:
data = cg.get_coin_market_chart_range_by_id(id = "bitcoin",
                                            vs_currency = "usd",
                                            from_timestamp = from_timestamp,
                                            to_timestamp = to_timestamp)
type(data)

dict

In [18]:
df = pd.DataFrame(data["prices"], columns = ["Timestamp", "Price"])

df.head()                  

Unnamed: 0,Timestamp,Price
0,1704085264933,42349.340091
1,1704088850683,42245.399145
2,1704092451846,42417.935946
3,1704096005236,42471.252137
4,1704099653385,42517.226836


In [19]:
df["Date"] = pd.to_datetime(df["Timestamp"], unit = "ms")
df.tail()

Unnamed: 0,Timestamp,Price,Date
738,1706745650896,42572.119544,2024-02-01 00:00:50.896
739,1706749265588,42451.351597,2024-02-01 01:01:05.588
740,1706752848843,41971.37082,2024-02-01 02:00:48.843
741,1706756468307,42102.189965,2024-02-01 03:01:08.307
742,1706760069226,41972.164605,2024-02-01 04:01:09.226


In [20]:
df.set_index("Date", inplace=True)
df.head()

Unnamed: 0_level_0,Timestamp,Price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-01-01 05:01:04.933,1704085264933,42349.340091
2024-01-01 06:00:50.683,1704088850683,42245.399145
2024-01-01 07:00:51.846,1704092451846,42417.935946
2024-01-01 08:00:05.236,1704096005236,42471.252137
2024-01-01 09:00:53.385,1704099653385,42517.226836


#### Using `resample()` to get 4-hourly data

In [21]:
df_4h = df.resample('4h').first()

df_4h.head()

Unnamed: 0_level_0,Timestamp,Price
Date,Unnamed: 1_level_1,Unnamed: 2_level_1
2024-01-01 04:00:00,1704085264933,42349.340091
2024-01-01 08:00:00,1704096005236,42471.252137
2024-01-01 12:00:00,1704110486723,42698.693073
2024-01-01 16:00:00,1704124845648,42799.529566
2024-01-01 20:00:00,1704139213477,43495.913496


### 1.3 CoinGecko Data Generator

In [None]:
import pandas as pd
from pycoingecko import CoinGeckoAPI
import time


def generate_data_coingecko(output_path, coin_id = "bitcoin", vs_currency = "usd",
                            start_date = "2023-11-01", end_date = "2024-10-01",
                            intervel = "4h"):
    """
    Fetch cryptocurrency data using the range API from CoinGeckoAPI,
    resample to the desired interval, and save it as a CSV file.

    :param output_path: Path to save the CSV file.
    :param coin_id: CoinGecko ID of the cryptocurrency (default is Bitcoin "bitcoin").
    :param vs_currency: The currency in which to get the market data (default is USD "usd").
    :param start_date: The start date for the data (format 'YYYY-MM-DD').
    :param end_date: The end date for the data (format 'YYYY-MM-DD').
    :param interval: Desired resampling interval (e.g., '4h' for 4-hour intervals).
    """
    
    # Initialize CoinGecko API
    cg = CoinGeckoAPI()

    #Convert start and end dates to Unix timestamp
    from_timestamp = int(time.mktime(pd.to_datetime(start_date).timetuple()))
    to_timestamp = int(time.mktime(pd.to_datetime(end_date).timetuple()))

    #Fetch data from the specific date range
    print(f"Fetching data for {coin_id} from {start_date} to {end_date}...")
    data = cg.get_coin_market_chart_range_by_id(id = coin_id, vs_currency = vs_currency,
                                                from_timestamp = from_timestamp, to_timestamp = to_timestamp)


    # Convert the data to a pandas Data Frame
    df = pd.DataFrame(data["prices"], columns = ["Timestamp", "Price"])
    df["Date"] = pd.to_datetime(df["Timestamp"], unit = "ms")  # Conver Unix Timestamp to data-time value
    df.set_index("Date", inplace = True)  # Set the Date as the index for resampling

    # Resample the data to desired interval
    df_resampled = df.resample(interval).first()

    # Save teh data as a CSV file to the output path
    df_resampled.to_csv(output_path)
    print(f"Data has been saved to {output_path} with {interval} intervals.")
 
    

if __name__ == "__main__":
    generate_data_coingecko("../data/bitcoin_4h_data.csv")
    


## 2. CoinCap

In [40]:
import requests
import pandas as pd

In [41]:
url = "https://api.coincap.io/v2/assets/ethereum/history?interval=d1"
response = requests.get(url)
data = response.json()

In [42]:
type(data)

dict

In [43]:
df = pd.DataFrame(data['data'])
df['date'] = pd.to_datetime(df['time'], unit='ms')
df.set_index('date', inplace=True)
df = df[['priceUsd']].rename(columns={'priceUsd': 'Price'})

In [44]:
df.head()

Unnamed: 0_level_0,Price
date,Unnamed: 1_level_1
2023-10-28,1788.0622255959847
2023-10-29,1791.8593855739584
2023-10-30,1804.0195782396388
2023-10-31,1804.924130044944
2023-11-01,1814.153221504956


In [45]:
df.tail()

Unnamed: 0_level_0,Price
date,Unnamed: 1_level_1
2024-10-21,2702.47092254333
2024-10-22,2633.987671102057
2024-10-23,2565.19360501292
2024-10-24,2534.2717917868754
2024-10-25,2509.4665188101944


## 3. Yahoo Finance

In [33]:
import yfinance as yf
import pandas as pd

eth_data = yf.download('ETH-USD', start='2015-01-01', interval='1d')

[*********************100%***********************]  1 of 1 completed


In [34]:
eth_data.head()

Price,Adj Close,Close,High,Low,Open,Volume
Ticker,ETH-USD,ETH-USD,ETH-USD,ETH-USD,ETH-USD,ETH-USD
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2017-11-09 00:00:00+00:00,320.884003,320.884003,329.451996,307.056,308.644989,893249984
2017-11-10 00:00:00+00:00,299.252991,299.252991,324.717987,294.541992,320.67099,885985984
2017-11-11 00:00:00+00:00,314.681,314.681,319.453003,298.191986,298.585999,842300992
2017-11-12 00:00:00+00:00,307.90799,307.90799,319.153015,298.513,314.690002,1613479936
2017-11-13 00:00:00+00:00,316.716003,316.716003,328.415009,307.024994,307.024994,1041889984


In [35]:
eth_data.tail()

Price,Adj Close,Close,High,Low,Open,Volume
Ticker,ETH-USD,ETH-USD,ETH-USD,ETH-USD,ETH-USD,ETH-USD
Date,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
2024-10-21 00:00:00+00:00,2665.712158,2665.712158,2765.549561,2655.294434,2746.305664,17328615363
2024-10-22 00:00:00+00:00,2620.19751,2620.19751,2669.790039,2605.618652,2665.656738,15541911074
2024-10-23 00:00:00+00:00,2509.098877,2509.098877,2624.450439,2457.169922,2620.088623,17876984551
2024-10-24 00:00:00+00:00,2534.498535,2534.498535,2559.151367,2506.7229,2523.606934,16128627601
2024-10-26 00:00:00+00:00,2450.075439,2450.075439,2450.075439,2428.77124,2435.932861,24082782208


In [36]:
isinstance(eth_data.columns, pd.MultiIndex)

True

In [37]:
eth_data.columns = eth_data.columns.get_level_values(0)

In [38]:
eth_data.head()

Price,Adj Close,Close,High,Low,Open,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-11-09 00:00:00+00:00,320.884003,320.884003,329.451996,307.056,308.644989,893249984
2017-11-10 00:00:00+00:00,299.252991,299.252991,324.717987,294.541992,320.67099,885985984
2017-11-11 00:00:00+00:00,314.681,314.681,319.453003,298.191986,298.585999,842300992
2017-11-12 00:00:00+00:00,307.90799,307.90799,319.153015,298.513,314.690002,1613479936
2017-11-13 00:00:00+00:00,316.716003,316.716003,328.415009,307.024994,307.024994,1041889984


In [39]:
eth_data.columns

Index(['Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume'], dtype='object', name='Price')