In [2]:
import requests
import json
import pandas as pd
from datetime import datetime
import time
from tqdm import tqdm 
import numpy as np

coingecko_url = "https://api.coingecko.com/api/v3/"

In [7]:
all_coins_url = coingecko_url + "coins/list/"

casper_ids = {
    "id": "casper-network",
    "symbol": "cspr",
    "name" : "Casper Network"
}

tezos_ids = {
    'id': 'tezos', 
    'symbol': 'xtz', 
    'name': 'Tezos'
}
    

## Get Market, Community and Developer Data
Create arrays of dates for which to request data.

In [8]:
tezos_start =  "03-07-2018"
casper_start = "12-05-2021"
end_date = "27-06-2023"


# Return array of strings in format dd-mm-yyyy; inputs are in the same date format
def get_date_array(start, end, freq):

    start = datetime.strptime(start, '%d-%m-%Y')
    end = datetime.strptime(end, '%d-%m-%Y')

    dti = pd.date_range(start, end, freq = freq)
    return dti.strftime('%d-%m-%Y')

tezos_dates = get_date_array(tezos_start, end_date, "D")
casper_dates = get_date_array(casper_start, end_date, "D")

tezos_ids["start_date"] = tezos_start
casper_ids["start_date"] = casper_start


In [4]:
tezos_url = "{}coins/{}/history".format(coingecko_url, tezos_ids["id"])
casper_url = "{}coins/{}/history".format(coingecko_url, casper_ids["id"])

data = []

request_url = tezos_url
file_name = "Market data/Tezos_market_data.csv"

# Request data for each day in dates array and store it in a dataframe; sleep time is because of API limitations
for i, day in tqdm(enumerate(tezos_dates)):
    params = {
        "date": day
    }
    response = requests.get(url = request_url, params=params)
    js = json.loads(response.text)

    values = {}

    values['Date'] = day

    if not set(["market_data", "community_data", "developer_data"]).issubset(set(js.keys())):
        print(day, json.dumps(js, indent=2))

    for k, v in js["market_data"].items():
        values[k] = v["usd"]

    for k, v in js["community_data"].items():
        values[k] = v

    for k, v in js["developer_data"].items():
        if k == "code_additions_deletions_4_weeks": continue
        values[k] = v

    data.append(values)

    if i > 0 & i%30 == 0:
        time.sleep(10)
        df = pd.DataFrame.from_dict(data)
        df.to_csv(file_name, index = False)

df = pd.DataFrame.from_dict(data)
df.to_csv(file_name, index = False)

234it [40:13, 10.32s/it]


KeyboardInterrupt: 

In [5]:
df = pd.DataFrame.from_dict(data)
df.to_csv(file_name, index = False)

### Format data

In [9]:
df_casper = pd.read_csv("Market data/CoinGecko_Casper.csv")

In [None]:
df_casper.isna().sum()

In [11]:
# Remove the columns with mostly NA values and save final result to csv
cols = [ 'Date', 'current_price', 'market_cap', 'total_volume', 
       'twitter_followers', 'reddit_average_posts_48h',
       'reddit_average_comments_48h', 'reddit_subscribers',
       'reddit_accounts_active_48h']

df_result = df_casper[cols]
df_result.to_csv("Market data/Casper_market_data.csv", index = False)

## Get price data for other relevant blockchains

In [5]:
other_cryptos = ['bitcoin', 'ethereum', 'ripple', 'litecoin']

start_timestamp = time.mktime(datetime.strptime(tezos_dates[0], "%d-%m-%Y").timetuple())
end_timestamp = time.mktime(datetime.strptime(tezos_dates[-1], "%d-%m-%Y").timetuple())

params = {
    "from":start_timestamp,
    "to": end_timestamp,
    "vs_currency": "usd"
}

result_dir = "Market data/"

for cr in other_cryptos:
    url = coingecko_url + "coins/" + cr + "/market_chart/range"
    params["id"] = cr

    response = requests.get(url = url, params = params)
    js = json.loads(response.text)

    df_temp = pd.DataFrame()

    for key in js.keys():
        timestamps, values = np.split(np.array(js[key]), 2, axis=1)

        if df_temp.shape[1] == 0:
            df_temp = pd.DataFrame({"Date" : timestamps.flatten(), "price" : values.flatten()})
    	
        else:
            df_temp[key] = values.flatten()

    df_temp.Date = pd.to_datetime(df_temp.Date, unit = 'ms')
    df_temp.rename(columns =  {'market_caps':'market_cap', 'total_volumes':'total_volume'})
    
    df_temp.to_csv(result_dir + cr + "_market_data.csv", index = False)





### Get data for USDT

In [None]:
id = "tether", "usdtz"

## Get coin OHLC

In [28]:
params = {
    "vs_currency": "usd",
    "days": "max"
}

end_date_datime = pd.to_datetime(end_date,  format="%d-%m-%Y")

for id_dict in [tezos_ids, casper_ids]:
    url = "{}/coins/{}/ohlc".format(coingecko_url, id_dict["id"])
    params["id"] = id_dict["id"]

    response = requests.get(url = url, params = params)
    js = json.loads(response.text)

    start_date  = pd.to_datetime(id_dict["start_date"],  format="%d-%m-%Y")

    splitted = np.split(np.array(js), 5, axis=1)
    df_temp = pd.DataFrame()

    for i in splitted:
        df_temp[len(df_temp.columns)] = i.flatten()

    df_temp.columns = ['date', 'open', 'high', 'low', 'close']
    df_temp.date = pd.to_datetime(df_temp.date, unit = 'ms')

    df_temp = df_temp.loc[df_temp.date >= start_date]
    df_temp.to_csv(result_dir + id_dict["id"] + "_OHLC.csv", index = False )

In [31]:


response = requests.get(url = "https://api.coingecko.com/api/v3/coins/tezos/tickers", params = {"id":"tezos"})
js = json.loads(response.text)

time= [i['timestamp'] for i in js['tickers']]

time


['2023-07-04T15:28:11+00:00',
 '2023-07-04T15:25:34+00:00',
 '2023-07-04T15:25:44+00:00',
 '2023-07-04T15:26:33+00:00',
 '2023-07-04T15:25:12+00:00',
 '2023-07-04T15:11:50+00:00',
 '2023-07-04T15:25:52+00:00',
 '2023-07-04T15:25:28+00:00',
 '2023-07-04T15:25:56+00:00',
 '2023-07-04T15:27:24+00:00',
 '2023-07-04T15:27:51+00:00',
 '2023-07-04T15:25:41+00:00',
 '2023-07-04T15:27:16+00:00',
 '2023-07-04T15:25:20+00:00',
 '2023-07-04T15:24:33+00:00',
 '2023-07-04T15:18:44+00:00',
 '2023-07-04T15:25:53+00:00',
 '2023-07-04T15:11:50+00:00',
 '2023-07-04T15:25:20+00:00',
 '2023-07-04T15:28:00+00:00',
 '2023-07-04T15:27:13+00:00',
 '2023-07-04T15:26:42+00:00',
 '2023-07-04T15:27:26+00:00',
 '2023-07-04T15:26:46+00:00',
 '2023-07-04T15:26:02+00:00',
 '2023-07-04T15:25:57+00:00',
 '2023-07-04T15:13:22+00:00',
 '2023-07-04T15:26:46+00:00',
 '2023-07-04T15:07:06+00:00',
 '2023-07-04T15:17:22+00:00',
 '2023-07-04T15:22:00+00:00',
 '2023-07-04T15:15:10+00:00',
 '2023-07-04T15:27:24+00:00',
 '2023-07-