In [None]:
from tqdm.notebook import tqdm
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from dotenv import load_dotenv
import os
import seaborn as sns
import pandas as pd
from twelvedata import TDClient
from datetime import datetime, timedelta

# Register `progress_apply` & friends on pandas objects.
tqdm.pandas()
# Pull variables from a local .env file into the process environment.
load_dotenv()

API_KEY = os.getenv("API_KEY")
# Report only whether the key was found. Cell outputs are stored inside the
# notebook file, so printing the key itself would leak it to anyone the
# notebook is shared with or committed for.
print(f"API_KEY loaded: {bool(API_KEY)}")

In [None]:
from twelvedata.exceptions import BadRequestError,InternalServerError,InvalidApiKeyError,TwelveDataError
import time

def load_ts(interval, instrument, start_date, end_date, error_tolerance = 5):
    """Download a time series from Twelve Data by paging backwards from `end_date`.

    Every request asks for at most 5000 rows in the window
    ``[start_date, current end]``. After each successful page the window end is
    moved to the oldest timestamp received, and the loop stops when the window
    end passes `start_date`, when a page makes no progress, or when the API
    reports an unrecoverable condition.

    All paging state is kept in local variables (no module-level globals), so
    the function can be called repeatedly and from any cell order.

    Args:
        interval: Bar size forwarded to the API (e.g. "1min", "1day").
        instrument: Symbol forwarded to the API.
        start_date: Window start as 'YYYY-MM-DD'; forwarded unchanged as the
            API's `start_date` on every request.
        end_date: Window end as 'YYYY-MM-DD'; used as the first request's
            `end_date`.
        error_tolerance: Number of *consecutive* failed requests after which
            the download is abandoned.

    Returns:
        A DataFrame with all downloaded pages concatenated and exact duplicate
        rows removed, or None when no page could be downloaded.

    Raises:
        ValueError: If `start_date` or `end_date` is not in 'YYYY-MM-DD' form.
    """
    DAY_FORMAT = '%Y-%m-%d'
    STAMP_FORMAT = '%Y-%m-%d %H:%M:%S'
    PAGE_SIZE = 5000
    RETRY_PAUSE_SECONDS = 60

    start_dt = datetime.strptime(start_date, DAY_FORMAT)
    end_dt = datetime.strptime(end_date, DAY_FORMAT)
    total_days = (end_dt - start_dt).days

    print(f"Running for {total_days}")
    td = TDClient(apikey=API_KEY)

    def _fetch_page(window_end):
        """Fetch one page ending at `window_end`; return (frame, oldest timestamp)."""
        ts = td.time_series(
            symbol=instrument,
            interval=interval,
            timezone="Europe/Malta",
            outputsize=PAGE_SIZE,
            start_date=start_date,
            end_date=window_end,
        )
        frame = ts.as_pandas().reset_index()
        # min() instead of iloc[-1]: correct regardless of the row order the
        # API happens to return.
        oldest = pd.to_datetime(frame['datetime']).min()
        return frame, oldest

    current_end = end_date
    current_end_dt = end_dt
    errors = 0
    days_done = 0
    all_data = []
    pbar = tqdm(desc="Building Timeseries", total=max(total_days, 0))
    try:
        while start_dt <= current_end_dt:
            try:
                page, oldest = _fetch_page(current_end)
            except BadRequestError as e:
                # The request itself was rejected (e.g. no data in the
                # remaining window). Repeating the identical request cannot
                # succeed, so stop instead of looping forever.
                print(f"Errors: {errors}\nBadRequestError: {e}")
                break
            except InvalidApiKeyError as e:
                # A bad key will not fix itself on retry.
                print(f"Errors: {errors}\nInvalidApiKeyError: {e}")
                break
            except InternalServerError as e:
                print(f"Errors: {errors}\nInternalServerError: {e}")
                errors += 1
            except TwelveDataError as e:
                print(f"Errors: {errors}\nTwelveDataError: {e}")
                errors += 1
                # We might have used up all credits. Wait, then retry the same window.
                time.sleep(RETRY_PAUSE_SECONDS)
            except Exception as e:
                # Best effort: keep what was downloaded so far and retry.
                # (No bare `except:` - KeyboardInterrupt must still stop the cell.)
                print(f"Errors: {errors}\nAn Exception: {e}")
                errors += 1
            else:
                errors = 0
                if pd.isna(oldest):
                    # Empty page: nothing left to fetch.
                    break
                all_data.append(page)

                # Keep the full timestamp (not just the date) so intraday rows
                # on the boundary day are not skipped by the next request.
                next_end = oldest.strftime(STAMP_FORMAT)
                next_end_dt = datetime.strptime(next_end, STAMP_FORMAT)
                if next_end_dt >= current_end_dt:
                    # The window did not move backwards; another request would
                    # return the same rows again.
                    break
                current_end, current_end_dt = next_end, next_end_dt

                # tqdm.update() takes an increment, so feed it only the newly
                # covered days.
                covered = min(max(total_days, 0), (end_dt - current_end_dt).days)
                pbar.update(covered - days_done)
                days_done = covered

            if errors >= error_tolerance:
                print(f"Enough errors: {errors}, quitting the loop!")
                break

            print(f"End date is {current_end}")
    finally:
        pbar.close()

    combined_data = None
    if len(all_data) > 0:
        # Consecutive pages can share their boundary row; drop exact repeats.
        combined_data = pd.concat(all_data, ignore_index=True).drop_duplicates(ignore_index=True)
    else:
        print("Nothing to aggregate!")
    return combined_data

In [None]:
# What to load.
INSTRUMENT_ID = "spx"
INTERVAL = "1min" # 1min, 5min, 15min, 30min, 45min, 1h, 2h, 4h, 1day, 1week, 1month
START = "2020-08-10"
END = "2023-08-11"

# True: read the previously saved JSON-lines file. False: download and save it.
GET_CACHED = True # False 

# Single definition of the cache location, shared by the read and write paths.
file_name = f"./ts/{INSTRUMENT_ID}-{START}-{END}-{INTERVAL}.json"

if GET_CACHED:
    # Read in chunks so tqdm can show progress on a large file.
    combined_data = pd.concat([chunk for chunk in tqdm(pd.read_json(file_name, lines=True, chunksize=1000), desc=f'Loading {file_name}')])
else:
    combined_data = load_ts(INTERVAL, INSTRUMENT_ID, START, END)
    if combined_data is not None:
        # The provider is messy, we need to clean. drop_duplicates() returns a
        # new frame, so the result has to be assigned.
        combined_data = combined_data.drop_duplicates(ignore_index=True)
        os.makedirs(os.path.dirname(file_name), exist_ok=True)
        # One record per line, no `indent`: the cached branch above reads the
        # file back with lines=True, which needs each record on a single line.
        combined_data.to_json(file_name, orient="records", lines=True)

if combined_data is None:
    print("Failed to load data!")

In [None]:
# Show summary statistics for the loaded frame as the cell output.
# Raises AttributeError if `combined_data` is None (i.e. loading failed).
combined_data.describe()