In [1]:
# Mount Google Drive at /content/drive; later cells read modules from it and
# write CSV exports to it. force_remount=True requests a fresh mount each run.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [2]:
try:
    import twelvedata
except ModuleNotFoundError:
    !pip install twelvedata[pandas,matplotlib,plotly,websocket-client]
    import twelvedata


Collecting twelvedata[matplotlib,pandas,plotly,websocket-client]
  Downloading twelvedata-1.2.24-py2.py3-none-any.whl.metadata (19 kB)
[0mCollecting pytimeparse<2,>=1.1 (from twelvedata[matplotlib,pandas,plotly,websocket-client])
  Downloading pytimeparse-1.1.8-py2.py3-none-any.whl.metadata (3.4 kB)
Downloading pytimeparse-1.1.8-py2.py3-none-any.whl (10.0 kB)
Downloading twelvedata-1.2.24-py2.py3-none-any.whl (46 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.4/46.4 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pytimeparse, twelvedata
Successfully installed pytimeparse-1.1.8 twelvedata-1.2.24


In [3]:
import sys
# Folder on the mounted Drive that holds importable helper modules; it is
# appended to the module search path only when it is not already listed, so
# re-running this cell never adds a duplicate entry.
need_in_path = '/content/drive/My Drive/Colab Notebooks'
if sys.path.count(need_in_path) == 0:
    sys.path.append(need_in_path)

In [4]:
from keys import twelveDataKey as api_key

In [5]:
# Shared Twelve Data client, authenticated with the key imported from `keys`;
# the fetch cells and helper functions below all go through this object.
td = twelvedata.TDClient(apikey = api_key)

In [6]:
# Sample call: 15-minute AAPL bars for January 2024 in New York time, limited
# to 4 rows. `ts` is not read again by any top-level cell shown in this notebook.
ts = td.time_series(
      symbol = 'AAPL',
      interval = '15min',
      outputsize = 4,
      timezone = "America/New_York",
      start_date = "2024-01-01",
      end_date = "2024-02-01"
)


In [None]:
from datetime import datetime
import pandas as pd

def goodEnough(symbol, interval, start_date, end_date):
  """
  Walk from start_date up to (but not including) end_date in 15-minute steps,
  building one 4-row time-series request per step with the global `td` client.

  Args:
      symbol (str): Ticker symbol passed through to the API.
      interval (str): Interval string passed through to the API unchanged.
      start_date (str): First request time, formatted '%Y-%m-%d %H:%M'.
      end_date (str): Stop time in the same format; compared as a string.

  Returns:
      None. NOTE(review): each request object is discarded and nothing is
      collected, so this looks like an unfinished draft.
  """
  stamp_format = '%Y-%m-%d %H:%M'
  step = pd.Timedelta(minutes=15)  # fixed step, independent of `interval`
  cursor = start_date

  # Both bounds are strings in the same zero-padded format, so the
  # lexicographic comparison matches chronological order.
  while cursor < end_date:
    print(f"Current request : {cursor}")
    td.time_series(
      symbol=symbol,
      interval=interval,
      outputsize=4,
      timezone="America/New_York",
      start_date=cursor,
    )

    # Parse, advance by one step, and go back to the string form.
    cursor = (datetime.strptime(cursor, stamp_format) + step).strftime(stamp_format)

  return

In [18]:
from datetime import datetime, timedelta
import pandas as pd
import time

def good_enough_15(symbol, interval, start_date, end_date, td, output_size=5000,
                   max_retries=5, retry_wait=60):
    """
    Fetch intraday bars from the TwelveData API in consecutive 30-day windows
    and merge them into one de-duplicated, chronologically sorted DataFrame.

    Every window between start_date and end_date is requested. Window
    boundaries keep their time of day, so an intraday range such as
    '2024-02-01 09:30' .. '2024-02-01 16:00' is sent to the API as given.

    Args:
        symbol (str): Ticker symbol (e.g., 'AAPL')
        interval (int | str): Bar size in minutes (e.g., 15 or '15');
            sent to the API as f"{interval}min"
        start_date (str): Start datetime string (e.g., '2020-01-01' or
            '2020-01-01 09:30')
        end_date (str): End datetime string (e.g., '2024-01-01')
        td: Initialized TwelveData client (the object exposing time_series())
        output_size (int): Max number of rows per API call (default 5000)
        max_retries (int): Attempts per window before giving up (default 5)
        retry_wait (int | float): Seconds to sleep between attempts (default 60)

    Returns:
        pd.DataFrame: Merged rows, de-duplicated and sorted on 'datetime'.
        An empty DataFrame is returned when start_date is not before end_date.

    Raises:
        Exception: Whatever the client raised on the last attempt for a window,
        once max_retries attempts have failed.

    Side effects:
        Prints progress and writes "<symbol>_15min_4years.csv" to the current
        working directory when any data was fetched.
    """
    window_start = pd.to_datetime(start_date)
    end_time = pd.to_datetime(end_date)

    window_span = timedelta(days=30)  # Fetch one month at a time
    stamp_format = '%Y-%m-%d %H:%M'   # keep the time of day in request bounds
    attempts_allowed = max(1, int(max_retries))

    all_data = []

    # No weekday / market-hours check on the window start: a window that
    # begins on a weekend or at midnight still spans trading days, and
    # skipping it would silently drop up to a month of data.
    while window_start < end_time:
        window_end = min(window_start + window_span, end_time)
        start_label = window_start.strftime(stamp_format)
        end_label = window_end.strftime(stamp_format)

        print(f"Fetching data from {start_label} to {end_label}")

        for attempt in range(1, attempts_allowed + 1):
            try:
                ts = td.time_series(
                    symbol=symbol,
                    interval=f"{interval}min",
                    outputsize=output_size,  # Maximize request size
                    timezone="America/New_York",
                    start_date=start_label,
                    end_date=end_label
                )
                # as_pandas() is indexed by 'datetime'; turn it into a column
                # so windows can be de-duplicated on it after concatenation.
                all_data.append(ts.as_pandas().reset_index())
                break  # Exit retry loop on success
            except Exception as e:
                # Bounded retry: rate-limit errors clear after a wait, but a
                # permanent error (bad symbol, bad key) must not loop forever.
                if attempt == attempts_allowed:
                    raise
                print(f"Error fetching data: {e}. Retrying in {retry_wait} seconds...")
                time.sleep(retry_wait)  # Wait and retry

        window_start = window_end  # Move to the next month

    if not all_data:
        return pd.DataFrame()

    # Adjacent windows share their boundary timestamp, hence the de-dup.
    result_df = (
        pd.concat(all_data)
        .drop_duplicates(subset='datetime')
        .sort_values(by='datetime')
        .reset_index(drop=True)
    )

    # Save to CSV for backup
    result_df.to_csv(f"{symbol}_15min_4years.csv", index=False)

    return result_df


In [17]:
# Quick check of good_enough_15: one day (2024-02-01, 09:30 to 16:00) of
# 15-minute AAPL bars.
apl = good_enough_15('AAPL', 15, '2024-02-01 09:30', '2024-02-01 16:00', td)

Fetching data from 2024-02-01 09:30 to 2024-02-01 16:00


In [15]:
# Display the frame returned by the fetch above.
apl

Unnamed: 0,datetime,open,high,low,close,volume
0,2024-02-01 09:30:00,184.25,185.64,183.96001,185.22,6268234
1,2024-02-01 09:45:00,185.21001,185.5,184.62,185.5,2210231
2,2024-02-01 10:00:00,185.46001,185.99001,184.89999,185.742,2039908
3,2024-02-01 10:15:00,185.74001,186.17999,185.57001,186.105,1975450
4,2024-02-01 10:30:00,186.12,186.3,185.92999,186.065,1627294
5,2024-02-01 10:45:00,186.07001,186.31,185.35001,185.37061,1655675
6,2024-02-01 11:00:00,185.37,186.07001,185.28999,185.82001,1338452
7,2024-02-01 11:15:00,185.82001,185.91,185.34,185.395,1194561
8,2024-02-01 11:30:00,185.39,185.895,185.36,185.66499,1240088
9,2024-02-01 11:45:00,185.66499,185.8687,185.56,185.68201,933993


In [20]:
from datetime import datetime, timedelta
import pandas as pd
import time

def good_enough(symbol, interval, start_date, end_date, td, output_size=5000,
                max_retries=5, retry_wait=60):
    """
    Fetch intraday bars from the TwelveData API in large windows that overlap
    by 50%, then merge them into one de-duplicated, sorted DataFrame.

    Each window spans interval * output_size minutes of calendar time, and
    the start advances by half a window per request. Fetching stops as soon as
    a window reaches end_date, because any later window would be a strict
    subset of the one just fetched.

    Args:
        symbol (str): Ticker symbol (e.g., 'AAPL')
        interval (int | str): Data interval in minutes (e.g., 15 or '15')
        start_date (str): Start date string (e.g., '2020-01-01')
        end_date (str): End date string (e.g., '2024-01-01')
        td: Initialized TwelveData client (the object exposing time_series())
        output_size (int): Number of data points per request (default 5000)
        max_retries (int): Attempts per window before giving up (default 5)
        retry_wait (int | float): Seconds to sleep between attempts (default 60)

    Returns:
        pd.DataFrame: Merged rows, de-duplicated and sorted on 'datetime'.
        An empty DataFrame is returned when start_date is not before end_date.

    Raises:
        ValueError: If interval * output_size is not positive (the window
            could never advance).
        Exception: Whatever the client raised on the last attempt for a window,
            once max_retries attempts have failed.

    Side effects:
        Prints progress and writes "<symbol>_15min_4years_overlapping.csv" to
        the current working directory when any data was fetched.
    """
    request_start_time = pd.to_datetime(start_date)
    end_time = pd.to_datetime(end_date)

    # Window length in calendar time and the half-window step (50% overlap).
    window_span = timedelta(minutes=int(interval) * output_size)
    if window_span <= timedelta(0):
        raise ValueError("interval * output_size must be positive")
    half_window = window_span / 2

    stamp_format = '%Y-%m-%d %H:%M'
    attempts_allowed = max(1, int(max_retries))

    all_data = []

    while request_start_time < end_time:
        # Clamp the last window to the requested end.
        window_end = min(request_start_time + window_span, end_time)
        start_label = request_start_time.strftime(stamp_format)
        end_label = window_end.strftime(stamp_format)

        print(f"Fetching data from {start_label} to {end_label}")

        for attempt in range(1, attempts_allowed + 1):
            try:
                ts = td.time_series(
                    symbol=symbol,
                    interval=f"{interval}min",
                    outputsize=output_size,
                    timezone="America/New_York",
                    start_date=start_label,
                    end_date=end_label
                )
                # as_pandas() is indexed by 'datetime'; turn it into a column
                # so overlapping windows can be de-duplicated on it.
                all_data.append(ts.as_pandas().reset_index())
                break  # Exit retry loop on success
            except Exception as e:
                # Bounded retry: rate-limit errors clear after a wait, but a
                # permanent error (bad symbol, bad key) must not loop forever.
                if attempt == attempts_allowed:
                    raise
                print(f"Error fetching data: {e}. Retrying in {retry_wait} seconds...")
                time.sleep(retry_wait)  # Wait before retrying

        # The range up to end_time is now covered; further half-steps would
        # only re-request data already fetched and burn API credits.
        if window_end >= end_time:
            break

        # Move forward by half the window size (50% overlap)
        request_start_time += half_window

    if not all_data:
        return pd.DataFrame()

    # Overlapping windows return the same bars twice, hence the de-dup.
    result_df = (
        pd.concat(all_data)
        .drop_duplicates(subset='datetime')
        .sort_values(by='datetime')
        .reset_index(drop=True)
    )

    # Save to CSV for backup
    result_df.to_csv(f"{symbol}_15min_4years_overlapping.csv", index=False)

    return result_df


In [21]:
# Download 15-minute AAPL bars from 2021-01-01 09:30 to 2024-12-31 16:00 with
# the overlapping-window fetcher.
apl = good_enough('AAPL', 15, '2021-01-01 09:30', '2024-12-31 16:00', td)

Fetching data from 2021-01-01 09:30 to 2021-02-22 11:30
Fetching data from 2021-01-27 10:30 to 2021-03-20 12:30
Fetching data from 2021-02-22 11:30 to 2021-04-15 13:30
Fetching data from 2021-03-20 12:30 to 2021-05-11 14:30
Fetching data from 2021-04-15 13:30 to 2021-06-06 15:30
Fetching data from 2021-05-11 14:30 to 2021-07-02 16:30
Fetching data from 2021-06-06 15:30 to 2021-07-28 17:30
Fetching data from 2021-07-02 16:30 to 2021-08-23 18:30
Fetching data from 2021-07-28 17:30 to 2021-09-18 19:30
Error fetching data: You have run out of API credits for the current minute. 9 API credits were used, with the current limit being 8. Wait for the next minute or consider switching to a higher tier plan at https://twelvedata.com/pricing. Retrying in 60 seconds...
Fetching data from 2021-08-23 18:30 to 2021-10-14 20:30
Fetching data from 2021-09-18 19:30 to 2021-11-09 21:30
Fetching data from 2021-10-14 20:30 to 2021-12-05 22:30
Fetching data from 2021-11-09 21:30 to 2021-12-31 23:30
Fetching

In [22]:
# Preview the first rows of the AAPL download.
apl.head()

Unnamed: 0,datetime,open,high,low,close,volume
0,2021-01-04 09:30:00,133.371,133.6116,131.45,132.06,11659134
1,2021-01-04 09:45:00,132.07001,132.3,131.29379,131.4534,5528644
2,2021-01-04 10:00:00,131.45,131.67,130.38,130.5099,6201896
3,2021-01-04 10:15:00,130.5,131.14999,130.47,131.0536,4263471
4,2021-01-04 10:30:00,131.05499,131.31,130.56,131.045,3883290


In [24]:
# Summary statistics for the AAPL download (row count, value ranges, date span).
apl.describe()

Unnamed: 0,datetime,open,high,low,close,volume
count,26074,26074.0,26074.0,26074.0,26074.0,26074.0
mean,2022-12-31 19:35:17.626754816,168.791778,169.090542,168.488944,168.797857,2293937.0
min,2021-01-04 09:30:00,116.8,117.06,116.21,116.4,0.0
25%,2021-12-30 14:03:45,145.89,146.13,145.66,145.89999,1104488.0
50%,2022-12-29 15:07:30,166.475,166.72501,166.152495,166.475,1709376.0
75%,2024-01-02 09:56:15,184.96001,185.257465,184.710078,184.97995,2741569.0
max,2024-12-31 15:45:00,259.39001,260.09,259.10999,259.38989,215930000.0
std,,30.730884,30.748406,30.713626,30.733712,2748872.0


In [25]:
# Download 15-minute MSFT bars for the same 2021-01-01 .. 2024-12-31 range.
msft = good_enough('MSFT', 15, '2021-01-01 09:30', '2024-12-31 16:00', td)

Fetching data from 2021-01-01 09:30 to 2021-02-22 11:30
Fetching data from 2021-01-27 10:30 to 2021-03-20 12:30
Fetching data from 2021-02-22 11:30 to 2021-04-15 13:30
Fetching data from 2021-03-20 12:30 to 2021-05-11 14:30
Fetching data from 2021-04-15 13:30 to 2021-06-06 15:30
Fetching data from 2021-05-11 14:30 to 2021-07-02 16:30
Fetching data from 2021-06-06 15:30 to 2021-07-28 17:30
Fetching data from 2021-07-02 16:30 to 2021-08-23 18:30
Fetching data from 2021-07-28 17:30 to 2021-09-18 19:30
Error fetching data: You have run out of API credits for the current minute. 9 API credits were used, with the current limit being 8. Wait for the next minute or consider switching to a higher tier plan at https://twelvedata.com/pricing. Retrying in 60 seconds...
Fetching data from 2021-08-23 18:30 to 2021-10-14 20:30
Fetching data from 2021-09-18 19:30 to 2021-11-09 21:30
Fetching data from 2021-10-14 20:30 to 2021-12-05 22:30
Fetching data from 2021-11-09 21:30 to 2021-12-31 23:30
Fetching

In [26]:
# NOTE(review): this cell follows the MSFT download but previews `apl` again;
# `msft.head()` was probably intended here — confirm and re-run.
apl.head()

Unnamed: 0,datetime,open,high,low,close,volume
0,2021-01-04 09:30:00,133.371,133.6116,131.45,132.06,11659134
1,2021-01-04 09:45:00,132.07001,132.3,131.29379,131.4534,5528644
2,2021-01-04 10:00:00,131.45,131.67,130.38,130.5099,6201896
3,2021-01-04 10:15:00,130.5,131.14999,130.47,131.0536,4263471
4,2021-01-04 10:30:00,131.05499,131.31,130.56,131.045,3883290


In [28]:
# Save the AAPL frame as CSV on the mounted Google Drive (no index column).
apl.to_csv('/content/drive/My Drive/AAPL_15min_4years_overlapping.csv', index=False)

In [29]:
# Save the MSFT frame as CSV on the mounted Google Drive (no index column).
msft.to_csv('/content/drive/My Drive/MSFT_15min_4years_overlapping.csv', index=False)

In [None]:
 # NOTE(review): stray token in a never-executed cell; no `t` is defined in the
 # cells shown, so running this would likely raise NameError — consider deleting.
 t