In [53]:
import requests
import pandas as pd
import math
import aiohttp
import httpx
import asyncio
import ujson as json
from datetime import datetime
import time
import os
import sys
from typing import List, Optional, Dict, TypeAlias
JSON: TypeAlias = dict[str, "JSON"] | list["JSON"] | str | int | float | bool | None

from dotenv import load_dotenv
load_dotenv()

from CUSIP_Curve import CUSIP_Curve

import nest_asyncio
nest_asyncio.apply()

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
def cookie_string_to_dict(cookie_string):
    """Parse a ``name=value; name2=value2`` cookie string into a dict.

    Segments are separated by ``;`` (surrounding whitespace is ignored).
    Each segment is split on its FIRST ``=`` only, so values that themselves
    contain ``=`` (e.g. ``ABTasty=uid=abc&fst=1``) are kept intact instead of
    being truncated. Segments without ``=`` (flag attributes such as
    ``HttpOnly`` or ``Secure``) are skipped. If a name occurs more than once,
    the last occurrence wins.

    Args:
        cookie_string: Raw cookie text, e.g. a ``Cookie`` or ``Set-Cookie``
            header value.

    Returns:
        dict mapping each name to its (string) value.
    """
    cookie_dict = {}
    for segment in cookie_string.split(';'):
        segment = segment.strip()
        if '=' not in segment:
            # Empty segment or a bare attribute flag: nothing to record.
            continue
        # partition() splits on the first '=' only, preserving '=' in values.
        name, _, value = segment.partition('=')
        cookie_dict[name] = value
    return cookie_dict

In [60]:
# Step 1: hit a public FINRA endpoint only to harvest the cookies it sets
# (the XSRF token in particular), which the data request needs afterwards.
t1 = time.time()
# NOTE(review): these look like headers copied from a browser CORS preflight;
# "authority"/"method"/"path"/"scheme" are sent here as ordinary header fields.
cookie_headers = {
    "authority": "services-dynarep.ddwa.finra.org",
    "method": "OPTIONS",
    "path": "/public/reporting/v2/data/group/FixedIncomeMarket/name/TreasuryTradeHistory",
    "scheme": "https",
    "accept": "*/*",
    "accept-encoding": "gzip, deflate, br, zstd",
    "accept-language": "en-US,en;q=0.9",
    "access-control-request-headers": "content-type,x-xsrf-token",
    "access-control-request-method": "POST",
    "cache-control": "no-cache",
    "origin": "https://www.finra.org",
    "pragma": "no-cache",
    "priority": "u=1, i",
    "referer": "https://www.finra.org/",
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-site",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36"
}

cookie_url = "https://services-dynarep.ddwa.finra.org/public/reporting/v2/group/Firm/name/ActiveIndividual/dynamiclookup/examCode"
# An explicit timeout keeps a stalled connection from hanging the kernel.
cookie_response = requests.get(cookie_url, headers=cookie_headers, timeout=30)
# Index the response headers directly: requests exposes them as a
# case-insensitive mapping, whereas converting to a plain dict first makes the
# lookup depend on the exact casing the server happened to send.
cookie_str = cookie_response.headers["set-cookie"]
cookie_dict = cookie_string_to_dict(cookie_string=cookie_str)
print("cookie fetch time (sec): ", time.time() - t1)

# Step 2: POST a TreasuryTradeHistory query using the cookie string and XSRF
# token obtained by the cookie-fetch step (cookie_str / cookie_dict).
t1 = time.time()
headers = {
    "authority": "services-dynarep.ddwa.finra.org",
    "method": "POST",
    "path": "/public/reporting/v2/data/group/FixedIncomeMarket/name/TreasuryTradeHistory",
    "scheme": "https",
    "accept": "application/json, text/plain, */*",
    "accept-encoding": "gzip, deflate, br, zstd",
    "accept-language": "en-US,en;q=0.9",
    "cache-control": "no-cache",
    # "content-length": str(sys.maxsize),
    "content-type": "application/json",
    "dnt": "1",
    "origin": "https://www.finra.org",
    "pragma": "no-cache",
    "priority": "u=1, i",
    "referer": "https://www.finra.org/",
    "sec-ch-ua": '"Not)A;Brand";v="99", "Google Chrome";v="127", "Chromium";v="127"',
    "sec-ch-ua-mobile": "?0",
    "sec-ch-ua-platform": '"Windows"',
    "sec-fetch-dest": "empty",
    "sec-fetch-mode": "cors",
    "sec-fetch-site": "same-site",
    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36",
    "x-xsrf-token": cookie_dict["XSRF-TOKEN"],
    "cookie": cookie_str
}

# Query body: which columns to return, the trade-date window, the CUSIP
# filter, and the page window (limit / offset) into the result set.
payload = {
    "fields": [
        "issueSymbolIdentifier",
        "cusip",
        "tradeDate",
        "tradeTime",
        "reportedTradeVolume",
        "priceType",
        "lastSalePrice",
        "lastSaleYield",
        "reportingSideCode",
        "contraPartyTypeCode",
    ],
    "dateRangeFilters": [
        {
            "fieldName": "tradeDate",
            "startDate": "2024-07-31",
            "endDate": "2024-07-31"
        },
    ],
    "compareFilters": [
        {
            "fieldName": "cusip",
            "fieldValue": "912810UA4",
            "compareType": "EQUAL"
        },
        # {
        #     "fieldName": "tradeTime",
        #     "compareType": "GTE",
        #     "fieldValue": "08:00:00"
        # },
        # {
        #     "fieldName": "tradeTime",
        #     "compareType": "LTE",
        #     "fieldValue": "09:00:00"
        # }
    ],
    "limit": 5000,
    "offset": 50000,
    # "recordMaxLimit": sys.maxsize,
    # "responsePayloadMaxSize": "100mb",
    # "sortFields": ["-tradeTime"],
}

url = "https://services-dynarep.ddwa.finra.org/public/reporting/v2/data/group/FixedIncomeMarket/name/TreasuryTradeHistory"
# (connect, read) timeouts: the read side is generous because a full page can
# take several seconds to arrive, but a dead connection must not hang forever.
res = requests.post(url, json=payload, headers=headers, timeout=(10, 120))
if res.ok:
    # Decode the response body once and reuse it for both headers and data.
    return_body = res.json()["returnBody"]
    print(json.dumps(return_body["headers"], indent=4))
    # "data" is itself a JSON-encoded string, hence the second decode.
    trade_data_json = json.loads(return_body["data"])
    df = pd.DataFrame(trade_data_json)
    display(df)
    # df.to_excel("temp.xlsx")
else:
    print("res status: ", res.status_code)

print("data fetch time (sec): ", time.time() - t1)

cookie fetch time (sec):  0.5769882202148438
{
    "Record-Max-Limit": [
        "5000"
    ],
    "Transfer-Encoding": [
        "chunked"
    ],
    "Connection": [
        "keep-alive"
    ],
    "Pragma": [
        "no-cache"
    ],
    "Record-Limit": [
        "5000"
    ],
    "Date": [
        "Tue, 13 Aug 2024 21:44:55 GMT"
    ],
    "Record-Total": [
        "54649"
    ],
    "X-Frame-Options": [
        "DENY"
    ],
    "FINRA-api-request-id": [
        "0550f79f-08e9-4e97-8ec8-ad46bfc76258"
    ],
    "Response-Payload-Max-Size": [
        "3mb"
    ],
    "Record-Offset": [
        "50000"
    ],
    "Cache-Control": [
        "no-cache, no-store, max-age=0, must-revalidate"
    ],
    "Expires": [
        "0"
    ],
    "data-version": [
        "1"
    ],
    "Content-Type": [
        "application\/json"
    ]
}


Unnamed: 0,tradeTime,cusip,issueSymbolIdentifier,priceType,reportingSideCode,productSubTypeCode,contraPartyTypeCode,tradeDate,reportedTradeVolume,lastSaleYield,lastSalePrice
0,10:39:32,912810UA4,TSRYS5802849,D,S,NOTE,D,2024-07-31,1000000.00,4.348796,104.582031
1,10:39:32,912810UA4,TSRYS5802849,D,S,NOTE,D,2024-07-31,1000000.00,4.345381,104.640625
2,10:39:32,912810UA4,TSRYS5802849,D,B,NOTE,C,2024-07-31,1000000.00,4.345381,104.640625
3,10:39:32,912810UA4,TSRYS5802849,D,S,NOTE,D,2024-07-31,1000000.00,4.345381,104.640625
4,10:39:32,912810UA4,TSRYS5802849,D,B,NOTE,C,2024-07-31,1000000.00,4.345381,104.640625
...,...,...,...,...,...,...,...,...,...,...,...
4644,11:54:58,912810UA4,TSRYS5802849,D,S,NOTE,C,2024-07-31,1000000.00,4.347193,104.609525
4645,11:54:58,912810UA4,TSRYS5802849,D,S,NOTE,D,2024-07-31,3000000.00,4.347202,104.609375
4646,11:55:01,912810UA4,TSRYS5802849,D,S,NOTE,D,2024-07-31,1000000.00,4.347885,104.597656
4647,11:55:01,912810UA4,TSRYS5802849,D,S,NOTE,T,2024-07-31,1000000.00,4.347885,104.597656


data fetch time (sec):  11.938526630401611


In [None]:
# "AppSession=299eba16-cf09-47e7-a539-5574d84511ee; _ga_GH9GRR6EQX=GS1.2.1706280816.32.1.1706280863.0.0.0; _ga_YGK53W03SW=GS1.2.1707000623.7.0.1707000679.4.0.0; _gcl_au=1.1.524480291.1716932991; _ga_6C0WRDNQ4X=GS1.2.1723403344.59.1.1723403552.60.0.0; _ga_ERMNG5DM98=GS1.1.1723403344.64.1.1723403568.0.0.0; __cf_bm=AGM3e.esD_s3e6j9qfuBJQ_NZLJnEwD_lfp8Z1.3NaI-1723553809-1.0.1.1-Sd34Z3iX6ST_mOQEmZQ8W5yLuG_3IKUmUFCr6oi4diEy78u0qt.Cnllv.tzMK47HVfKOLovk6TJswh45.uSXdQ; XSRF-TOKEN=7daea147-1f5a-40ac-9c7c-125df4cdf4b6; _cfuvid=7SlvJ9PLWcz7cmUxk40bFa.B8q5BDsfsJ__wtMVJ2DQ-1723553810336-0.0.1.1-604800000; _gid=GA1.2.1039513591.1723553811; _gat_UA-149324990-1=1; _ga_0469QZC1C3=GS1.1.1723554379.2.1.1723554464.0.0.0; _gat_UA-134617307-1=1; _gat_UA-134600757-1=1; ABTastySession=mrasn=&lp=https%253A%252F%252Fwww.finra.org%252Ffinra-data%252Ffixed-income; ABTasty=uid=w0jzrwcrynkvc81s&fst=1716932988610&pst=1717364774299&cst=1723553808291&ns=4&pvt=38&pvis=25&th=; _ga_PJ6P8VS89P=GS1.2.1723553811.11.1.1723554506.0.0.0; _ga=GA1.2.57496877.1693270740; _ga_P3LS8SG0QV=GS1.1.1723553811.11.1.1723554507.0.0.0; __cf_bm=yxCgZbjHVVHswGhFLg0v_gz8hOXgZUY9UhhEzBY2cbQ-1723554507-1.0.1.1-P4M6fkR8Iiol5Hv5zfhKFXOeOk26Y4mwtufMLD_e3VfROZlK8jH46HekApr.nQhGiP.LNb9l9gec04E_gbowIQ"
# "AppSession=1e2578ae-9ec6-4b76-ac6e-5c800d905c6b; Max-Age=2147483647; Path=/; Domain=.finra.org, __cf_bm=RtTuPNUj0_LDODhrwUtzGPCPgyR8oYCA0FdyzTHQSps-1723568166-1.0.1.1-FQuDbrdY22yZPka0H6yR6SewdjvglFW7tkpZLU94P1USjWrxZW3KV57C.3Z278e1OYJJlFP8R9BQ2k3sfdMqMg; path=/; expires=Tue, 13-Aug-24 17:26:06 GMT; domain=.services-dynarep.ddwa.finra.org; HttpOnly; Secure; SameSite=None, _cfuvid=UtWao_i0XGv0x.H.SVjkicwQsMVtsm.jy86dt4awE6U-1723568166233-0.0.1.1-604800000; path=/; domain=.services-dynarep.ddwa.finra.org; HttpOnly; Secure; SameSite=None"


In [12]:
# Sanity check of the pagination arithmetic: how many fixed-size pages are
# needed to cover max_record_size records, and the cumulative record count
# reached after each page.
max_record_size = 54000
page_size = 5000  # single source of truth for the per-request page size
num_reqs = math.ceil(max_record_size / page_size)
for i in range(1, num_reqs + 1):
    print(i * page_size)

5000
10000
15000
20000
25000
30000
35000
40000
45000
50000
55000


In [126]:
def temp_tester_httpx(start, end, cusips):
    """Run the historical TRACE fetch through an ``httpx.AsyncClient``.

    Opens one async client, asks
    ``CUSIP_Curve._build_fetch_tasks_historical_trace_data`` for the awaitables
    covering ``cusips`` between ``start`` and ``end``, gathers them, and
    returns the gathered list.

    Args:
        start: Passed through as ``start_date``.
        end: Passed through as ``end_date``.
        cusips: Passed through as ``cusips``.

    Returns:
        The list produced by ``asyncio.gather`` over the built tasks
        (element shape is defined by ``CUSIP_Curve`` — not visible here).
    """
    async def _collect():
        # The client stays open until every gathered task has finished.
        async with httpx.AsyncClient() as client:
            pending = await CUSIP_Curve._build_fetch_tasks_historical_trace_data(
                client=client, cusips=cusips, start_date=start, end_date=end
            )
            return await asyncio.gather(*pending)

    return asyncio.run(_collect())

def temp_tester_aio(start, end, cusips):
    """Run the historical TRACE fetch through an ``aiohttp.ClientSession``.

    Opens one session, asks
    ``CUSIP_Curve._build_fetch_tasks_historical_trace_data`` for the awaitables
    covering ``cusips`` between ``start`` and ``end``, gathers them, and
    returns the gathered list.

    Args:
        start: Passed through as ``start_date``.
        end: Passed through as ``end_date``.
        cusips: Passed through as ``cusips``.

    Returns:
        The list produced by ``asyncio.gather`` over the built tasks
        (element shape is defined by ``CUSIP_Curve`` — not visible here).
    """
    async def _collect():
        # The session stays open until every gathered task has finished.
        async with aiohttp.ClientSession() as session:
            pending = await CUSIP_Curve._build_fetch_tasks_historical_trace_data(
                session=session, cusips=cusips, start_date=start, end_date=end
            )
            return await asyncio.gather(*pending)

    return asyncio.run(_collect())

In [127]:
# Exercise the aiohttp-based tester for one CUSIP over a single day.
temp_tester_aio(
    start=datetime(2024, 7, 31),
    end=datetime(2024, 7, 31),
    cusips=["912810UA4"],
)

[('912810UA4',
       tradeTime      cusip issueSymbolIdentifier priceType reportingSideCode  \
  0     08:05:54  912810UA4          TSRYS5802849         D                 S   
  1     08:05:54  912810UA4          TSRYS5802849         D                 S   
  2     08:05:54  912810UA4          TSRYS5802849         D                 S   
  3     08:05:54  912810UA4          TSRYS5802849         D                 S   
  4     08:05:57  912810UA4          TSRYS5802849         D                 S   
  ...        ...        ...                   ...       ...               ...   
  4995  09:11:09  912810UA4          TSRYS5802849         D                 S   
  4996  09:11:09  912810UA4          TSRYS5802849         D                 S   
  4997  09:11:09  912810UA4          TSRYS5802849         D                 S   
  4998  09:11:09  912810UA4          TSRYS5802849         D                 S   
  4999  09:11:09  912810UA4          TSRYS5802849         D                 S   
  
       con

In [61]:
# def fetch_historical_prices(
#     dates: List[datetime], cusips: Optional[List[str]] = None
# ) -> Dict[str, str]:
#     url = "https://savingsbonds.gov/GA-FI/FedInvest/selectSecurityPriceDate"
#     headers = {
#         "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
#         "Accept-Encoding": "gzip, deflate, br, zstd",
#         "Accept-Language": "en-US,en;q=0.9",
#         "Cache-Control": "max-age=0",
#         "Connection": "keep-alive",
#         "Content-Length": "73",
#         "Content-Type": "application/x-www-form-urlencoded",
#         "Dnt": "1",
#         "Host": "savingsbonds.gov",
#         "Origin": "https://savingsbonds.gov",
#         "Referer": "https://savingsbonds.gov/GA-FI/FedInvest/selectSecurityPriceDate",
#         "Sec-Ch-Ua": '"Google Chrome";v="125", "Chromium";v="125", "Not.A/Brand";v="24"',
#         "Sec-Ch-Ua-Mobile": "?0",
#         "Sec-Ch-Ua-Platform": '"Windows"',
#         "Sec-Fetch-Dest": "document",
#         "Sec-Fetch-Mode": "navigate",
#         "Sec-Fetch-Site": "same-origin",
#         "Sec-Fetch-User": "?1",
#         "Upgrade-Insecure-Requests": "1",
#         "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36",
#     }

#     def build_date_payload(date: datetime):
#         return {
#             "priceDate.month": date.month,
#             "priceDate.day": date.day,
#             "priceDate.year": date.year,
#             "submit": "Show Prices",
#         }

#     async def fetch_prices_from_treasury_date_search(
#         session: aiohttp.ClientSession, date: datetime
#     ) -> Dict:
#         payload = build_date_payload(date)
#         try:
#             response = await session.post(url, data=payload, headers=headers, allow_redirects=True)
#             response.raise_for_status()
#             tables = pd.read_html(response.content)
#             df = tables[0]
#             missing_cusips = [
#                 cusip for cusip in cusips if cusip not in df["CUSIP"].values
#             ]
#             if missing_cusips:
#                 print(
#                     f"The following CUSIPs are not found in the DataFrame: {missing_cusips}"
#                 )
#             df = df[df["CUSIP"].isin(cusips)] if cusips else df
#             return date, df
#         except Exception as e:
#             print(f"An error occurred: {e}")
#             return date, pd.DataFrame()

#     async def run_fetch_all(dates: List[datetime]) -> List[Dict]:
#         async with aiohttp.ClientSession() as session:
#             tasks = [
#                 fetch_prices_from_treasury_date_search(session=session, date=date)
#                 for date in dates
#             ]
#             results = await asyncio.gather(*tasks)
#             return results

#     bonds = asyncio.run(run_fetch_all(dates))
#     return dict(bonds)


def fetch_historical_prices(
    dates: List[datetime], cusips: Optional[List[str]] = None
) -> Dict[datetime, pd.DataFrame]:
    """Fetch the FedInvest price table for each date, optionally filtered by CUSIP.

    One form POST per date is sent concurrently through a shared
    ``httpx.AsyncClient`` (10-second timeout, redirects followed, httpx's
    default request headers). The first HTML table of each response is parsed
    with ``pandas.read_html``.

    Args:
        dates: Dates to query. The result is built with ``dict(...)``, so a
            date that appears more than once keeps only its last result.
        cusips: CUSIPs to keep. When ``None`` or empty, the full table for
            each date is returned unfiltered. CUSIPs that are absent from a
            date's table are reported via ``print``.

    Returns:
        Mapping of each requested date to its DataFrame. A date whose request
        or parsing failed maps to an empty DataFrame (the error is printed,
        not raised).
    """
    url = "https://savingsbonds.gov/GA-FI/FedInvest/selectSecurityPriceDate"

    def build_date_payload(date: datetime):
        # Form fields submitted for a single price date.
        return {
            "priceDate.month": date.month,
            "priceDate.day": date.day,
            "priceDate.year": date.year,
            "submit": "Show Prices",
        }

    async def fetch_prices_from_treasury_date_search(
        client: httpx.AsyncClient, date: datetime
    ) -> tuple[datetime, pd.DataFrame]:
        payload = build_date_payload(date)
        try:
            response = await client.post(url, data=payload, follow_redirects=True)
            response.raise_for_status()
            tables = pd.read_html(response.content)
            df = tables[0]
            if not cusips:
                # No filter requested (None or empty list): return everything.
                # Previously cusips=None raised a TypeError here that the
                # broad handler below turned into an empty result.
                return date, df
            available_cusips = set(df["CUSIP"])
            missing_cusips = [
                cusip for cusip in cusips if cusip not in available_cusips
            ]
            if missing_cusips:
                print(
                    f"The following CUSIPs are not found in the DataFrame: {missing_cusips}"
                )
            return date, df[df["CUSIP"].isin(cusips)]
        except httpx.HTTPStatusError as e:
            print(f"HTTP error status: {e.response.status_code}")
            return date, pd.DataFrame()
        except Exception as e:
            # Deliberate best-effort: one failing date must not abort the
            # whole batch, so report it and fall back to an empty frame.
            print(f"An error occurred: {e}")
            return date, pd.DataFrame()

    timeout = httpx.Timeout(10)

    async def run_fetch_all(
        dates: List[datetime],
    ) -> List[tuple[datetime, pd.DataFrame]]:
        async with httpx.AsyncClient(timeout=timeout) as client:
            tasks = [
                fetch_prices_from_treasury_date_search(client=client, date=date)
                for date in dates
            ]
            results = await asyncio.gather(*tasks)
            return results

    bonds = asyncio.run(run_fetch_all(dates))
    return dict(bonds)

In [62]:
# Look up the prices of two CUSIPs on a single date.
fetch_historical_prices(
    dates=[datetime(2024, 8, 12)],
    cusips=["912810UA4", "912810UB2"],
)

{datetime.datetime(2024, 8, 12, 0, 0):          CUSIP      SECURITY TYPE    RATE MATURITY DATE  CALL DATE       BUY  \
 351  912810UB2  MARKET BASED BOND  4.625%    05/15/2044        NaN  104.3125   
 391  912810UA4  MARKET BASED BOND  4.625%    05/15/2054        NaN  107.0000   
 
           SELL  END OF DAY  
 351  104.28125   104.40625  
 391  106.96875   107.18750  }