In [3]:
# !chmod 400 /Users/jackhan/Desktop/Alpfin/OneZero_Data.pem
# !ssh-add -d ~/.ssh/id_ed25519

In [4]:
import requests

# Fetch public IP
response = requests.get("http://checkip.amazonaws.com")
if response.status_code == 200:
    public_ip = response.text.strip()
    print(f"Public IP: {public_ip}")
else:
    print(f"Failed to fetch public IP. Status code: {response.status_code}")

Public IP: 104.251.123.179


In [3]:
import pymysql
from sshtunnel import SSHTunnelForwarder
import pandas as pd
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

def get_quote_data(date, symbol):
    """
    Same function you already defined:
    Fetch quote data for a specific date and symbol from Alp_Quotes.
    Returns a Pandas DataFrame or None if there's an error.
    """
    
    # -------------------------------
    # 1. Build the partition name
    # -------------------------------
    month_map = {
        1: "jan", 2: "feb", 3: "mar", 4: "apr", 5: "may", 6: "jun",
        7: "jul", 8: "aug", 9: "sep", 10: "oct", 11: "nov", 12: "dec"
    }
    
    partition_name = f"p_{month_map[date.month]}_{date.year}"
    
    # -------------------------------
    # 2. Build time filter boundaries
    # -------------------------------
    if not isinstance(date, pd.Timestamp):
        date = pd.Timestamp(date)
    start_str = date.strftime("%Y-%m-%d 00:00:00")
    end_str = (date + pd.Timedelta(days=1)).strftime("%Y-%m-%d 00:00:00")
    
    # -------------------------------
    # 3. Clean up symbol for file naming
    # -------------------------------
    symbol_filename = symbol.replace('/', '')
    date_str = date.strftime("%Y-%m-%d")
    output_pickle = f"Data/{symbol_filename}_{date_str}.pkl"
    
    # -------------------------------
    # 4. Build the SQL query
    # -------------------------------
    query = f"""
        SELECT 
            MakerId, 
            CoreSymbol, 
            TimeRecorded, 
            TimeSent, 
            TimeReceived, 
            Depth, 
            Side, 
            Price, 
            Size, 
            Provider, 
            IndicativeFlags, 
            QuoteFlags, 
            DisabledFlags, 
            ForwardPriceDelta, 
            id
        FROM Alp_Quotes PARTITION ({partition_name})
        WHERE 
            Depth >= 0
            AND CoreSymbol = '{symbol}'
            AND TimeRecorded >= '{start_str}'
            AND TimeRecorded < '{end_str}';
    """
    
    ssh_host = '18.133.184.11'
    ssh_user = 'ubuntu'
    ssh_key_file = '/Users/jackhan/Desktop/Alpfin/OneZero_Data.pem'
    db_host = '127.0.0.1'
    db_port = 3306
    db_user = 'Ruize'
    db_password = 'Ma5hedPotato567='
    db_name = 'Alp_CPT_Data'
    
    columns = [
        "MakerId",
        "CoreSymbol",
        "TimeRecorded",
        "TimeSent",
        "TimeReceived",
        "Depth",
        "Side",
        "Price",
        "Size",
        "Provider",
        "IndicativeFlags",
        "QuoteFlags",
        "DisabledFlags",
        "ForwardPriceDelta",
        "id"
    ]
    
    try:
        with SSHTunnelForwarder(
            (ssh_host, 22),
            ssh_username=ssh_user,
            ssh_pkey=ssh_key_file,
            remote_bind_address=(db_host, db_port),
            allow_agent=False,
            host_pkey_directories=[]
        ) as tunnel:
            
            connection = pymysql.connect(
                host='127.0.0.1',
                port=tunnel.local_bind_port,
                user=db_user,
                password=db_password,
                database=db_name,
                connect_timeout=10
            )
            
            try:
                cursor = connection.cursor()
                start_time = time.time()
                
                print("Start Query: ", query)
                cursor.execute(query)
                print("Query Success in seconds: ", time.time() - start_time)
                rows = cursor.fetchall()
                print("Fetch Success")
                
                duration = time.time() - start_time
                print(f"[{symbol} | {date_str}] Fetched {len(rows)} rows in {duration:.2f} secs.")
                
                df = pd.DataFrame(rows, columns=columns)
                df.to_pickle(output_pickle)
                
                return df
            
            finally:
                cursor.close()
                connection.close()
                
    except Exception as e:
        print(f"ERROR for {symbol} on {date_str}: {str(e)}")
        return None



In [4]:
# # Example: get data for 2025-01-08, symbol 'XAU/USD'
# requested_date = pd.Timestamp("2025-01-08")
# df_result = get_quote_data(requested_date, "XAU/USD")

# if df_result is not None:
#     print(f"Fetched DataFrame with {len(df_result)} rows.")
# else:
#     print("No data returned or an error occurred.")

In [5]:
symbols = [
    # "AUD/CAD",
    # "AUD/JPY",
    # "AUD/USD",
    # "BTCUSD",
    # "CAD/JPY",
    # "ETHUSD",
    # "EUR/GBP",
    # "EUR/NZD",
    # "EUR/SEK",
    # "EUR/USD",
    # "EUR/ZAR",
    # "GBP/JPY",
    # "GBP/USD",
    # "NASUSD",
    # "NZD/USD",
    # "U30USD",
    # "USD/CAD",
    # "USD/CHF",
    # "USD/JPY",
    # "USOUSD",
    # "XAG/USD",
    "XAU/USD",
    # "XNG/USD"
]

# 2. Create a date range for all *business* days (weekdays) in January 2025
#    (If you need *all* days including weekends, use freq="D" instead of "B".)
date_range = pd.date_range(start="2025-01-10", end="2025-01-10", freq="D")

# 3. For each date and symbol, call get_quote_data.
#    Note: This assumes you have already defined the get_quote_data() function 
#    in the same script or imported it from elsewhere.

for current_date in date_range:
    for symbol in symbols:
        print(f"Fetching data for {symbol} on {current_date.date()} ...")
        df = get_quote_data(current_date, symbol)
        # df is also saved automatically (via the get_quote_data() function)
        # to Data/<symbol>_<YYYY-MM-DD>.pkl
        if df is not None:
            print(f"  -> Returned {len(df)} rows.")
        else:
            print("  -> No data or error.")

Fetching data for XAU/USD on 2025-01-10 ...
Start Query:  
        SELECT 
            MakerId, 
            CoreSymbol, 
            TimeRecorded, 
            TimeSent, 
            TimeReceived, 
            Depth, 
            Side, 
            Price, 
            Size, 
            Provider, 
            IndicativeFlags, 
            QuoteFlags, 
            DisabledFlags, 
            ForwardPriceDelta, 
            id
        FROM Alp_Quotes PARTITION (p_jan_2025)
        WHERE 
            Depth >= 0
            AND CoreSymbol = 'XAU/USD'
            AND TimeRecorded >= '2025-01-10 00:00:00'
            AND TimeRecorded < '2025-01-11 00:00:00';
    


KeyboardInterrupt: 

In [6]:
# symbols = [
#     "AUD/CAD",
#     # "AUD/JPY",
#     # "AUD/USD",
#     # "BTCUSD",
#     # "CAD/JPY",
#     # "ETHUSD",
#     # "EUR/GBP",
#     # "EUR/NZD",
#     # "EUR/SEK",
#     # "EUR/USD",
#     # "EUR/ZAR",
#     # "GBP/JPY",
#     # "GBP/USD",
#     # "NASUSD",
#     # "NZD/USD",
#     # "U30USD",
#     # "USD/CAD",
#     # "USD/CHF",
#     # "USD/JPY",
#     # "USOUSD",
#     # "XAG/USD",
#     # "XAU/USD",
#     # "XNG/USD"
# ]

# # 2. Create a date range for all *business* days (weekdays) in January 2025
# #    (If you need *all* days including weekends, use freq="D" instead of "B".)
# date_range = pd.date_range(start="2024-11-08", end="2025-01-22", freq="D")

# # 3. For each date and symbol, call get_quote_data.
# #    Note: This assumes you have already defined the get_quote_data() function 
# #    in the same script or imported it from elsewhere.

# for current_date in date_range:
#     for symbol in symbols:
#         print(f"Fetching data for {symbol} on {current_date.date()} ...")
#         df = get_quote_data(current_date, symbol)
#         # df is also saved automatically (via the get_quote_data() function)
#         # to Data/<symbol>_<YYYY-MM-DD>.pkl
#         if df is not None:
#             print(f"  -> Returned {len(df)} rows.")
#         else:
#             print("  -> No data or error.")