In [1]:
"""
- Questions: bid vs ask, ticker
- Divide upon event and buy side
- Get prob with the parameters and thresholds set
"""

'\n- Questions: bid vs ask, ticker\n- Divide upon event and buy side\n- Get prob with the parameters and thresholds set\n'

In [2]:
import pandas as pd

csv_path = "./data/m_listening4.csv"
df = pd.read_csv(csv_path)

# Create a multi-index grouped DataFrame
grouped_df = df.groupby(['event', 'buy_pick']).apply(lambda x: x)

print(f"{len(df['event'].unique())} events in the dataset:\n{'='*80}")

for i, event in enumerate(df['event'].unique(), 1):
    event_df = df[df['event'] == event]
    up_count = len(event_df[event_df['buy_pick'] == 'UP'])
    down_count = len(event_df[event_df['buy_pick'] == 'DOWN'])
    print(f"{i}: {event} - UP: {up_count}, DOWN: {down_count}, TOTAL: {up_count + down_count} \n {'-' * 40}")

# Function to get records for specific event and buy_pick
def get_records(event_name, buy_pick_value):
    try:
        return grouped_df.loc[(event_name, buy_pick_value)]
    except KeyError:
        # Return empty DataFrame if not found
        return pd.DataFrame(columns=df.columns)
    
# Example usage:
# Get all UP picks for the Bitcoin event
e = "Bitcoin Up or Down - January 13, 4:00AM-4:15AM ET"
result = get_records(
    event_name=e,
    buy_pick_value="UP"
)

print(f"Searched for {e}. Found {len(result)} records")
print(result)

15 events in the dataset:
1: Bitcoin Up or Down - January 13, 10:45AM-11:00AM ET - UP: 695, DOWN: 606, TOTAL: 1301 
 ----------------------------------------
2: Bitcoin Up or Down - January 13, 11:00AM-11:15AM ET - UP: 946, DOWN: 946, TOTAL: 1892 
 ----------------------------------------
3: Bitcoin Up or Down - January 13, 11:15AM-11:30AM ET - UP: 907, DOWN: 937, TOTAL: 1844 
 ----------------------------------------
4: Bitcoin Up or Down - January 13, 11:30AM-11:45AM ET - UP: 916, DOWN: 748, TOTAL: 1664 
 ----------------------------------------
5: Bitcoin Up or Down - January 13, 11:45AM-12:00PM ET - UP: 915, DOWN: 927, TOTAL: 1842 
 ----------------------------------------
6: Bitcoin Up or Down - January 13, 12:00PM-12:15PM ET - UP: 940, DOWN: 949, TOTAL: 1889 
 ----------------------------------------
7: Bitcoin Up or Down - January 13, 12:15PM-12:30PM ET - UP: 948, DOWN: 954, TOTAL: 1902 
 ----------------------------------------
8: Bitcoin Up or Down - January 13, 12:30PM-12:45P

  grouped_df = df.groupby(['event', 'buy_pick']).apply(lambda x: x)


In [3]:
# parameters list
minutes_before_end = [3, 5, 10]
ask_price = [80, 85, 90, 95]

import pandas as pd
results_df = pd.read_csv("./data/results.csv")

# event â†’ final result lookup
event_result_map = dict(
    zip(results_df['event'], results_df['result'])
)

In [4]:
import pandas as pd
from datetime import timedelta

# Paths
input_csvs = ["./data/m_listening4.csv", "./data/w_listening1.csv"]
output_csv = "./end_prob.csv"

def next_quarter(ts): # Given a timestamp, return the next quarter-hour timestamp.
    minute = (ts.minute // 15 + 1) * 15
    if minute == 60:
        return ts.replace(minute=0, second=0, microsecond=0) + timedelta(hours=1)
    else:
        return ts.replace(minute=minute, second=0, microsecond=0)

results_rows = []

for input_csv in input_csvs:
    df = pd.read_csv(input_csv)

    # Parse timestamp
    df['timestamp'] = pd.to_datetime(df['timestamp'])

    # Iterate through events
    for event, event_df in df.groupby('event'):
        event_df = event_df.sort_values('timestamp').copy()

        # Compute next quarter for each row
        event_df['next_quarter'] = event_df['timestamp'].apply(next_quarter)

        # Keep only rows within last 5 minutes
        final_5min_df = event_df[
            (event_df['next_quarter'] - event_df['timestamp']) <= timedelta(minutes=5)
        ]

        # Process UP and DOWN independently
        for side in ['UP', 'DOWN']:
            side_df = final_5min_df[
                (final_5min_df['buy_pick'] == side) &
                (final_5min_df['buy_best_ask'] >= 0.95)
            ]

            if side_df.empty:
                continue

            # First time this side reaches >= 0.95
            first_hit = side_df.iloc[0]

            # Determine win / lose if result exists
            final_result = event_result_map.get(event)
            if final_result is None:
                outcome = ''
            else:
                outcome = 'WIN' if side == final_result else 'LOSE'

            time_left = round(
                (next_quarter(first_hit['timestamp']) - first_hit['timestamp']).total_seconds()
            )
            
            results_rows.append({
                'event': event,
                'timestamp': first_hit['timestamp'],
                'time_left': time_left,
                'buy_pick': side,
                'buy_size': first_hit['buy_size'],
                'buy_best_ask': first_hit['buy_best_ask'],
                'final': final_result,
                'results': outcome
            })

# Save output
end_prob_df = pd.DataFrame(results_rows)
end_prob_df.to_csv(output_csv, index=False)

print(f"Saved {len(end_prob_df)} rows to {output_csv}")


Saved 23 rows to ./end_prob.csv
