In [1]:
"""
- Questions: bid vs ask, ticker
- Divide upon event and buy side
- Get prob with the parameters and thresholds set
"""

'\n- Questions: bid vs ask, ticker\n- Divide upon event and buy side\n- Get prob with the parameters and thresholds set\n'

In [None]:
# Candidate parameter grids for the analysis.
# NOTE(review): neither list is read by any cell visible here.
# Presumably "how many minutes before the event ends" to start looking — confirm.
minutes_before_end = [1, 3, 5, 10]
# NOTE(review): these look like cents/percent, whereas the buy_best_ask filter
# elsewhere in this notebook compares against a 0-1 fraction (0.95) — confirm
# the units before wiring these values into that filter.
ask_price = [80, 85, 90, 95]



In [2]:
import pandas as pd

csv_path = "./data/of_listening2.csv"
df = pd.read_csv(csv_path)

# Index every row by (event, buy_pick, original row label) so get_records can
# pull one event/side with a single .loc lookup.
# This is built with set_index instead of groupby(...).apply(lambda x: x):
# the identity-apply operates on the grouping columns, which pandas has
# deprecated (the recorded output of this cell ends with that warning), and
# the result then depends on the installed pandas version.
grouped_df = (
    df.dropna(subset=['event', 'buy_pick'])  # groupby skipped rows with a missing key
      .set_index(['event', 'buy_pick'], append=True, drop=False)
      .reorder_levels([1, 2, 0])             # (row, event, pick) -> (event, pick, row)
      .sort_index()                          # same key ordering groupby produced
)

# Events in order of first appearance; computed once and reused below.
events = df['event'].unique()
print(f"{len(events)} events in the dataset:\n{'='*80}")

for i, event in enumerate(events, 1):
    event_df = df[df['event'] == event]
    # Summing a boolean mask counts matches without building filtered frames.
    up_count = int((event_df['buy_pick'] == 'UP').sum())
    down_count = int((event_df['buy_pick'] == 'DOWN').sum())
    print(f"{i}: {event} - UP: {up_count}, DOWN: {down_count}, TOTAL: {up_count + down_count} \n {'-' * 40}")

# Function to get records for specific event and buy_pick
def get_records(event_name, buy_pick_value):
    """Return the rows of ``grouped_df`` stored under (event_name, buy_pick_value).

    Parameters
    ----------
    event_name : value looked up in the first index level of ``grouped_df``.
    buy_pick_value : value looked up in the second index level.

    Returns
    -------
    The ``.loc`` selection for that key pair, or an empty DataFrame carrying
    ``df``'s column labels when the pair is not present in the index.
    """
    lookup_key = (event_name, buy_pick_value)
    try:
        matches = grouped_df.loc[lookup_key]
    except KeyError:
        # Unknown event/side: hand back an empty frame rather than raising.
        matches = pd.DataFrame(columns=df.columns)
    return matches
    
# Demo lookup: fetch every UP row recorded for the 4:00AM-4:15AM ET Bitcoin
# event and show how many came back.
e = "Bitcoin Up or Down - January 13, 4:00AM-4:15AM ET"
result = get_records(e, "UP")

print(f"Searched for {e}. Found {len(result)} records")
print(result)

5 events in the dataset:
1: Bitcoin Up or Down - January 13, 4:00AM-4:15AM ET - UP: 846, DOWN: 728, TOTAL: 1574 
 ----------------------------------------
2: Bitcoin Up or Down - January 13, 4:15AM-4:30AM ET - UP: 899, DOWN: 847, TOTAL: 1746 
 ----------------------------------------
3: Bitcoin Up or Down - January 13, 4:30AM-4:45AM ET - UP: 780, DOWN: 899, TOTAL: 1679 
 ----------------------------------------
4: Bitcoin Up or Down - January 13, 4:45AM-5:00AM ET - UP: 899, DOWN: 873, TOTAL: 1772 
 ----------------------------------------
5: Bitcoin Up or Down - January 13, 5:00AM-5:15AM ET - UP: 420, DOWN: 420, TOTAL: 840 
 ----------------------------------------
Searched for Bitcoin Up or Down - January 13, 4:00AM-4:15AM ET. Found 846 records
                timestamp  left (real time)  \
0     2026-01-13 17:00:42               857   
2     2026-01-13 17:00:43               856   
5     2026-01-13 17:00:44               855   
6     2026-01-13 17:00:44               854   
8     202

  grouped_df = df.groupby(['event', 'buy_pick']).apply(lambda x: x)


In [None]:
import pandas as pd
from datetime import timedelta

# Input feed and destination of the summary CSV.
# NOTE(review): the recorded output of this cell reports "./data/end_prob.csv",
# which differs from output_csv below — confirm which location is intended.
input_csv, output_csv = "./data/of_listening2.csv", "./end_prob.csv"

# Read the feed and convert the 'timestamp' column to datetimes so that the
# time arithmetic further down works on real timestamps.
df = pd.read_csv(input_csv).assign(
    timestamp=lambda raw: pd.to_datetime(raw['timestamp'])
)

def next_quarter(ts):
    """
    Return the quarter-hour boundary (:00, :15, :30, :45) strictly after ts.

    A timestamp that already sits exactly on a boundary maps to the following
    one. Seconds and microseconds of the result are zero.
    """
    hour_start = ts.replace(minute=0, second=0, microsecond=0)
    boundary_minute = 15 * (ts.minute // 15 + 1)  # one of 15, 30, 45, 60

    if boundary_minute < 60:
        return hour_start.replace(minute=boundary_minute)

    # 60 is not a valid minute value: roll over to the top of the next hour.
    return hour_start + timedelta(hours=1)

def collect_first_hits(frame, minutes_before_end=5, ask_threshold=0.95, sides=('UP', 'DOWN')):
    """Find, per event and side, the first late-window row whose ask meets a threshold.

    Parameters
    ----------
    frame : DataFrame with 'event', 'timestamp' (datetime), 'buy_pick',
        'buy_size' and 'buy_best_ask' columns.
    minutes_before_end : only rows at most this many minutes before their next
        quarter-hour boundary (see ``next_quarter``) are considered.
    ask_threshold : a row qualifies when ``buy_best_ask >= ask_threshold``.
    sides : ``buy_pick`` values to evaluate, each independently.

    Returns
    -------
    list of dict, one per (event, side) that had a qualifying row, with keys
    'event', 'timestamp', 'buy_pick', 'buy_size', 'buy_best_ask' and an empty
    'results' field.
    """
    window = timedelta(minutes=minutes_before_end)
    hits = []

    # Iterate through events
    for event, event_df in frame.groupby('event'):
        # mergesort is stable: rows sharing a timestamp (the feed has several
        # per second) keep their file order, so "first hit" is deterministic.
        # The default quicksort gives no such guarantee.
        event_df = event_df.sort_values('timestamp', kind='mergesort').copy()

        # Compute next quarter for each row
        event_df['next_quarter'] = event_df['timestamp'].apply(next_quarter)

        # Keep only rows inside the closing window
        time_left = event_df['next_quarter'] - event_df['timestamp']
        closing_df = event_df[time_left <= window]

        # Process each side independently
        for side in sides:
            side_df = closing_df[
                (closing_df['buy_pick'] == side) &
                (closing_df['buy_best_ask'] >= ask_threshold)
            ]

            if side_df.empty:
                continue

            # First time this side reaches the threshold
            first_hit = side_df.iloc[0]

            hits.append({
                'event': event,
                'timestamp': first_hit['timestamp'],
                'buy_pick': side,
                'buy_size': first_hit['buy_size'],
                'buy_best_ask': first_hit['buy_best_ask'],
                'results': ''
            })

    return hits


# Defaults reproduce the original fixed settings: final 5 minutes, ask >= 0.95.
results_rows = collect_first_hits(df)

# Save output
# The column order is pinned so the CSV always carries the same header, even
# when no row qualified; building the frame from an empty list without
# explicit columns would write a file with no header at all.
result_columns = ['event', 'timestamp', 'buy_pick', 'buy_size', 'buy_best_ask', 'results']
end_prob_df = pd.DataFrame(results_rows, columns=result_columns)
end_prob_df.to_csv(output_csv, index=False)

print(f"Saved {len(end_prob_df)} rows to {output_csv}")


Saved 4 rows to ./data/end_prob.csv
