In [111]:
import numpy as np
import pandas as pd

# --- Configuration ---
n_ticks = 10000
start_time = pd.Timestamp("2025-01-01 09:30:00")

# --- Generate Tick Data ---
# 1. Generate the *raw numerical values* for the time intervals (in seconds)
time_deltas_raw = np.random.exponential(size=n_ticks) 

# 2. Calculate the cumulative sum of those numerical values
cumulative_time_seconds = time_deltas_raw.cumsum()

# 3. Convert the cumulative seconds into pandas Timedelta objects
cumulative_timedeltas = pd.to_timedelta(cumulative_time_seconds, unit="s")

# 4. Add the cumulative time deltas to the start time to get the final timestamps
timestamps = start_time + cumulative_timedeltas

# Base Price Movement (Random Walk)
base_price = 100 + np.random.normal(0, 0.05, size=n_ticks).cumsum()

# Simulate Exchange Divergence (rest of your original code)
price_a = base_price + np.random.normal(0, 0.1, size=n_ticks)
price_b = base_price + np.random.normal(0, 0.1, size=n_ticks) + np.sin(np.linspace(0, 10, n_ticks)) * 0.5 

# Create the DataFrame
trades = pd.DataFrame({
    "Timestamp": timestamps,
    "Price_A": price_a,
    "Price_B": price_b,
    "Volume": np.random.randint(1, 100, size=n_ticks)
})

# Add some "Flash Crashes" (Dirty Data / Anomalies)
random_indices = np.random.choice(trades.index, 5, replace=False)
trades.loc[random_indices, "Price_A"] = 1.0 


In [112]:
trades["Timestamp"] = pd.to_datetime(trades["Timestamp"])
trades["Price_A"] = pd.to_numeric(trades["Price_A"], errors="coerce")
trades = trades[trades["Price_A"].notna()]  # drop rows with non-numeric price
filtered_count = len(trades[trades["Price_A"] < 50])
trades = trades[trades["Price_A"] >= 50]
trades = trades.set_index("Timestamp")
ohlcA= trades["Price_A"].resample('1T').ohlc()
ohlcA = ohlcA.rename(columns = {
    'open': 'PriceA_open',
    'high': 'PriceA_high',
    'low': 'PriceA_low',
    'close': 'PriceA_close',
}
)
ohlcB = trades["Price_B"].resample("1T").ohlc()
ohlcB = ohlcB.rename(columns = {
'open' : 'PriceB_open',
'high' : 'PriceB_high',
'low' : 'PriceB_low',
'close': 'PriceB_close'
})
merged_df = pd.merge(ohlcA , ohlcB , on = 'Timestamp')
merged_df

  ohlcA= trades["Price_A"].resample('1T').ohlc()
  ohlcB = trades["Price_B"].resample("1T").ohlc()


Unnamed: 0_level_0,PriceA_open,PriceA_high,PriceA_low,PriceA_close,PriceB_open,PriceB_high,PriceB_low,PriceB_close
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2025-01-01 09:30:00,99.857481,100.128649,99.545993,99.875097,100.098349,100.264027,99.676165,99.895160
2025-01-01 09:31:00,99.916928,100.462405,99.707294,100.462405,100.029645,100.574636,99.641305,100.476140
2025-01-01 09:32:00,100.533543,100.892602,100.211328,100.526796,100.541730,100.966661,100.233501,100.693762
2025-01-01 09:33:00,100.481242,101.208792,100.371388,100.620125,100.539713,101.355165,100.420293,100.852618
2025-01-01 09:34:00,100.770783,101.093550,100.441458,100.441458,100.841901,101.260576,100.573200,100.667157
...,...,...,...,...,...,...,...,...
2025-01-01 12:16:00,106.412284,106.734818,106.153878,106.587247,106.211076,106.479709,105.853142,106.172288
2025-01-01 12:17:00,106.355679,107.328747,106.269888,107.328747,106.172285,106.956617,105.984661,106.956617
2025-01-01 12:18:00,106.932272,107.567977,106.775326,107.506082,106.903172,107.387325,106.507527,107.263745
2025-01-01 12:19:00,107.470384,107.921590,107.304807,107.757553,107.164797,107.777305,107.055791,107.389461


In [113]:
priceB_cols = [
    "PriceB_open",
    "PriceB_high",
    "PriceB_low",
    "PriceB_close"
]
merged_df[priceB_cols]= merged_df[priceB_cols].ffill()

In [114]:
merged_df["Spread"] = merged_df['PriceB_close']- merged_df['PriceA_close']

In [115]:
merged_df['moving_avg'] = merged_df["Spread"].rolling(20).mean()


In [116]:
merged_df['moving_std']= merged_df["Spread"].rolling(20).std()

In [119]:
merged_df.head(1)

Unnamed: 0_level_0,PriceA_open,PriceA_high,PriceA_low,PriceA_close,PriceB_open,PriceB_high,PriceB_low,PriceB_close,Spread,moving_avg,moving_std
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2025-01-01 09:30:00,99.857481,100.128649,99.545993,99.875097,100.098349,100.264027,99.676165,99.89516,0.020063,,


In [125]:
profitable_trade=   merged_df['Spread']  > merged_df['moving_avg'] + (2 * merged_df['moving_std'])

In [133]:
merged_df["Arbitrage_Signal"] = profitable_trade
merged_df[merged_df['Arbitrage_Signal'] == True].to_csv("arbitrage_opportunities.csv")
