In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier # added (from sklearn v. 1.7)

In [2]:
# Read the price history and use the parsed "time" column as the index.
data = pd.read_csv("five_minute.csv", index_col = "time", parse_dates = ["time"])
data

Unnamed: 0_level_0,price
time,Unnamed: 1_level_1
2019-01-01 22:00:00+00:00,1.146580
2019-01-01 22:05:00+00:00,1.146350
2019-01-01 22:10:00+00:00,1.146320
2019-01-01 22:15:00+00:00,1.146320
2019-01-01 22:20:00+00:00,1.146530
...,...
2019-12-30 23:35:00+00:00,1.120180
2019-12-30 23:40:00+00:00,1.120210
2019-12-30 23:45:00+00:00,1.120295
2019-12-30 23:50:00+00:00,1.120275


In [3]:
# Log return of each bar relative to the previous one (first row becomes NaN).
# Computed from the "price" column explicitly so the cell still works when it
# is re-run after further columns have been added to `data`.
data["returns"] = np.log(data["price"] / data["price"].shift(1))

In [4]:
# Remove the leading row whose return is NaN (it has no previous bar).
data.dropna(axis = 0, how = "any", inplace = True)

In [5]:
# Classification label: -1.0 / 0.0 / +1.0 depending on the sign of the return.
data["direction"] = np.sign(data["returns"])
data

Unnamed: 0_level_0,price,returns,direction
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2019-01-01 22:05:00+00:00,1.146350,-0.000201,-1.0
2019-01-01 22:10:00+00:00,1.146320,-0.000026,-1.0
2019-01-01 22:15:00+00:00,1.146320,0.000000,0.0
2019-01-01 22:20:00+00:00,1.146530,0.000183,1.0
2019-01-01 22:25:00+00:00,1.146475,-0.000048,-1.0
...,...,...,...
2019-12-30 23:35:00+00:00,1.120180,-0.000004,-1.0
2019-12-30 23:40:00+00:00,1.120210,0.000027,1.0
2019-12-30 23:45:00+00:00,1.120295,0.000076,1.0
2019-12-30 23:50:00+00:00,1.120275,-0.000018,-1.0


In [6]:
# Number of lagged returns used as features (lag1 ... lag5 are built in the next cell).
lags = 5

In [7]:
# Feature names lag1..lagN; each column holds the return observed N bars earlier.
cols = [f"lag{k}" for k in range(1, lags + 1)]
for lag, col in enumerate(cols, start = 1):
    data[col] = data["returns"].shift(lag)
# The first `lags` rows lack a full feature history and are discarded.
data.dropna(inplace = True)

In [8]:
# Per-feature mean; saved to params.pkl further down and reused to standardize live data.
means = data.loc[:, cols].mean()
means

lag1   -3.142659e-07
lag2   -3.115388e-07
lag3   -3.125681e-07
lag4   -3.132864e-07
lag5   -3.159474e-07
dtype: float64

In [9]:
# Per-feature standard deviation; saved to params.pkl further down alongside the means.
stand_devs = data.loc[:, cols].std()
stand_devs

lag1    0.000199
lag2    0.000199
lag3    0.000199
lag4    0.000199
lag5    0.000199
dtype: float64

In [10]:
# Standardize every lag column: subtract its mean, then divide by its standard deviation.
data[cols] = data[cols].sub(means).div(stand_devs)
data

Unnamed: 0_level_0,price,returns,direction,lag1,lag2,lag3,lag4,lag5
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2019-01-01 22:30:00+00:00,1.146455,-0.000017,-1.0,-0.240031,0.924152,0.001574,-0.130230,-1.008816
2019-01-01 22:35:00+00:00,1.146455,0.000000,0.0,-0.086280,-0.240044,0.924158,0.001578,-0.130216
2019-01-01 22:40:00+00:00,1.146370,-0.000074,-1.0,0.001583,-0.086293,-0.240039,0.924162,0.001591
2019-01-01 22:45:00+00:00,1.146315,-0.000048,-1.0,-0.371850,0.001569,-0.086288,-0.240035,0.924169
2019-01-01 22:50:00+00:00,1.146475,0.000140,1.0,-0.240065,-0.371862,0.001574,-0.086284,-0.240020
...,...,...,...,...,...,...,...,...
2019-12-30 23:35:00+00:00,1.120180,-0.000004,-1.0,-0.560410,-0.088343,0.113965,-0.020901,0.451175
2019-12-30 23:40:00+00:00,1.120210,0.000027,1.0,-0.020898,-0.560420,-0.088338,0.113969,-0.020887
2019-12-30 23:45:00+00:00,1.120295,0.000076,1.0,0.136467,-0.020912,-0.560415,-0.088335,0.113981
2019-12-30 23:50:00+00:00,1.120275,-0.000018,-1.0,0.383736,0.136453,-0.020907,-0.560412,-0.088321


In [11]:
# One-vs-rest wrapper around logistic regression for the three direction labels (-1, 0, 1).
# C is the inverse regularization strength in scikit-learn, so C = 1e6 leaves the fit
# almost unregularized; max_iter is raised well above the default.
lm = OneVsRestClassifier(LogisticRegression(C = 1e6, max_iter = 100000)) # new (from sklearn v. 1.7)

In [12]:
# Fit on the standardized lag columns with the sign of the return as the label,
# then display the estimator.
lm.fit(data[cols], data.direction)
lm

In [13]:
# In-sample prediction: the model is applied to the same rows it was fitted on.
data["pred"] = lm.predict(data[cols])
data

Unnamed: 0_level_0,price,returns,direction,lag1,lag2,lag3,lag4,lag5,pred
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2019-01-01 22:30:00+00:00,1.146455,-0.000017,-1.0,-0.240031,0.924152,0.001574,-0.130230,-1.008816,1.0
2019-01-01 22:35:00+00:00,1.146455,0.000000,0.0,-0.086280,-0.240044,0.924158,0.001578,-0.130216,1.0
2019-01-01 22:40:00+00:00,1.146370,-0.000074,-1.0,0.001583,-0.086293,-0.240039,0.924162,0.001591,-1.0
2019-01-01 22:45:00+00:00,1.146315,-0.000048,-1.0,-0.371850,0.001569,-0.086288,-0.240035,0.924169,1.0
2019-01-01 22:50:00+00:00,1.146475,0.000140,1.0,-0.240065,-0.371862,0.001574,-0.086284,-0.240020,1.0
...,...,...,...,...,...,...,...,...,...
2019-12-30 23:35:00+00:00,1.120180,-0.000004,-1.0,-0.560410,-0.088343,0.113965,-0.020901,0.451175,1.0
2019-12-30 23:40:00+00:00,1.120210,0.000027,1.0,-0.020898,-0.560420,-0.088338,0.113969,-0.020887,1.0
2019-12-30 23:45:00+00:00,1.120295,0.000076,1.0,0.136467,-0.020912,-0.560415,-0.088335,0.113981,1.0
2019-12-30 23:50:00+00:00,1.120275,-0.000018,-1.0,0.383736,0.136453,-0.020907,-0.560412,-0.088321,-1.0


In [14]:
# direction * pred is positive when both have the same sign (hit), negative when
# they disagree (miss) and zero when either of them is 0; count each outcome.
hits = np.sign(data.direction * data.pred).value_counts()
hits

 1.0    37636
-1.0    34120
 0.0     1958
Name: count, dtype: int64

In [15]:
# Share of all rows where the predicted and the realized direction agree.
hit_ratio = hits.loc[1.0] / hits.sum()
hit_ratio

0.5105678704181024

In [16]:
# Show the fitted estimator that is pickled in the cells below.
lm

In [17]:
import pickle

In [18]:
# Persist the fitted classifier for the live-trading cells. The context manager
# closes the file deterministically, so the pickle is fully written to disk.
with open("logreg.pkl", "wb") as model_file:
    pickle.dump(lm, model_file)

In [19]:
# Bundle the standardization statistics so live features can be scaled like the training data.
params = dict(mu = means, std = stand_devs)
params

{'mu': lag1   -3.142659e-07
 lag2   -3.115388e-07
 lag3   -3.125681e-07
 lag4   -3.132864e-07
 lag5   -3.159474e-07
 dtype: float64,
 'std': lag1    0.000199
 lag2    0.000199
 lag3    0.000199
 lag4    0.000199
 lag5    0.000199
 dtype: float64}

In [20]:
# Persist the standardization statistics next to the model. The context manager
# closes the file deterministically, so the pickle is fully written to disk.
with open("params.pkl", "wb") as params_file:
    pickle.dump(params, params_file)

In [21]:
from ib_async import * 
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
import pickle
import datetime as dt
from datetime import datetime, timezone # new
from IPython.display import display, clear_output
util.startLoop()

In [22]:
# Open the API session with the default connection settings; the output below
# shows it resolving to 127.0.0.1:7497 with clientId=1.
# NOTE(review): presumably requires a running TWS / IB Gateway on that port - confirm.
ib = IB()
ib.connect()

<IB connected to 127.0.0.1:7497 clientId=1>

Error 321, reqId 139: Error validating request.-'bK' : cause - Historical data bar size setting is invalid. Legal ones are: 1 secs, 5 secs, 10 secs, 15 secs, 30 secs, 1 min, 2 mins, 3 mins, 5 mins, 10 mins, 15 mins, 20 mins, 30 mins, 1 hour, 2 hours, 3 hours, 4 hours, 8 hours, 1 day, 1W, 1M, contract: Forex('EURUSD', conId=12087792, exchange='IDEALPRO', localSymbol='EUR.USD', tradingClass='EUR.USD')
  target = df["position"][-1] * units
  target = df["position"][-1] * units
Error 10182, reqId 144: Failed to request live updates (disconnected).
Error 10182, reqId 136: Failed to request live updates (disconnected).
Error 1100, reqId -1: Connectivity between IBKR and Trader Workstation has been lost.
Error 1100, reqId -1: Connectivity between IBKR and Trader Workstation has been lost.
Error 1102, reqId -1: Connectivity between IBKR and Trader Workstation has been restored - data maintained. All data farms are connected: usfarm.nj; hfarm; cashfarm; usfarm; apachmds; cashhmds; ushmds; secde

In [None]:
# strategy parameters
freq = "5 mins"  # bar size requested from IB; matches the 5-minute training data
lags = 5  # number of lagged returns fed to the model
units = 1000  # position size; the model output (-1 / 0 / 1) is multiplied by this
end_time = dt.time(23, 59, 0) # stop condition (compared against the current UTC time)

# Model and standardization statistics written by the training cells.
# NOTE: unpickling executes code embedded in the file - only load files you created yourself.
with open("logreg.pkl", "rb") as model_file:
    lm = pickle.load(model_file)
with open("params.pkl", "rb") as params_file:
    params = pickle.load(params_file)
means = params["mu"]
stand_devs = params["std"]

# The Forex contract is used for the bar stream, the CFD for orders and position lookup.
contract = Forex('EURUSD')
ib.qualifyContracts(contract)
cfd = CFD("EUR", currency = "USD")
ib.qualifyContracts(cfd)
conID = cfd.conId

Unnamed: 0_level_0,close,returns,lag1,lag2,lag3,lag4,lag5,position
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2025-03-10 21:45:00+00:00,1.083570,0.000009,0.559393,0.234007,0.350252,0.234043,-0.370345,-1.0
2025-03-10 21:50:00+00:00,1.083555,-0.000014,0.048064,0.559376,0.234012,0.350255,0.234055,-1.0
2025-03-10 21:55:00+00:00,1.083520,-0.000032,-0.068139,0.048050,0.559382,0.234016,0.350266,-1.0
2025-03-10 22:00:00+00:00,1.083640,0.000111,-0.161106,-0.068153,0.048055,0.559386,0.234028,1.0
2025-03-10 22:05:00+00:00,1.083590,-0.000046,0.559352,-0.161119,-0.068148,0.048059,0.559395,-1.0
...,...,...,...,...,...,...,...,...
2025-03-11 14:55:00+00:00,1.091935,0.000069,0.808893,3.416748,1.940952,0.717489,1.526118,-1.0
2025-03-11 15:00:00+00:00,1.092145,0.000192,0.347533,0.808874,3.416757,1.940955,0.717498,-1.0
2025-03-11 15:05:00+00:00,1.091950,-0.000179,0.970117,0.347517,0.808880,3.416760,1.940955,-1.0
2025-03-11 15:10:00+00:00,1.091810,-0.000128,-0.897764,0.970098,0.347523,0.808884,3.416750,-1.0


Unnamed: 0_level_0,side,cumQty,avgPrice,realizedPNL,cumPNL
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2025-03-11 15:10:05+00:00,SLD,2000.0,1.0919,-1.7,-1.7
2025-03-11 15:15:05+00:00,BOT,2000.0,1.09205,-2.15,-3.85


In [30]:
# Display the estimator used for live predictions (presumably the one loaded from logreg.pkl).
lm

In [31]:
def onBarUpdate(bars, hasNewBar):
    """Handle one update of the streaming bar list.

    When the newest bar carries a later timestamp than ``last_bar``, the lag
    features are rebuilt from ``bars``, standardized with the stored
    ``means`` / ``stand_devs``, passed to ``lm`` and the prediction for the
    most recent row (times ``units``) is handed to ``execute_trade``.
    On every other update only ``trade_reporting`` is attempted.

    Parameters
    ----------
    bars : list of bar objects
        The bar list the handler is subscribed to; ``date`` and the OHLC
        fields of each bar are read.
    hasNewBar : bool
        Passed in by the event; unused because new bars are detected by
        comparing timestamps with ``last_bar``.
    """
    global df, last_bar

    if bars[-1].date > last_bar:
        last_bar = bars[-1].date

        # Data Processing
        df = pd.DataFrame(bars)[["date", "open", "high", "low", "close"]]#.iloc[:-1] keep latest incomplete bar
        df.set_index("date", inplace = True)

        ####################### Trading Strategy ###########################
        df = df[["close"]].copy()
        df["returns"] = np.log(df["close"] / df["close"].shift())

        cols = []
        for lag in range(1, lags + 1):
            col = "lag{}".format(lag)
            df[col] = df.returns.shift(lag)
            cols.append(col)
        df.dropna(inplace = True)
        if df.empty:
            # Not enough bars yet to build a full set of lags - nothing to predict.
            return
        # Scale with the training statistics so the inputs match what the model was fitted on.
        df[cols] = (df[cols] - means) / stand_devs

        df["position"] = lm.predict(df[cols])
        ####################################################################

        # Trading
        # .iloc selects the last row by position; plain [-1] on a datetime-indexed
        # Series relies on a deprecated fallback (the warning echoed in the log above).
        target = df["position"].iloc[-1] * units
        execute_trade(target = target)

        # Display
        clear_output(wait=True)
        display(df)
    else:
        try:
            trade_reporting()
        except Exception:
            # Best effort only: the report must never interrupt the data stream.
            pass

def execute_trade(target):
    """Send one market order that moves the CFD position to ``target`` units.

    The current position is looked up among ``ib.positions()`` by ``conID``
    and stored in the global ``current_pos``; a missing entry counts as 0.
    The difference to ``target`` is bought or sold on ``cfd``. No order is
    placed when the difference is zero.

    Parameters
    ----------
    target : number
        Desired signed position size (positive = long, negative = short).
    """
    global current_pos

    # 1. get current Position
    # Only "no matching position" falls back to 0. Any other failure now propagates
    # instead of being mistaken for a flat book (which could double the position).
    current_pos = next(
        (pos.position for pos in ib.positions() if pos.contract.conId == conID),
        0,
    )

    # 2. identify required trades
    trades = target - current_pos

    # 3. trade execution
    if trades > 0:
        side = "BUY"
    elif trades < 0:
        side = "SELL"
    else:
        return  # already at the target position
    ib.placeOrder(cfd, MarketOrder(side, abs(trades)))

def trade_reporting():
    """Rebuild the global ``report`` from the session's fills and display it.

    Executions and commission reports are joined on ``execId``, restricted to
    timestamps from ``session_start`` onwards and aggregated per timestamp
    (first side, max cumQty, mean avgPrice, summed realizedPNL). ``cumPNL`` is
    the running sum of realizedPNL. The result is shown below ``df``.

    Returns without touching ``report`` or the display when there are no
    fills at all.
    """
    global report

    # Take one snapshot so executions and commission reports come from the same fills.
    fills = ib.fills()
    if not fills:
        # Nothing to tabulate yet; previously this failed on the column selection below.
        return

    fill_df = util.df([fs.execution for fs in fills])[["execId", "time", "side", "cumQty", "avgPrice"]].set_index("execId")
    profit_df = util.df([fs.commissionReport for fs in fills])[["execId", "realizedPNL"]].set_index("execId")
    report = pd.concat([fill_df, profit_df], axis = 1).set_index("time").loc[session_start:]
    report = report.groupby("time").agg({"side":"first", "cumQty":"max", "avgPrice":"mean", "realizedPNL":"sum"})
    report["cumPNL"] = report.realizedPNL.cumsum()

    clear_output(wait=True)
    display(df, report)

In [34]:
# start trading session
session_start = pd.to_datetime(datetime.now(timezone.utc))  # only fills from now on are reported
bars = ib.reqHistoricalData(
        contract,
        endDateTime='',
        durationStr='1 D',
        barSizeSetting=freq,
        whatToShow='MIDPOINT',
        useRTH=True,
        formatDate=2,
        keepUpToDate=True)
if not bars:
    # e.g. an invalid bar size (Error 321): fail with a clear message instead of an IndexError
    raise RuntimeError("No historical bars received for bar size '{}'.".format(freq))
last_bar = bars[-1].date
bars.updateEvent += onBarUpdate
ib.sleep(30) # new - to be added (optional)

# stop trading session
while True:
    ib.sleep(5) # check every 5 seconds
    if datetime.now(timezone.utc).time() >= end_time: # if stop conditions has been met
        # Detach the handler first so a late bar update cannot re-open the position.
        bars.updateEvent -= onBarUpdate
        execute_trade(target = 0) # close open position
        ib.cancelHistoricalData(bars) # stop stream
        ib.sleep(10) # leave time for the closing order to fill
        try:
            trade_reporting() # final reporting
        except Exception as exc:
            # The report is best effort, but say why it is missing.
            print("Final report unavailable: {}".format(exc))
        print("Session Stopped.")
        ib.disconnect()
        break

Unnamed: 0_level_0,close,returns,lag1,lag2,lag3,lag4,lag5,position
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2025-03-10 21:45:00+00:00,1.083570,0.000009,0.559393,0.234007,0.350252,0.234043,-0.370345,-1.0
2025-03-10 21:50:00+00:00,1.083555,-0.000014,0.048064,0.559376,0.234012,0.350255,0.234055,-1.0
2025-03-10 21:55:00+00:00,1.083520,-0.000032,-0.068139,0.048050,0.559382,0.234016,0.350266,-1.0
2025-03-10 22:00:00+00:00,1.083640,0.000111,-0.161106,-0.068153,0.048055,0.559386,0.234028,1.0
2025-03-10 22:05:00+00:00,1.083590,-0.000046,0.559352,-0.161119,-0.068148,0.048059,0.559395,-1.0
...,...,...,...,...,...,...,...,...
2025-03-11 14:55:00+00:00,1.091935,0.000069,0.808893,3.416748,1.940952,0.717489,1.526118,-1.0
2025-03-11 15:00:00+00:00,1.092145,0.000192,0.347533,0.808874,3.416757,1.940955,0.717498,-1.0
2025-03-11 15:05:00+00:00,1.091950,-0.000179,0.970117,0.347517,0.808880,3.416760,1.940955,-1.0
2025-03-11 15:10:00+00:00,1.091810,-0.000128,-0.897764,0.970098,0.347523,0.808884,3.416750,-1.0


Unnamed: 0_level_0,side,cumQty,avgPrice,realizedPNL,cumPNL
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1


ConnectionError: Socket disconnect

In [None]:
# Manual cleanup: cancel the bar subscription held in `bars`, e.g. when the
# session loop above ended with an error before reaching its own cancel call.
ib.cancelHistoricalData(bars)

In [None]:
# Manual cleanup: close the API connection.
ib.disconnect()