In [1]:
import numpy as np
import pandas as pd
import os
import datetime
from tqdm import tqdm

In [2]:
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
import warnings
warnings.simplefilter("ignore")

In [4]:
import data_feeder

In [6]:
from sklearn.linear_model import LinearRegression

## Main Class

In [8]:
class PearsonBot:
    """Fit a straight line through the HL2 mid-price of a fixed window of ticks.

    Ticks (small OHLC DataFrames) are fed one at a time through ``on_tick``.
    The bot moves through three phases:

    1. *collect* - the first ``timeframe`` ticks are appended to ``df_timeframe``;
    2. *process* - the next tick triggers the HL2 / linear-regression computation
       on the collected window (that tick itself is not stored);
    3. *monitor* - the tick after that prints a message, flushes the working
       state and clears ``temp_flag`` so that ``main`` stops iterating.

    Attributes:
        timeframe: number of ticks collected before the regression is fitted.
        min_linreg: stored as given; no method reads it yet.
        df_timeframe: working window of collected ticks.
        last_window: most recent fully processed window (``None`` until one
            exists); it survives ``flush_all`` so ``display`` can still plot it.
        counter: number of ticks collected into the current window.
        m, c: slope and intercept of the fitted line (``nan`` until fitted).
        std: ``hl2`` standard deviation over the processed window.
        lin_reg_height: max minus min of the fitted line over the window.
        process: ``True`` while the collected window still has to be processed.
        temp_flag: ``True`` while ``main`` should keep consuming ticks.
    """

    def __init__(self, timeframe: int = 30, min_linreg: int = 3) -> None:
        self.timeframe = timeframe
        self.min_linreg = min_linreg
        self.df_timeframe = pd.DataFrame()
        self.last_window = None
        self.counter = 0
        self.m = np.nan
        self.c = np.nan
        self.process = True
        self.std = np.nan
        self.lin_reg_height = np.nan
        self.temp_flag = True

    def flush_counter(self):
        """Reset the number of collected ticks to zero."""
        self.counter = 0

    def flush_df(self):
        """Replace the working window with a fresh, empty DataFrame."""
        self.df_timeframe = pd.DataFrame()

    def flush_coeffs(self):
        """Forget the fitted slope and intercept."""
        self.m = np.nan
        self.c = np.nan

    def flush_all(self):
        """Reset window, counter and coefficients.

        NOTE(review): ``process`` is not re-armed here, so a second window
        would be collected but never processed - confirm whether that is
        intended once the monitoring phase is implemented.
        """
        self.flush_df()
        self.flush_counter()
        self.flush_coeffs()

    def calc_hl2(self) -> None:
        """Add an ``hl2`` column: the midpoint between ``Low`` and ``High``."""
        low = self.df_timeframe['Low']
        self.df_timeframe['hl2'] = ((self.df_timeframe['High'] - low) / 2) + low

    def calc_coeffs(self, df):
        """Fit ``hl2 ~ index`` with ``LinearRegression`` and store slope/intercept.

        Args:
            df: frame with an ``index`` column (regressor) and an ``hl2``
                column (target).
        """
        lr = LinearRegression()
        lr.fit(df[['index']], df['hl2'])
        # ``coef_`` is a length-1 array for a single feature; store plain floats
        # so ``lin_reg_fn`` yields scalars instead of one-element arrays.
        self.m = float(lr.coef_[0])
        self.c = float(lr.intercept_)

    def lin_reg_fn(self, x):
        """Evaluate the fitted line ``m * x + c`` at ``x``."""
        return (self.m * x) + self.c

    def calc_lr(self):
        """Fit the line on the current window and add a ``lin_reg`` column."""
        self.calc_coeffs(self.df_timeframe)
        # lin_reg_fn is plain arithmetic, so it can be applied to the whole column.
        self.df_timeframe['lin_reg'] = self.lin_reg_fn(self.df_timeframe['index'])

    def on_tick(self, df: pd.DataFrame) -> None:
        """Consume one tick and advance the collect / process / monitor cycle.

        Args:
            df: tick frame with at least ``Datetime``, ``High`` and ``Low``
                columns. It is not modified.
        """
        if self.counter < self.timeframe:
            # Convert on a copy so the caller's frame keeps its original column.
            tick = df.assign(Datetime=pd.to_datetime(df['Datetime']))
            if self.df_timeframe.empty:
                self.df_timeframe = tick
            else:
                self.df_timeframe = pd.concat([self.df_timeframe, tick])
            self.counter += 1
        elif self.process:
            # Replace the (repeated) per-tick index by a 0..n-1 ``index`` column,
            # which is the regressor used by calc_coeffs.
            self.df_timeframe = self.df_timeframe.reset_index(drop=True).reset_index()
            self.calc_hl2()
            self.calc_lr()
            self.std = self.df_timeframe['hl2'].std()
            fitted = self.df_timeframe['lin_reg']
            self.lin_reg_height = fitted.max() - fitted.min()
            # Keep a handle on the finished window: flush_all() rebinds
            # df_timeframe, and display() is called after that happens.
            self.last_window = self.df_timeframe
            self.process = False
        else:
            print('monitoring...')
            self.flush_all()
            self.temp_flag = False

    def display(self):
        """Plot ``hl2`` against the fitted line for the latest processed window."""
        wanted = ['hl2', 'lin_reg']
        window = self.df_timeframe
        if not set(wanted).issubset(window.columns):
            # The working window was flushed or not processed yet.
            window = self.last_window
        if window is None:
            print('nothing to display: no window has been processed yet')
            return
        window[wanted].plot(figsize=(15, 15))

    def main(self, df=None):
        """Feed ticks from ``data_feeder.get_ticks`` into the bot, then plot.

        Args:
            df: optional source frame forwarded to ``data_feeder.get_ticks``.
                When omitted, ``get_ticks`` is called without arguments, as
                before. NOTE(review): the recorded run shows ``get_ticks``
                requiring a ``df`` argument, so callers most likely need to
                pass it.
        """
        # presumably get_ticks yields one tick DataFrame at a time - confirm
        # against data_feeder.
        ticks = data_feeder.get_ticks() if df is None else data_feeder.get_ticks(df)
        for tick in tqdm(ticks):
            if not self.temp_flag:
                break
            self.on_tick(tick)

        self.display()

In [9]:
# Instantiate the bot with its default window settings spelled out.
pb = PearsonBot(timeframe=30, min_linreg=3)

In [10]:
# NOTE(review): the recorded output of this cell is a TypeError -
# data_feeder.get_ticks() requires a `df` argument that this call does not supply.
pb.main()

TypeError: get_ticks() missing 1 required positional argument: 'df'

In [7]:
# Load the raw comma-separated tick file.
df = pd.read_csv(filepath_or_buffer="data/esapr.txt", delimiter=",")

In [9]:
# Join the separate date and time strings into one timestamp string per row,
# then show the frame.
df['Datetime'] = (
    df['Date']
    + " "
    + df['Time']
)
df

Unnamed: 0,Date,Time,Open,High,Low,Close,Up,Down,Datetime
0,04/01/2022,00:00:00,4547.75,4547.75,4547.25,4547.25,0,9,04/01/2022 00:00:00
1,04/01/2022,00:00:01,4547.25,4547.75,4547.25,4547.75,86,48,04/01/2022 00:00:01
2,04/01/2022,00:00:02,4547.50,4547.50,4546.50,4546.50,2,54,04/01/2022 00:00:02
3,04/01/2022,00:00:03,4546.25,4547.25,4546.25,4547.25,47,10,04/01/2022 00:00:03
4,04/01/2022,00:00:04,4547.00,4547.00,4546.50,4547.00,24,21,04/01/2022 00:00:04
...,...,...,...,...,...,...,...,...,...
943841,04/29/2022,13:59:56,4137.25,4138.00,4137.25,4138.00,47,1,04/29/2022 13:59:56
943842,04/29/2022,13:59:57,4137.50,4137.75,4137.50,4137.50,7,3,04/29/2022 13:59:57
943843,04/29/2022,13:59:58,4137.75,4137.75,4137.50,4137.50,4,2,04/29/2022 13:59:58
943844,04/29/2022,13:59:59,4137.00,4137.00,4137.00,4137.00,0,6,04/29/2022 13:59:59


In [10]:
# Keep only the timestamp and OHLC columns. `.copy()` makes the result an
# independent frame, so assigning to its columns afterwards does not write
# into a slice of the raw data (SettingWithCopy hazard).
df = df[['Datetime', 'Open', 'High', 'Low', 'Close']].copy()

In [12]:
# The timestamp strings look like "04/01/2022 00:00:00" (month/day/year).
# An explicit format removes day-first ambiguity and avoids format inference
# on every value of this large column.
df['Datetime'] = pd.to_datetime(df['Datetime'], format="%m/%d/%Y %H:%M:%S")

In [13]:
# Per-column aggregation used when collapsing ticks into bars:
# first Open, highest High, lowest Low, last Close.
ohlc = dict(
    Open="first",
    High="max",
    Low="min",
    Close="last",
)

In [14]:
# Collapse the ticks into one-minute OHLC bars. "1min" is the supported
# spelling of the minute frequency; the "T" alias is deprecated in current pandas.
df_resampled = (
    df.set_index('Datetime')
    .resample("1min", closed="left", label="left")
    .agg(ohlc)
)

In [17]:
# Discard the bars whose Open is missing.
df_resampled = df_resampled.dropna(subset=['Open'])

In [21]:
# Write the bars to the same path the notebook reads them back from
# ("data/resampled.csv"), so a fresh Run All does not pick up a stale or missing file.
df_resampled.reset_index().to_csv("data/resampled.csv", index=False)

In [24]:
# Reload the one-minute bars from disk and show them.
df = pd.read_csv(filepath_or_buffer="data/resampled.csv")
df

Unnamed: 0,Datetime,Open,High,Low,Close
0,2022-04-01 00:00:00,4547.75,4547.75,4546.00,4546.75
1,2022-04-01 00:01:00,4546.75,4547.25,4546.25,4546.75
2,2022-04-01 00:02:00,4546.50,4547.00,4544.50,4546.00
3,2022-04-01 00:03:00,4545.75,4546.50,4545.50,4546.25
4,2022-04-01 00:04:00,4546.25,4547.25,4545.25,4545.25
...,...,...,...,...,...
27069,2022-04-29 13:56:00,4137.50,4138.00,4137.00,4137.75
27070,2022-04-29 13:57:00,4137.50,4138.00,4137.50,4138.00
27071,2022-04-29 13:58:00,4138.00,4138.00,4136.25,4137.00
27072,2022-04-29 13:59:00,4136.75,4138.00,4135.25,4137.00


In [25]:
# Inspect the first raw Datetime value as stored after the CSV round-trip.
df.loc[:, 'Datetime'].values[0]

'2022-04-01 00:00:00'