In [1]:
import pandas as pd
import numpy as np
import os

import config as cfg
from utils.yahoo_downloader import YahooDownloader
import utils.helper_functions as hf

""" from stockstats import wrap

import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.feature_selection import mutual_info_classif """

# Disable pandas' chained-assignment check (SettingWithCopyWarning) for the
# whole notebook. Later cells add columns to frames obtained by slicing, which
# would otherwise emit this warning.
# NOTE(review): this also hides genuine chained-assignment mistakes elsewhere.
pd.options.mode.chained_assignment = None

In [2]:
# Load the OHLCV data set from the local cache, downloading it on a cache miss.
folder_path = './db/'
file_name = 'ohlcv.pkl'
file_path = os.path.join(folder_path, file_name)

# exist_ok=True makes directory creation idempotent and removes the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(folder_path, exist_ok=True)

if os.path.isfile(file_path):
    # NOTE(review): unpickling can execute arbitrary code; only load cache
    # files that this notebook wrote itself.
    df = pd.read_pickle(file_path)
else:
    # Cache miss: fetch everything (original note on this branch: "2 minutes").
    df = YahooDownloader(cfg.start_date, cfg.end_date, cfg.tickers).fetch_data()
    df.to_pickle(file_path)
    # A CSV copy is written next to the pickle.
    df.to_csv(os.path.join(folder_path, 'ohlcv.csv'))

df.head()

Unnamed: 0,date,open,high,low,close,volume,tic,day
0,2000-01-03,48.0,48.0,47.25,47.25,16100.0,MT,0
1,2000-01-04,3.83,3.9,3.6,3.68,10424000.0,A2A.MI,1
2,2000-01-04,9.82,9.82,9.82,9.82,4250.0,ABCA.PA,1
3,2000-01-04,34.355453,34.348408,32.248322,32.769817,926084.0,AC.PA,1
4,2000-01-04,19.790001,20.59,19.209999,20.58,1449856.0,AF.PA,1


In [3]:
# Parse the date column once, then derive the calendar year as a 4-digit
# string (strftime returns strings, so 'year' has object dtype).
parsed_dates = pd.to_datetime(df['date'])
df['date'] = parsed_dates
df['year'] = parsed_dates.dt.strftime('%Y')
print(df)

             date         open         high          low        close  \
0      2000-01-03    48.000000    48.000000    47.250000    47.250000   
1      2000-01-04     3.830000     3.900000     3.600000     3.680000   
2      2000-01-04     9.820000     9.820000     9.820000     9.820000   
3      2000-01-04    34.355453    34.348408    32.248322    32.769817   
4      2000-01-04    19.790001    20.590000    19.209999    20.580000   
...           ...          ...          ...          ...          ...   
943749 2022-12-30    31.940001    32.000000    31.680000    31.680000   
943750 2022-12-30    42.950001    43.400002    42.950001    43.150002   
943751 2022-12-30    36.790001    36.790001    36.259998    36.529999   
943752 2022-12-30     6.600000     6.690000     6.600000     6.615000   
943753 2022-12-30  6533.120117  6540.509766  6470.640137  6473.759766   

            volume      tic  day  year  
0          16100.0       MT    0  2000  
1       10424000.0   A2A.MI    1  2000  


In [4]:
# Show the dtype of every column after the date parsing step.
print(df.dtypes)

date      datetime64[ns]
open             float64
high             float64
low              float64
close            float64
volume           float64
tic               object
day                int64
year              object
dtype: object


In [5]:
def add_supports_resistances(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` extended with pivot, support and resistance columns.

    Added columns: ``rolling_min``, ``rolling_max``, ``last_close``, ``pivot``,
    ``support1``, ``support2``, ``resistance1`` and ``resistance2``. Rows that
    contain a NaN in any column once ``pivot`` has been computed are dropped
    (at least the first row, whose shifted ``last_close`` is NaN).

    The input frame is left untouched: the caller typically passes a slice of
    a larger frame, and writing columns into such a slice is what pandas'
    SettingWithCopyWarning guards against.

    Args:
        df: frame with at least ``low``, ``high`` and ``close`` columns, in
            chronological order (presumably one ticker at a time — confirm
            with caller).

    Returns:
        A new DataFrame with the extra columns and without the NaN rows.
    """
    # Private copy so the column assignments below never leak into the caller.
    df = df.copy()

    # NOTE(review): the hf helpers are opaque here; presumably these are
    # rolling extremes over cfg.target_days rows — confirm in helper_functions.
    df['rolling_min'] = hf.get_rolling_min(df['low'], cfg.target_days)
    df['rolling_max'] = hf.get_rolling_max(df['high'], cfg.target_days)
    # Previous row's close, so each row only carries data known before it.
    df['last_close'] = df['close'].shift(1)

    df['pivot'] = hf.get_pivot(df['rolling_max'], df['rolling_min'], df['last_close'])

    # Drop incomplete rows; copy again so the assignments below target an
    # independent frame rather than a possible view of the pre-dropna one.
    df = df.dropna().copy()

    df['support1'] = hf.get_support1(df['pivot'], df['rolling_max'])
    df['support2'] = hf.get_support2(df['pivot'], df['rolling_max'], df['rolling_min'])
    df['resistance1'] = hf.get_resistance1(df['pivot'], df['rolling_min'])
    df['resistance2'] = hf.get_resistance2(df['pivot'], df['rolling_max'], df['rolling_min'])

    return df

In [6]:
# Names of the columns used as the active support / resistance levels.
support_column = 'support1'
resistance_column = 'resistance1'

def init_support_resistance(df: pd.DataFrame) -> tuple:
    """Return the ``(support, resistance)`` levels taken from the first row of ``df``.

    Args:
        df: frame containing the columns named by ``support_column`` and
            ``resistance_column``.

    Returns:
        A 2-tuple ``(support, resistance)``.

    Raises:
        IndexError: if ``df`` has no rows.
        KeyError: if either configured column is missing.
    """
    # Materialise the first row once instead of once per looked-up column.
    first_row = df.iloc[0]
    return first_row[support_column], first_row[resistance_column]

def update_support_resistance(row_df: tuple) -> tuple:
    """Return the ``(support, resistance)`` levels carried by a single row.

    Args:
        row_df: one row as produced by ``DataFrame.itertuples()`` (a named
            tuple, not a DataFrame), exposing the columns named by
            ``support_column`` and ``resistance_column`` as attributes.

    Returns:
        A 2-tuple ``(support, resistance)``.

    Raises:
        AttributeError: if the row lacks either configured column.
    """
    # getattr() is the supported way to do a dynamic attribute lookup; calling
    # the __getattribute__ dunder directly is unnecessary.
    support = getattr(row_df, support_column)
    resistance = getattr(row_df, resistance_column)
    return support, resistance

def get_profit(df: pd.DataFrame) -> tuple:
    """Simulate the support/resistance strategy over ``df``.

    For each row, the previous close (``last_close``) is compared with the
    active levels: a close below support opens a position at that row's open
    price, a close above resistance closes it at that row's open price. Every
    buy and every sell multiplies the running profit by ``cfg.fee_coef``.
    Whenever the previous close leaves the [support, resistance] band, the
    levels are refreshed from the current row.

    NOTE(review): a position still open on the last row is never sold, so for
    that trade only the entry fee is reflected in the result — confirm this is
    intended.

    Args:
        df: non-empty frame with ``open``, ``last_close`` and the configured
            support / resistance columns.

    Returns:
        ``(profit, bought_days)``: the compounded return as a fraction (0 means
        break-even) and the number of rows on which a position was held,
        counting the entry row but not the exit row.
    """
    profit = 1
    buy_price = 0
    bought_days = 0
    is_bought = False

    support, resistance = init_support_resistance(df)

    for row in df.itertuples():
        last_close = row.last_close
        # Named open_price so the built-in open() is not shadowed.
        open_price = row.open

        if is_bought and last_close > resistance:
            # Breakout above resistance: sell at today's open.
            is_bought = False
            profit *= (open_price / buy_price) * cfg.fee_coef
        elif not is_bought and last_close < support:
            # Drop below support: buy at today's open, paying the fee.
            is_bought = True
            buy_price = open_price
            profit *= cfg.fee_coef

        # Levels are compared *before* being refreshed, so the trade decision
        # above always uses the levels that were active when the band broke.
        if last_close > resistance or last_close < support:
            support, resistance = update_support_resistance(row)

        if is_bought:
            bought_days += 1

    return profit - 1, bought_days

In [7]:
# Back-test the strategy independently for every (ticker, year) pair and
# collect one summary record per pair.
profits = []
# Retained as a notebook-level name; the loop below no longer needs it.
available_tickers = df['tic'].unique()

# groupby(sort=False) yields groups in order of first appearance — the same
# order as iterating over .unique() — but splits the frame once per level
# instead of building a boolean mask over the whole frame for every ticker.
for ticker, ticker_df in df.groupby('tic', sort=False):
    for year, year_df in ticker_df.groupby('year', sort=False):
        year_df = add_supports_resistances(year_df)

        days = len(year_df)
        if days == 0:
            # Nothing left after the NaN rows were dropped.
            continue

        profit, bought_days = get_profit(year_df)

        # NOTE(review): hf.pct presumably converts a fraction into a rounded
        # percentage — confirm in helper_functions.
        profits.append({
            'ticker_year': f'{ticker}-{year}',
            'profit': hf.pct(profit),
            # Guard against division by zero when no position was ever held.
            'daily_profit': hf.pct(profit / bought_days) if bought_days > 0 else 0,
            'days': days,
            'bought_days': bought_days,
            'bought_days%': hf.pct(bought_days / days)
        })

profit_df = pd.DataFrame(profits)
print(profit_df.head())
print(f"Average profit: {profit_df['profit'].mean(skipna=True)}%")
# Labels fixed: both values are percentages (no doubled '%', no 'days' unit).
print(f"Average daily profit: {profit_df['daily_profit'].mean(skipna=True)}%")
print(f"Average bought days: {profit_df['bought_days%'].mean(skipna=True)}%")

  ticker_year  profit  daily_profit  days  bought_days  bought_days%
0     MT-2000    -0.6         -0.01   152           71         46.71
1     MT-2001    -0.6         -0.01   148           91         61.49
2     MT-2002     0.0          0.00   152            0          0.00
3     MT-2003     0.0          0.00   152            0          0.00
4     MT-2004     0.0          0.00   152            0          0.00
Average profit: 2.1944315419929787%
Average daily profit %: 0.05490683229813664%
Average bought days %: 25.620067512827436 days


In [8]:
# Lift pandas' row and column display limits only for the duration of this
# block so the complete profit table is printed.
show_everything = ('display.max_rows', None, 'display.max_columns', None)
with pd.option_context(*show_everything):
    print(profit_df)

        ticker_year  profit  daily_profit  days  bought_days  bought_days%
0           MT-2000   -0.60         -0.01   152           71         46.71
1           MT-2001   -0.60         -0.01   148           91         61.49
2           MT-2002    0.00          0.00   152            0          0.00
3           MT-2003    0.00          0.00   152            0          0.00
4           MT-2004    0.00          0.00   152            0          0.00
5           MT-2005    0.00          0.00   152            0          0.00
6           MT-2006    0.00          0.00   151            0          0.00
7           MT-2007    0.00          0.00   151            0          0.00
8           MT-2008   -0.60         -0.01   153           82         53.59
9           MT-2009    0.00          0.00   152            0          0.00
10          MT-2010    0.00          0.00   152            0          0.00
11          MT-2011   -0.60         -0.01   152          105         69.08
12          MT-2012    0.