In [1]:
from pathlib import Path

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yfinance as yf

import config as cfg

In [2]:
# Fetch the price history of every configured ticker for the configured date range.
stock_data = yf.download(cfg.tickers, start=cfg.start_date, end=cfg.end_date)

# Build a date-ordered frame (the download is not guaranteed to be ordered) that
# keeps only the "Close" field of each ticker.
df = pd.DataFrame(stock_data).sort_values("Date")[["Close"]]
# Drop the outer ("Close") column level so that columns are plain ticker symbols.
df.columns = df.columns.droplevel(0)
print(df.head(10))

[*********************100%***********************]  208 of 208 completed

4 Failed downloads:
- EUCAR.PA: No data found, symbol may be delisted
- URW.PA: Data doesn't exist for startDate = 965080800, endDate = 1672441200
- HOLN.PA: No data found, symbol may be delisted
- PHA.PA: No data found, symbol may be delisted
                     2G3.SG  A2A.MI  AB.PA  ABCA.PA  ABNX.PA  ABVX.PA  \
Date                                                                    
2000-07-31 00:00:00     NaN     NaN    NaN      NaN      NaN      NaN   
2000-08-01 00:00:00     NaN    4.27    NaN    12.28      NaN      NaN   
2000-08-02 00:00:00     NaN    4.24    NaN    11.55      NaN      NaN   
2000-08-03 00:00:00     NaN    4.06    NaN    11.60      NaN      NaN   
2000-08-04 00:00:00     NaN    4.15    NaN    11.60      NaN      NaN   
2000-08-07 00:00:00     NaN    4.18    NaN    11.50      NaN      NaN   
2000-08-08 00:00:00     NaN    4.14    NaN    12.20      NaN      NaN   
2000-08-09 00:00:00     N

In [3]:
# Parameter grid explored by the backtest.
#
# For a quick single-combination run, replace the four lists below with
# one-element lists, e.g.:
#   watch_days_range = [2]
#   hold_days_range = [7]
#   num_stocks_to_buy_range = [1]
#   loss_limit_range = [0.999]

watch_days_range = [2, 3, 4, 5]
hold_days_range = [2, 3, 4, 5, 6, 7, 8]
num_stocks_to_buy_range = [1, 2]
loss_limit_range = [0.998, 0.995]

# Size of the full cartesian product of the four ranges.
num_combinations = 1
for parameter_range in (watch_days_range, hold_days_range, num_stocks_to_buy_range, loss_limit_range):
    num_combinations *= len(parameter_range)

# Rough throughput figure used only for the time estimate printed below.
combinations_per_minute = 6
estimated_minutes = num_combinations / combinations_per_minute

print(f"Number of combinations: {num_combinations}, Estimated time: {estimated_minutes:.2f} minutes")

Number of combinations: 112, Estimated time: 18.67 minutes


In [4]:
# Grid-search backtest.
#
# For every (watch_days, hold_days, num_stocks_to_buy, loss_limit) combination, df is
# walked in consecutive, non-overlapping periods of watch_days + hold_days rows. In each
# period the stocks with the largest price increase over the watch window are bought on
# the row after the window and sold hold_days rows later, or at the stop-loss price if
# the price fell below it in between. Per-period profit factors are compounded into
# total_profit, and one result row is stored per combination.
results = []
combination_index = 0

# Per-period diagnostics. They are appended for EVERY combination in execution order,
# so they only form one coherent series when exactly one combination is run.
total_profits = []           # compounded profit factor after each period
all_profits = []             # profit factor of each period on its own (may be NaN)
mean_price_increases = []    # mean watch-window increase over all stocks (1-element Series)
median_price_increases = []  # median watch-window increase over all stocks (1-element Series)

for watch_days in watch_days_range:  # number of rows used to measure the price increase
    for hold_days in hold_days_range:  # number of rows between buying and selling
        period_length = watch_days + hold_days
        # One period fewer than would fit, which keeps sell_day_number a valid row position.
        num_iterations = len(df) // period_length - 1

        # Everything collected here depends only on (watch_days, hold_days) and the period
        # index, not on num_stocks_to_buy or loss_limit. It is therefore computed once
        # and reused by every inner combination instead of being recomputed each time.
        periods = []
        for i in range(num_iterations):
            start_watch_day_number = i * period_length
            buy_day_number = start_watch_day_number + watch_days
            sell_day_number = buy_day_number + hold_days

            # One-row frame: price change from the first to the last row of the watch window.
            price_increase = df.iloc[start_watch_day_number:buy_day_number].pct_change(watch_days - 1).tail(1)

            periods.append((
                price_increase,
                price_increase.mean(axis=1),
                price_increase.median(axis=1),
                df.iloc[buy_day_number],                             # prices on the buy row
                df.iloc[sell_day_number],                            # prices on the sell row
                df.iloc[buy_day_number + 1:sell_day_number].min(),   # lowest price strictly between buy and sell
            ))

        for num_stocks_to_buy in num_stocks_to_buy_range:  # how many top-ranked stocks to buy
            for loss_limit in loss_limit_range:  # stop-loss level as a fraction of the buy price
                total_profit = 1

                for price_increase, mean_increase, median_increase, buy_row, sell_row, window_min in periods:
                    if len(price_increase.columns) < num_stocks_to_buy:
                        continue

                    # Rank stocks by watch-window increase; nlargest ignores NaN entries.
                    # iloc[0] always yields a per-stock Series, even for a one-column frame.
                    # (Use nsmallest here instead to buy the biggest losers.)
                    top_stocks = price_increase.iloc[0].nlargest(num_stocks_to_buy).index

                    # Sort every per-stock Series by ticker so they align element-wise.
                    buy_prices = buy_row.loc[top_stocks].sort_index()
                    hold_prices = sell_row.loc[top_stocks].sort_index()
                    limit_prices = loss_limit * buy_prices
                    min_prices = window_min.loc[top_stocks].sort_index()

                    # A stock whose price dipped below its limit is treated as sold at the limit price.
                    stop_triggered = min_prices < limit_prices
                    sell_prices = hold_prices.copy()
                    sell_prices[stop_triggered] = limit_prices[stop_triggered]

                    # Profit factor per stock, with the fee charged on both the buy and the sell side.
                    profits = (sell_prices * (1 - cfg.fee)) / (buy_prices * (1 + cfg.fee))
                    profit = profits.dropna().mean(skipna=True)  # average over the selected stocks

                    # Periods without a usable profit (NaN/inf) leave the running total unchanged.
                    total_profit *= profit if np.isfinite(profit) else 1

                    total_profits.append(total_profit)
                    mean_price_increases.append(mean_increase)
                    median_price_increases.append(median_increase)
                    all_profits.append(profit)

                results.append({'watch_days': watch_days, 'num_stocks_to_buy': num_stocks_to_buy, 'hold_days': hold_days,
                                'loss_limit': loss_limit, 'total_profit': total_profit})

                print(f"Combination: {combination_index + 1} / {num_combinations}")
                combination_index += 1

results_df = pd.DataFrame(results)

Combination: 1 / 112
Combination: 2 / 112
Combination: 3 / 112
Combination: 4 / 112
Combination: 5 / 112
Combination: 6 / 112
Combination: 7 / 112
Combination: 8 / 112
Combination: 9 / 112
Combination: 10 / 112
Combination: 11 / 112
Combination: 12 / 112
Combination: 13 / 112
Combination: 14 / 112
Combination: 15 / 112
Combination: 16 / 112
Combination: 17 / 112
Combination: 18 / 112
Combination: 19 / 112
Combination: 20 / 112
Combination: 21 / 112
Combination: 22 / 112
Combination: 23 / 112
Combination: 24 / 112
Combination: 25 / 112
Combination: 26 / 112
Combination: 27 / 112
Combination: 28 / 112
Combination: 29 / 112
Combination: 30 / 112
Combination: 31 / 112
Combination: 32 / 112
Combination: 33 / 112
Combination: 34 / 112
Combination: 35 / 112
Combination: 36 / 112
Combination: 37 / 112
Combination: 38 / 112
Combination: 39 / 112
Combination: 40 / 112
Combination: 41 / 112
Combination: 42 / 112
Combination: 43 / 112
Combination: 44 / 112
Combination: 45 / 112
Combination: 46 / 1

In [5]:
# Diagnostic plots; drawn only when a single parameter combination was run.
if num_combinations == 1:
    mpl.rcParams.update({'figure.figsize': [20, 15], 'axes.grid': True})

    # Line plots of the compounded and the per-period profit.
    for series, title in ((total_profits, 'total profit vs iteration'),
                          (all_profits, 'profit vs iteration')):
        plt.plot(series)
        plt.title(title)
        plt.show()

    # Scatter plots of per-period profit against the mean / median price increase.
    for increases, x_label in ((mean_price_increases, 'mean_price_increase'),
                               (median_price_increases, 'median_price_increase')):
        plt.scatter(x=increases, y=all_profits)
        plt.xlabel(x_label)
        plt.ylabel('profit')
        plt.show()

In [7]:
# Rank the combinations from most to least profitable. Reassigning instead of sorting
# in place keeps this cell free of in-place mutation of a frame built by another cell.
results_df = results_df.sort_values("total_profit", ascending=False)

from datetime import datetime

def get_number_of_years(start_date, end_date):
    """Return the time between two ``YYYY-MM-DD`` date strings, expressed in years.

    The whole-day difference ``end_date - start_date`` is divided by 365.25, so the
    result is fractional, and negative when ``end_date`` precedes ``start_date``.
    ``datetime.strptime`` raises ``ValueError`` for a string that does not match
    the ``%Y-%m-%d`` format.
    """
    date_format = "%Y-%m-%d"
    first_day, last_day = (datetime.strptime(text, date_format) for text in (start_date, end_date))
    return (last_day - first_day).days / 365.25

# Convert the compounded profit factor of the whole backtest into a per-year factor:
# total_profit ** (1 / years).
years = get_number_of_years(cfg.start_date, cfg.end_date)
results_df['yearly_profit'] = results_df['total_profit'] ** (1 / years)

# DataFrame.to_csv does not create missing directories, so make sure the target exists.
output_dir = Path('./outputs')
output_dir.mkdir(parents=True, exist_ok=True)
results_df.to_csv(output_dir / f'nlargest {cfg.start_date} to {cfg.end_date}.csv', index=False)
print(results_df.to_markdown())

ValueError: No engine for filetype: 'csv'