In [None]:
# Imports
import sys
sys.path.append("Program")

import datetime as dt
from fundamentals import *
from helper_functions import get_current_date, get_df, get_volume5m_df, generate_end_dates, merge_stocks, stock_market
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
pd.options.mode.chained_assignment = None
from plot import *
from scipy.stats import linregress, pearsonr, ttest_ind
from stock_screener import get_stock_info, stoploss_target
from technicals import *

In [None]:
# Start of the program
# Timestamp of the run; passed to get_current_date below
start = dt.datetime.now()

# Variables
# HKEX_all / NASDAQ_all are forwarded to stock_market, get_infix, longshortRS and
# compare_longshortRS in later cells
# NOTE(review): presumably they switch between an index's constituents and the whole exchange - confirm
HKEX_all = False
NASDAQ_all = True
# NOTE(review): period_hk, period_us, RS, factors and backtest are not referenced in the visible cells
period_hk = 60 # Period for HK stocks
period_us = 252 # Period for US stocks
RS = 90
factors = [1, 1, 1]
backtest = False

# Index
# index_dict maps an index ticker to its display name
index_name = "^GSPC"
index_dict = {"^HSI": "HKEX", "^GSPC": "S&P 500", "^IXIC": "NASDAQ Composite"}

# Get the infix
# NOTE(review): get_infix is not imported by name; presumably it comes from one of the star imports - confirm
infix = get_infix(index_name, index_dict, NASDAQ_all)

# Get the current date
# Later cells parse current_date with the "%Y-%m-%d" format, so it is expected to be a string in that format
current_date = get_current_date(start, index_name)

# Define the result folder
# Passed to longshortRS and compare_longshortRS in later cells
result_folder = "Result"

In [None]:
# Choose the stocks and draw the close, volatility, ADX and MFI/RSI charts for each,
# followed by a comparison against the benchmark index of its market
stocks = ["CLS", "LAZ", "SE", "UTI"]
indicator_plots = (plot_close, plot_volatility, plot_ADX, plot_MFI_RSI)
for stock in stocks:
    df = get_df(stock, current_date)
    for draw in indicator_plots:
        draw(stock, df)
    # Tickers ending in ".HK" are compared with ^HSI, all others with ^GSPC
    plot_stocks(["^HSI" if stock.endswith(".HK") else "^GSPC", stock], current_date)

In [None]:
# Get the stop loss and target price of a stock
stock, size, industry = "UTI", 15, "Consumer defensive/Education"
entry_date = "2024-11-30"

# The entry price is the latest close rounded to two decimals
df = get_df(stock, current_date)
current_close = df["Close"].iloc[-1]
entry = round(current_close, 2)
stoploss, stoploss_pct, target, target_pct = stoploss_target(stock, entry, entry_date)

# Reformat the entry date from YYYY-MM-DD to DD-MM-YY for the summary line
entry_date_fmt = dt.datetime.strptime(entry_date, "%Y-%m-%d").strftime("%d-%m-%y")

# Print the plan, one message per line
summary = [
    f"Plan for {stock}.",
    f"Current close: {round(current_close, 2)}.",
    f"{stock} {size}% ({industry}) Entry {entry} ({entry_date_fmt}) SL {stoploss} ({stoploss_pct}%) TP {target} ({target_pct}%)",
]
print("\n".join(summary))

In [None]:
# Build the analysis frame for one stock; later cells read `stock` and `df` from here
stock = "9618.HK"
df = get_df(stock, current_date)
# 1-based sequential row counter; a later cell uses it to measure the gap between outlier rows
df["Row Number"] = range(1, len(df) + 1)
# Horizons 5, 10, ..., 60; later cells read the "60 Days Return (%)" column
# NOTE(review): presumably calculate_ndays_return adds one "<n> Days Return (%)" column per horizon - confirm
df = calculate_ndays_return(df, np.arange(5, 65, 5))
df = calculate_ndays_return(df, 1)
# Later cells read df["ADX"]; presumably that column is added here - confirm
df = ADX(df)

In [None]:
# Combine trend strength (ADX) with the 20-row price change
df["Close 20 Days Ago"] = df["Close"].shift(20)
df["20 Days Percent Change"] = df["Close"] / df["Close 20 Days Ago"] - 1
df["ADX * 20 Days Percent Change"] = df["ADX"] * df["20 Days Percent Change"]

# Restrict the sample to the window from five years before current_date up to 2024-09-12.
# Both bounds must be compared against the index; comparing csv_date itself to the end
# date yields a constant and leaves the window open-ended.
csv_date = (dt.datetime.strptime(current_date, "%Y-%m-%d") - relativedelta(years=5)).strftime("%Y-%m-%d")
df_filter = df[(df.index >= csv_date) & (df.index <= "2024-09-12")]

# Split the window into inliers and outliers of the indicator
# NOTE(review): a later cell reads an "ADX * 20 Days Percent Change Z-Score" column from df_filter,
# so filter_df_outlier presumably adds it in place; the meaning of the third argument is not visible here - confirm
df_inlier, df_outlier = filter_df_outlier(df_filter, "ADX * 20 Days Percent Change", 1)

# Remove dates with too little separation: keep an outlier only when it lies at least
# min_separation rows after the previously kept one (0 keeps every outlier)
min_separation = 0
rows = []
for row in df_outlier["Row Number"]:
    # The first outlier is always kept; an empty outlier frame simply yields no rows
    if not rows or row - rows[-1] >= min_separation:
        rows.append(row)
df_outlier = df_outlier[df_outlier["Row Number"].isin(rows)]

In [None]:
# Create a figure with three stacked subplots that share the date axis
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(8, 8), gridspec_kw={"height_ratios": [3, 1, 1]}, sharex=True)

# Drop NaN values once and reuse the result for every panel
filtered_data = df_filter.dropna()
filtered_index = filtered_data.index

# Top panel: closing price
ax1.plot(filtered_index, filtered_data["Close"])
ax1.set_ylabel("Price")

# Pad the shared x axis by one day on each side
buffer = relativedelta(days=1)
ax1.set_xlim(filtered_index[0] - buffer, filtered_index[-1] + buffer)

# Middle panel: the raw indicator (its z-score is shown in the bottom panel)
ax2.plot(filtered_index, filtered_data["ADX * 20 Days Percent Change"])
ax2.set_ylabel("ADX * 20 Days Gain")

# Bottom panel: z-score of the indicator
ax3.plot(filtered_index, filtered_data["ADX * 20 Days Percent Change Z-Score"], color="red")
ax3.set_ylabel("Z-Score")

# Add red dotted guide lines at y=2 and y=-2
for level in (2, -2):
    ax3.axhline(y=level, color="red", linestyle="dotted")

# Set the x label and title
plt.xlabel("Date")
plt.suptitle(f"ADX * 20 Days Gain of {stock}")

# Adjust the spacing between subplots
plt.tight_layout()
plt.show()

In [None]:
# 60-day returns of the inlier and outlier samples, without missing values
return_60_inlier, return_60_outlier = (
    frame["60 Days Return (%)"].dropna() for frame in (df_inlier, df_outlier)
)

# Paired series for the regression: only rows of df_filter with no missing value in any column
adx_20pct_filter = df_filter.dropna()["ADX * 20 Days Percent Change"]
return_60_filter = df_filter["60 Days Return (%)"].loc[adx_20pct_filter.index]

# Plot the 1-day returns of the full frame, then the 60-day returns of each sample
for frame, horizon in ((df, 1), (df_inlier, 60), (df_outlier, 60)):
    plot_ndays_return(stock, frame.dropna(), horizon)

In [None]:
# One-sided two-sample t-test: alternative="less" tests whether the mean 60-day return
# of the outlier sample is lower than that of the inlier sample
ttest_result = ttest_ind(return_60_outlier, return_60_inlier, alternative="less")
t_stat, p_value = ttest_result
print(t_stat, p_value)

In [None]:
# Earnings releases as (date, EPS, EPS surprise in %) records
# NOTE(review): the run of 0 surprises from 2022-03-10 to 2024-05-16 looks like a
# placeholder for unavailable data - confirm before interpreting correlations
earnings_records = [
    ("2020-08-17", 1.96, 30.67),
    ("2020-11-16", 2.02, 30.32),
    ("2021-03-11", 0.891, 22.03),
    ("2021-05-19", 1.49, 14.62),
    ("2021-08-23", 1.74, -25.32),
    ("2021-11-18", 1.93, -3.59),
    ("2022-03-10", 1.37, 0),
    ("2022-05-17", 1.47, 0),
    ("2022-08-23", 2.33, 0),
    ("2022-11-18", 3.44, 0),
    ("2023-03-09", 2.71, 0),
    ("2023-05-11", 2.69, 0),
    ("2023-08-16", 2.89, 0),
    ("2023-11-15", 3.61, 0),
    ("2024-03-06", 2.88, 0),
    ("2024-05-16", 3.06, 0),
    ("2024-08-15", 5.09, 51.49),
    ("2024-11-14", 4.66, 12.56),
]

# Split the records back into the three parallel lists
dates, eps, eps_sp = (list(column) for column in zip(*earnings_records))

# Create the DataFrame directly on a datetime index named "Date"
eps_df = pd.DataFrame(
    {"EPS": eps, "EPS Surprise (%)": eps_sp},
    index=pd.to_datetime(dates).rename("Date"),
)

# Release-over-release EPS change in percent (the first release has no predecessor, so it is NaN)
eps_df["EPS Percent Change (%)"] = eps_df["EPS"].pct_change() * 100

In [None]:
# Get the price data of HSI and pass it through the 60-day return calculation in one step
hsi_df = calculate_ndays_return(get_df("^HSI", current_date), 60)

In [None]:
# Initialize both return columns as missing, so that a release date with no price data
# on or after it is removed by dropna() below instead of being kept as a 0% return
eps_df["60 Days Return (%)"] = np.nan
eps_df["60 Days Return of HSI (%)"] = np.nan

for index in eps_df.index:
    # Select the rows dated on or after the release date
    rows = df[df.index >= index]
    hsi_rows = hsi_df[hsi_df.index >= index]

    # Use the first available date on or after the release for the stock ...
    if not rows.empty:
        date = rows.index.min()
        eps_df.loc[index, "60 Days Return (%)"] = df.loc[date, "60 Days Return (%)"]

    # ... and independently for the index
    if not hsi_rows.empty:
        date = hsi_rows.index.min()
        eps_df.loc[index, "60 Days Return of HSI (%)"] = hsi_df.loc[date, "60 Days Return (%)"]

# Keep only the releases for which every column is available
eps_df = eps_df.dropna()

In [None]:
# Pull out the four series used by the correlation plots
eps_diff, eps_sps, return_60, hsireturn_60 = (
    eps_df[column]
    for column in (
        "EPS Percent Change (%)",
        "EPS Surprise (%)",
        "60 Days Return (%)",
        "60 Days Return of HSI (%)",
    )
)

In [None]:
# Pearson correlation (and its p-value) between the EPS change and the 60-day return
corr_coeff, p_value = pearsonr(eps_diff, return_60)

# Scatter plot on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(eps_diff, return_60, marker="x")

# Annotate the statistics near the upper-right corner (axes coordinates)
stats_text = f"correlation: {corr_coeff:.3e}\np-value: {p_value:.3e}"
ax.text(0.9, 0.9, stats_text, ha="right", va="top", transform=ax.transAxes)

# Title and axis labels
ax.set_title(f"60 Days Return vs EPS Percent Change for {stock}")
ax.set_xlabel("EPS Percent Change (%)")
ax.set_ylabel("60 Days Return (%)")

# Show the plot
plt.show()

In [None]:
# Pearson correlation (and its p-value) between the EPS surprise and the 60-day return
corr_coeff, p_value = pearsonr(eps_sps, return_60)

# Scatter plot on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(eps_sps, return_60, marker="x")

# Annotate the statistics near the upper-right corner (axes coordinates)
stats_text = f"correlation: {corr_coeff:.3e}\np-value: {p_value:.3e}"
ax.text(0.9, 0.9, stats_text, ha="right", va="top", transform=ax.transAxes)

# Title and axis labels
ax.set_title(f"60 Days Return vs EPS Surprise for {stock}")
ax.set_xlabel("EPS Surprise (%)")
ax.set_ylabel("60 Days Return (%)")

# Show the plot
plt.show()

In [None]:
# Pearson correlation (and its p-value) between the 60-day returns of HSI and of the stock
corr_coeff, p_value = pearsonr(hsireturn_60, return_60)

# Scatter plot on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(hsireturn_60, return_60, marker="x")

# Annotate the statistics near the upper-right corner (axes coordinates)
stats_text = f"correlation: {corr_coeff:.3e}\np-value: {p_value:.3e}"
ax.text(0.9, 0.9, stats_text, ha="right", va="top", transform=ax.transAxes)

# Title and axis labels
ax.set_title(f"60 Days Return of {stock} vs HSI")
ax.set_xlabel("60 Days Return of HSI (%)")
ax.set_ylabel("60 Days Return (%)")

# Show the plot
plt.show()

In [None]:
# Pearson correlation (and its p-value) between the indicator and the 60-day return
corr_coeff, p_value = pearsonr(adx_20pct_filter, return_60_filter)

# Ordinary least-squares fit of the 60-day return on the indicator
regression = linregress(adx_20pct_filter, return_60_filter)
slope, intercept, r_value, p_value_reg, std_err = regression

# Scatter plot on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(adx_20pct_filter, return_60_filter, marker="x")

# Evaluate the best-fit line on 100 evenly spaced points across the observed indicator range
x_values = np.linspace(adx_20pct_filter.min(), adx_20pct_filter.max(), 100)
y_values = slope * x_values + intercept

# Write the intercept with an explicit sign so the legend never shows "+-"
if intercept >= 0:
    label = rf"Best-fit line: $y={slope:.2f}x+{intercept:.2f}$"
else:
    label = rf"Best-fit line: $y={slope:.2f}x-{np.abs(intercept):.2f}$"
ax.plot(x_values, y_values, color="red", label=label)

# Annotate the statistics near the upper-right corner (axes coordinates)
stats_text = f"correlation: {corr_coeff:.3e}\np-value: {p_value:.3e}"
ax.text(0.9, 0.9, stats_text, ha="right", va="top", transform=ax.transAxes)

# Title, axis labels and legend
ax.set_title(f"60 Days Return of {stock} vs ADX * 20 Days Percent Change")
ax.set_xlabel("ADX * 20 Days Percent Change")
ax.set_ylabel("60 Days Return (%)")
ax.legend()

# Show the plot
plt.show()

In [None]:
# Variables
# Two end dates and two periods forwarded to longshortRS / plot_longshortRS below;
# period1 and period2 are also reused by the compare_longshortRS cell
# NOTE(review): presumably period1 is the long-term and period2 the short-term RS window - confirm
end_date1 = "2024-09-12"
end_date2 = "2024-10-07"
period1 = 252
period2 = 20

# Get the infix
# Only sp500_infix is used in this cell; hsi_infix is read by a later cell
hsi_infix = get_infix("^HSI", index_dict, NASDAQ_all)
sp500_infix = get_infix("^GSPC", index_dict, NASDAQ_all)

# Get the list of stocks
# hsi_stocks is not used by the calls below in this cell
hsi_stocks = stock_market(current_date, current_date, "^HSI", HKEX_all, NASDAQ_all)
sp500_stocks = stock_market(current_date, current_date, "^GSPC", HKEX_all, NASDAQ_all)

# Get the price data of the index
# NOTE(review): this reassigns hsi_df, replacing the frame an earlier cell passed through
# calculate_ndays_return; re-running that earlier analysis after this cell needs hsi_df rebuilt
hsi_df = get_df("^HSI", current_date)
sp500_df = get_df("^GSPC", current_date)
# Long/short term RS of the S&P 500 stocks between the two end dates
# NOTE(review): the unit and meaning of volume_filter=500 are not visible here - confirm
merged_df = longshortRS(sp500_stocks, sp500_df, "^GSPC", index_dict, NASDAQ_all, current_date, end_date1, end_date2, period1, period2, result_folder, sp500_infix, volume_filter=500)

# Plot the long and short term RS
plot_longshortRS(merged_df, end_date1, end_date2)

In [None]:
# Define the end dates
end_dates = generate_end_dates(3, current_date, interval="1w")

# Compare the long and short term RS of the S&P 500 stocks.
# The infix must belong to the same index as the stock list and index frame passed in
# (S&P 500), matching the longshortRS call made with the same arguments.
rs_slopes, r_squareds, end_dates2 = compare_longshortRS(sp500_stocks, sp500_df, "^GSPC", index_dict, NASDAQ_all, current_date, end_dates, period1, period2, result_folder, sp500_infix)

In [None]:
# Plot the comparison between long and short term RS
# Uses the slopes, R-squared values and both end-date lists produced by the
# compare_longshortRS cell, together with the S&P 500 index frame
plot_compare_longshortRS(sp500_df, "^GSPC", rs_slopes, r_squareds, end_dates, end_dates2)

In [None]:
# Number of most recent rows to display (3 * 252)
show = 252 * 3

# Merge the gold, silver and copper futures and derive the gold-relative ratios
stocks = ["GC=F", "SI=F", "HG=F"]
metal_df = merge_stocks(stocks, current_date)
metal_df["Gold/Silver Ratio"] = metal_df["Close (GC=F)"] / metal_df["Close (SI=F)"]
metal_df["Gold/Copper Ratio"] = metal_df["Close (GC=F)"] / metal_df["Close (HG=F)"]
# The plot below reads the "<ratio> Z-Score" columns
# NOTE(review): presumably calculate_ZScore adds them using a 252-row window - confirm
metal_df = calculate_ZScore(metal_df, ["Gold/Silver Ratio", "Gold/Copper Ratio"], 252)

# Restrict the dataframe to the last `show` rows
metal_df = metal_df.iloc[-show:]

# Create a figure with three subplots, one for the metal prices, one for the ratios, one for the ratios z-score
fig, (ax1, ax2, ax3) = plt.subplots(3, 1, figsize=(8, 8), gridspec_kw={"height_ratios": [3, 1, 1]}, sharex=True)

# Plot the metal prices on the first subplot, each rebased to 100 at the first displayed row
metals = {"GC=F": ("Gold", "gold"), "SI=F": ("Silver", "silver"), "HG=F": ("Copper", "peru")}
for ticker, (metal_name, metal_color) in metals.items():
    close = metal_df[f"Close ({ticker})"]
    ax1.plot(100 / close.iloc[0] * close, label=f"{metal_name} (scaled)", color=metal_color)

# Set the label of the first subplot
ax1.set_ylabel("Price")

# Set the x limit of the first subplot
ax1.set_xlim(metal_df.index[0], metal_df.index[-1])

# Plot the ratios on the second subplot (rebased to 100) and their z-scores on the third,
# coloured after the metal that gold is compared with
ratios = {"Gold/Silver Ratio": "silver", "Gold/Copper Ratio": "peru"}
for ratio_name, ratio_color in ratios.items():
    ratio = metal_df[ratio_name]
    ax2.plot(100 / ratio.iloc[0] * ratio, color=ratio_color)
    ax3.plot(metal_df[f"{ratio_name} Z-Score"], color=ratio_color)

# Set the y label of the second subplot
ax2.set_ylabel("Ratio wrt Gold")

# Mark the +/-2 z-score levels on the third subplot
ax3.axhline(y=2, linestyle="dotted", label="Undervalued", color="green")
ax3.axhline(y=-2, linestyle="dotted", label="Overvalued", color="red")

# Set the y label of the third subplot
ax3.set_ylabel("Ratio z-score")

# Set the x label
plt.xlabel("Date")

# Set the title
plt.suptitle("Metal prices comparison")

# Combine the legends and place them at the top subplot
handles, labels = ax1.get_legend_handles_labels()
zscore_handles, zscore_labels = ax3.get_legend_handles_labels()
ax1.legend(handles + zscore_handles, labels + zscore_labels)

# Adjust the spacing between subplots
plt.tight_layout()

# Save the plot under the configured result folder
plt.savefig(f"{result_folder}/Figure/metalcompare.png", dpi=300)

# Show the plot
plt.show()