# technicals

> This module implements various methods for adding several technical indicators and metrics to the raw bhavcopy data.

In [None]:
#| default_exp technicals

In [None]:

#| hide
from nbdev.showdoc import *

In [None]:
#| export
import pandas as pd
import os
import numpy as np
from pathlib import Path
import pandas_ta as pdta
import nbdev

In [None]:
#| export
# Base path for the data directories: the `lib_path` entry of the nbdev
# project configuration.
base_path = nbdev.config.get_config().lib_path

In [None]:
#| export
# Input and output folders, both located in Data/Bhavcopy one level above base_path.
raw_data_dir = base_path / ".." / "Data" / "Bhavcopy" / "Raw"
processed_data_dir = base_path / ".." / "Data" / "Bhavcopy" / "Processed"

In [None]:
#| export
# Columns read from each bhavcopy CSV, mapped to the dtype requested from
# pandas. LAST, PREVCLOSE, ISIN and "Unnamed: 13" are not loaded.
bhavcopy_dtypes = dict(
    SYMBOL='string',
    SERIES='string',
    OPEN='float64',
    HIGH='float64',
    LOW='float64',
    CLOSE='float64',
    TOTTRDQTY='int64',
    TOTTRDVAL='float64',
    TIMESTAMP='string',
    TOTALTRADES='int64',
)

# The columns to load are exactly the ones that have a dtype above, in the
# same order.
bhavcopy_usecols = list(bhavcopy_dtypes)

In [None]:
#| export
# Read single day bhavcopy file
def load_bhavcopy(file_path, stocks_list=None):
    """Read one bhavcopy CSV file into a DataFrame.

    Only the columns in ``bhavcopy_usecols`` are loaded, using the dtypes in
    ``bhavcopy_dtypes``; ``TIMESTAMP`` is additionally passed to pandas as a
    date column.

    Args:
        file_path: Path of the CSV file to read.
        stocks_list: Optional collection of symbols to keep. When it is
            ``None`` or empty, every row is returned.

    Returns:
        The loaded DataFrame, restricted to rows whose ``SYMBOL`` is in
        ``stocks_list`` when one is given.
    """
    frame = pd.read_csv(
        file_path,
        usecols=bhavcopy_usecols,
        dtype=bhavcopy_dtypes,
        parse_dates=["TIMESTAMP"],
        dayfirst=False,
    )
    # No filter requested: hand back the whole file.
    if not stocks_list:
        return frame
    wanted = frame.SYMBOL.isin(stocks_list)
    return frame[wanted]

In [None]:
#| export
# Preprocess bhavcopy data
def preprocess(df):
    """Keep the EQ series rows and replace TIMESTAMP with calendar columns.

    Args:
        df: Bhavcopy DataFrame with at least ``SYMBOL``, ``SERIES`` and
            ``TIMESTAMP`` columns. ``TIMESTAMP`` must be parseable with the
            ``%d-%b-%Y`` format (or already be datetime-like).

    Returns:
        A new DataFrame containing only rows with ``SERIES == "EQ"``, with
        ``TIMESTAMP`` dropped and ``DATE`` (``datetime.date``),
        ``DAY_OF_WEEK`` (day name) and ``WEEK_NUM`` (ISO week number) added,
        sorted by ``SYMBOL`` then ``DATE`` with a fresh 0..n-1 index.
    """
    equities = df[df["SERIES"] == "EQ"]
    # Parse the timestamp once, on the filtered rows only. Deriving it from
    # the filtered frame also avoids realigning a full-length Series by index
    # label, which fails when the incoming index contains duplicate labels.
    stamps = pd.to_datetime(equities["TIMESTAMP"], format="%d-%b-%Y")
    return (equities
            .assign(
                DATE=stamps.dt.date,
                DAY_OF_WEEK=stamps.dt.day_name(),
                WEEK_NUM=stamps.dt.isocalendar().week,
            )
            .drop(columns=["TIMESTAMP"])
            .sort_values(["SYMBOL", "DATE"])
            .reset_index(drop=True)
        )

In [None]:
#| export
# Generate simple moving average data
def get_sma(df_symbol, period=20, metric="CLOSE"):
    """Compute a simple moving average of one price column.

    Args:
        df_symbol: DataFrame holding the upper-case price columns
            (``CLOSE``, ``OPEN``, ``HIGH``, ``LOW``).
        period: Window length passed to ``pandas_ta.sma``.
        metric: Name of the price column to average; matched
            case-insensitively.

    Returns:
        The moving-average Series, named ``SMA_{period}_{X}`` where ``X`` is
        the first letter of the upper-cased metric.

    Raises:
        ValueError: If ``metric`` is not one of CLOSE, OPEN, HIGH, LOW.
    """
    # Normalise once so validation, column lookup and the output name all use
    # the same spelling; previously "close" passed validation and then failed
    # the column lookup.
    column = metric.upper()
    if column not in ("CLOSE", "OPEN", "HIGH", "LOW"):
        raise ValueError("Invalid metric")
    return pdta.sma(df_symbol[column], length=period).rename(f"SMA_{period}_{column[0]}")

In [None]:
#| export
# Generate bollinger bands data
def get_bollinger_bands(df_symbol, period=20, std=2):
    """Compute Bollinger bands on CLOSE and shorten the column names.

    Args:
        df_symbol: DataFrame with a ``CLOSE`` column.
        period: Window length passed to ``pandas_ta.bbands``.
        std: Standard-deviation multiplier passed to ``pandas_ta.bbands``.

    Returns:
        The DataFrame produced by ``pandas_ta.bbands``, where any column named
        ``{BBU,BBM,BBL,BBB,BBP}_{period}_{std:.1f}`` is renamed to use ``std``
        exactly as it was passed in (e.g. ``BBU_20_2.0`` -> ``BBU_20_2``).
    """
    bands = pdta.bbands(df_symbol.CLOSE, length=period, std=std)
    short_names = {
        f"{prefix}_{period}_{std:.1f}": f"{prefix}_{period}_{std}"
        for prefix in ("BBU", "BBM", "BBL", "BBB", "BBP")
    }
    return bands.rename(columns=short_names)

In [None]:
#| export
# Generate donchian channel data
def get_donchian(df_symbol, upper=22, lower=66):
    """Compute Donchian channel bounds from HIGH and LOW.

    Args:
        df_symbol: DataFrame with ``HIGH`` and ``LOW`` columns.
        upper: Look-back length for the upper band.
        lower: Look-back length for the lower band.

    Returns:
        A DataFrame with the lower band as ``DONCHIAN_L{lower}`` and the upper
        band as ``DONCHIAN_U{upper}``; the mid-line column
        ``DCM_{lower}_{upper}`` is dropped.
    """
    # Forward the requested lengths. They used to be hard-coded to 66/22, so
    # any other arguments produced column names that the rename/drop below
    # could not find.
    channel = pdta.donchian(
        df_symbol.HIGH,
        df_symbol.LOW,
        lower_length=lower,
        upper_length=upper,
    )
    return (channel
                .rename(
                    columns={
                        f"DCL_{lower}_{upper}": f"DONCHIAN_L{lower}",
                        f"DCU_{lower}_{upper}": f"DONCHIAN_U{upper}"})
                .drop(columns=[f"DCM_{lower}_{upper}"])
                )

In [None]:
#| export
# Generate supertrend data
def get_supertrend(df_symbol, period=10, multiplier=3):
    """Compute the supertrend indicator and keep only its direction column.

    Args:
        df_symbol: DataFrame with ``HIGH``, ``LOW`` and ``CLOSE`` columns.
        period: Length passed to ``pandas_ta.supertrend``.
        multiplier: Multiplier passed to ``pandas_ta.supertrend``.

    Returns:
        The ``pandas_ta.supertrend`` result with the ``SUPERT``, ``SUPERTl``
        and ``SUPERTs`` columns dropped and the ``SUPERTd`` column renamed to
        ``SUPTR_{period}_{multiplier}``.
    """
    # Suffix shared by the column names this function looks up.
    suffix = f"{period}_{multiplier:.1f}"
    trend = pdta.supertrend(
        df_symbol.HIGH, df_symbol.LOW, df_symbol.CLOSE,
        length=period, multiplier=multiplier
    )
    discarded = [f"SUPERT_{suffix}", f"SUPERTl_{suffix}", f"SUPERTs_{suffix}"]
    direction_name = {f"SUPERTd_{suffix}": f"SUPTR_{period}_{multiplier}"}
    return trend.drop(columns=discarded).rename(columns=direction_name)

In [None]:
#| export
# Generate candlestick data
def get_candle_stats(df_symbol):
    """Derive per-row candlestick measurements from OHLC columns.

    Args:
        df_symbol: DataFrame with ``OPEN``, ``HIGH``, ``LOW`` and ``CLOSE``
            columns.

    Returns:
        A new DataFrame on the same index as ``df_symbol`` with:
        ``CDL_COLOR`` ('green' when CLOSE > OPEN, otherwise 'red', string
        dtype), ``CDL_SIZE`` (absolute CLOSE - OPEN), ``TOPWICK_SIZE`` (HIGH
        minus the larger of OPEN/CLOSE) and ``BOTWICK_SIZE`` (the smaller of
        OPEN/CLOSE minus LOW).
    """
    body = df_symbol[["OPEN", "CLOSE"]]
    # Vectorised comparison instead of a row-wise apply: same labels, no
    # Python-level loop, and it also works on an empty frame (row-wise apply
    # returns a DataFrame there, which cannot be assigned as a column).
    color = pd.Series(
        np.where(df_symbol.CLOSE > df_symbol.OPEN, "green", "red"),
        index=df_symbol.index,
        dtype="string",
    )
    return pd.DataFrame({
        "CDL_COLOR": color,
        "CDL_SIZE": (df_symbol.CLOSE - df_symbol.OPEN).abs(),
        "TOPWICK_SIZE": df_symbol.HIGH - body.max(axis=1),
        "BOTWICK_SIZE": body.min(axis=1) - df_symbol.LOW,
    })

In [None]:
#| export
# Generate misc technical indicators
def get_misc_stats(df_symbol):
    """Add absolute distances between the moving averages and CLOSE.

    Args:
        df_symbol: DataFrame with ``CLOSE``, ``SMA_20_C``, ``SMA_44_C`` and
            ``SMA_200_C`` columns.

    Returns:
        A copy of ``df_symbol`` with ``DIFF_20_44``, ``DIFF_20_200``,
        ``DIFF_20_CLOSE``, ``DIFF_44_CLOSE`` and ``DIFF_200_CLOSE`` appended,
        each being the absolute difference of the two named columns.
    """
    # new column -> (minuend column, subtrahend column)
    gaps = {
        "DIFF_20_44": ("SMA_20_C", "SMA_44_C"),
        "DIFF_20_200": ("SMA_20_C", "SMA_200_C"),
        "DIFF_20_CLOSE": ("SMA_20_C", "CLOSE"),
        "DIFF_44_CLOSE": ("SMA_44_C", "CLOSE"),
        "DIFF_200_CLOSE": ("SMA_200_C", "CLOSE"),
    }
    # Currently disabled: rolling mean/max of CDL_SIZE over 12 and 22 rows,
    # and AVGTRDQTY / AVGTRDVAL (TOTTRDQTY, TOTTRDVAL divided by TOTALTRADES).
    new_columns = {
        name: (df_symbol[left] - df_symbol[right]).abs()
        for name, (left, right) in gaps.items()
    }
    return df_symbol.assign(**new_columns)


In [None]:
#| export
# Generate all technicals for a symbol data
def add_all_technicals(df_symbol):
    """Append the enabled indicator columns to one symbol's rows.

    Args:
        df_symbol: DataFrame for a single symbol with a ``DATE`` column plus
            the OHLC columns the indicator helpers read.

    Returns:
        A DataFrame made of the input (index moved into a column, rows sorted
        by ``DATE``) concatenated column-wise with: SMA 20 of CLOSE, SMA 20 of
        HIGH, SMA 44 and SMA 200 of CLOSE, Bollinger bands (20, 2) and the
        candle statistics.
    """
    ordered = df_symbol.reset_index().sort_values(by="DATE")

    sma_specs = ((20, "CLOSE"), (20, "HIGH"), (44, "CLOSE"), (200, "CLOSE"))
    pieces = [ordered]
    pieces.extend(get_sma(ordered, length, column) for length, column in sma_specs)
    pieces.append(get_bollinger_bands(ordered, 20, 2))
    # Currently disabled: get_donchian(upper=22, lower=66), get_supertrend
    # with (12, 3), (11, 2) and (10, 1), and a final get_misc_stats pass.
    pieces.append(get_candle_stats(ordered))

    return pd.concat(pieces, axis=1)

In [None]:
#| export

# Get list of all csv files in raw_data_dir.
# iterdir() yields entries in arbitrary order, so sort them to make the
# 300-file cap below select the same files on every run.
csv_files = sorted(f for f in raw_data_dir.iterdir() if f.suffix == '.csv')

# Read the first 300 csv files into a single dataframe.
# Each entry from iterdir() already includes raw_data_dir, so it is passed to
# the loader as-is; joining it onto raw_data_dir again would duplicate the
# directory prefix whenever raw_data_dir is a relative path.
df = pd.concat(
        [load_bhavcopy(f, None) for f in csv_files[:300]],
        ignore_index=True
        )

# Keep EQ rows only and replace TIMESTAMP with DATE / DAY_OF_WEEK / WEEK_NUM.
df = preprocess(df)

In [None]:
#| export
# Make sure the output folder exists before writing; to_parquet does not
# create missing parent directories.
processed_data_dir.mkdir(parents=True, exist_ok=True)

# Write one parquet file per symbol, enriched with the technical indicators.
for symbol, df_symbol in df.groupby("SYMBOL"):
    # Symbols with 200 rows or fewer are skipped (the longest indicator
    # computed here is the 200-period SMA).
    if len(df_symbol) > 200:
        df_symbol = add_all_technicals(df_symbol)

        file_path = processed_data_dir / f"{symbol}.parquet"
        df_symbol.to_parquet(file_path, index=True)
        print(f"Saved {file_path}")

Saved c:\MyData\TechWork\stocksurfer\stocksurfer\..\Data\Bhavcopy\Processed\20MICRONS.parquet
Saved c:\MyData\TechWork\stocksurfer\stocksurfer\..\Data\Bhavcopy\Processed\3IINFOTECH.parquet
Saved c:\MyData\TechWork\stocksurfer\stocksurfer\..\Data\Bhavcopy\Processed\3MINDIA.parquet
Saved c:\MyData\TechWork\stocksurfer\stocksurfer\..\Data\Bhavcopy\Processed\5PAISA.parquet
Saved c:\MyData\TechWork\stocksurfer\stocksurfer\..\Data\Bhavcopy\Processed\63MOONS.parquet
Saved c:\MyData\TechWork\stocksurfer\stocksurfer\..\Data\Bhavcopy\Processed\A2ZINFRA.parquet
Saved c:\MyData\TechWork\stocksurfer\stocksurfer\..\Data\Bhavcopy\Processed\AARTIDRUGS.parquet
Saved c:\MyData\TechWork\stocksurfer\stocksurfer\..\Data\Bhavcopy\Processed\AARTIIND.parquet
Saved c:\MyData\TechWork\stocksurfer\stocksurfer\..\Data\Bhavcopy\Processed\AARVEEDEN.parquet
Saved c:\MyData\TechWork\stocksurfer\stocksurfer\..\Data\Bhavcopy\Processed\ABAN.parquet
Saved c:\MyData\TechWork\stocksurfer\stocksurfer\..\Data\Bhavcopy\Proces

In [None]:

# TODO multiprocessing inside Jupyter notebook
# with multiprocessing.Pool(processes=8) as pool:
#     for result in pool.imap(enrich_symbol_data, all_symbol_data):
#         symbol = result.SYMBOL.iloc[0]
#         file_path = os.path.join(processed_data_dir, f"{symbol}.csv")
#         result.to_csv(file_path, index=True)
#         print(f"Saved {file_path}")


In [None]:
#| hide
# Run nbdev's export step (presumably writes the `#| export` cells of this
# notebook to the library module named by `default_exp` — confirm against nbdev docs).
nbdev.nbdev_export()