<a href="https://colab.research.google.com/github/anirbanghoshsbi/.github.io/blob/master/work/temp_hosting/feature.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import pandas as pd

from statsmodels.tsa.stattools import adfuller

# Load Composite_data.csv straight from its raw GitHub URL (needs network access).
data = pd.read_csv('https://raw.githubusercontent.com/anirbanghoshsbi/.github.io/master/work/Composite_data.csv')

In [3]:
# Forward-fill missing values into a new frame; `data` keeps the raw values.
data_ffill=data.ffill()

In [4]:
# Promote the 'date' column to the index, mutating data_ffill in place.
# Running this cell twice without re-creating data_ffill raises KeyError,
# because 'date' is no longer a column after the first run.
data_ffill.set_index('date',inplace=True)

In [5]:
# Show the last two rows as a quick check of the most recent observations.
data_ffill.tail(2)

Unnamed: 0_level_0,FIICash,DIIcash,Percentage_above_5_dma,Percentage_above_10_dma,Above_15_dma,Percentage_above_20_dma,VIX,GoldPrice,IN10YR,IN10YR_yield,open,high,low,close
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
29-04-2024,169.0,692.0,72,64,58,54,12.2125,61.299999,99.871,7.2,22475.551,22655.801,22441.9,22643.4
30-04-2024,1072.0,1430.0,62,62,60,60,12.87,60.880001,99.888,7.195,22679.65,22783.35,22568.4,22604.85


In [6]:
# List the columns available as inputs for feature generation.
data_ffill.columns

Index(['FIICash', 'DIIcash', 'Percentage_above_5_dma',
       'Percentage_above_10_dma', 'Above_15_dma', 'Percentage_above_20_dma',
       'VIX', 'GoldPrice', 'IN10YR', 'IN10YR_yield', 'open', 'high', 'low',
       'close'],
      dtype='object')

In [7]:
def create_moving_average(df,windows,feature_name):
    """
    Add a simple moving average and a rate-of-change column for each window.

    For every ``window`` in ``windows`` two columns are written to ``df``:

    * ``f'{feature_name}MA_{window}'``  - rolling mean over ``window`` rows.
    * ``f'{feature_name}ROC_{window}'`` - rate of change over ``window`` rows,
      ``(x[t] - x[t - window]) / x[t - window]``.

    Parameters:
        df (DataFrame): The DataFrame containing ``feature_name``. It is
            modified in place.
        windows (iterable of int): Window lengths, in rows.
        feature_name (str): The name of the source column.

    Returns:
        DataFrame: The same ``df`` object with the new columns added.
    """
    series = df[feature_name]

    for window in windows:
        df[f'{feature_name}MA_{window}'] = series.rolling(window=window).mean()

        # Lag by `window` rows so each ROC column matches the horizon in its
        # name; a fixed lag of 1 would make every ROC_<window> column identical.
        lagged = series.shift(window)
        df[f'{feature_name}ROC_{window}'] = (series - lagged) / lagged

    return df


def create_lagged_features(df, lag, feature_name):
    """
    Create lagged versions of a variable in a DataFrame.

    Columns ``f'{feature_name}_lag_1'`` ... ``f'{feature_name}_lag_{lag}'`` are
    added to a copy of ``df``; the input DataFrame is not modified.

    Parameters:
        df (DataFrame): The DataFrame containing the variable.
        lag (int): The number of lags to create.
        feature_name (str): The name of the column for which lagged versions will be created.

    Returns:
        DataFrame: A copy of ``df`` with the lagged features added. Rows in
        which any lag column is NaN are removed; NaNs in other columns do not
        cause a row to be dropped.
    """
    # Work on a copy so the caller's DataFrame is left untouched
    df_copy = df.copy()

    # Create lagged versions of the variable, remembering the new column names
    lag_columns = []
    for i in range(1, lag + 1):
        column_name = f'{feature_name}_lag_{i}'
        df_copy[column_name] = df_copy[feature_name].shift(i)
        lag_columns.append(column_name)

    # Drop only the rows left incomplete by shifting. A bare dropna() would
    # also discard rows that merely have a NaN in some unrelated column.
    if lag_columns:
        df_copy = df_copy.dropna(subset=lag_columns)

    return df_copy




def calculate_macd(df,feature_name,short_window=12, long_window=26, signal_window=9):
    """
    Append MACD line, signal line and histogram columns for one feature.

    Parameters:
        df (DataFrame): The DataFrame holding the series; modified in place.
        feature_name (str): Column the indicator is computed from.
        short_window (int): Span of the fast EMA (default: 12).
        long_window (int): Span of the slow EMA (default: 26).
        signal_window (int): Span of the EMA applied to the MACD line (default: 9).

    Returns:
        DataFrame: The same ``df`` with ``{feature_name}_MACD_Line``,
        ``{feature_name}_Signal_Line`` and ``{feature_name}_MACD_Histogram`` added.
    """
    def _ema(series, span):
        # Recursive (adjust=False) exponential mean, emitted from the first row on.
        return series.ewm(span=span, min_periods=1, adjust=False).mean()

    source = df[feature_name]

    # MACD line = fast EMA - slow EMA; signal line = EMA of the MACD line.
    macd = _ema(source, short_window) - _ema(source, long_window)
    signal = _ema(macd, signal_window)

    df[f'{feature_name}_MACD_Line'] = macd
    df[f'{feature_name}_Signal_Line'] = signal
    # Histogram is the gap between the MACD line and its signal line.
    df[f'{feature_name}_MACD_Histogram'] = macd - signal

    return df




In [8]:
def calculate_historical_volatility(df,feature_name, period=14):
    """
    Calculate the historical volatility of one column of a DataFrame.

    Volatility is the rolling sample standard deviation of the column's
    row-over-row percentage returns.

    Parameters:
        df (DataFrame): The DataFrame containing ``feature_name``; modified in place.
        feature_name (str): The name of the column to compute volatility for.
        period (int): The period for calculating the historical volatility (default: 14).

    Returns:
        DataFrame: The same ``df`` with ``f'{feature_name}_historical_volatility'`` added.
    """
    # Percentage returns are kept as a local Series: they are only an
    # intermediate, so no helper column is written to (or dropped from) df.
    # Reading a fixed 'returns' column here would raise KeyError, because no
    # such column is ever created.
    returns = df[feature_name].pct_change() * 100

    # Rolling standard deviation of the returns
    df[f'{feature_name}_historical_volatility'] = returns.rolling(window=period).std()

    return df



def adf_test(df, window=20):
    """
    Run an Augmented Dickey-Fuller (ADF) test on the 'close' column.

    The test is run once over the entire 'close' series; the resulting test
    statistic and p-value are scalars that are broadcast to every row.

    Parameters:
        df (DataFrame): The DataFrame containing a 'close' column; modified in place.
        window (int): Currently unused; kept so existing callers that pass it
            keep working (default: 20).

    Returns:
        DataFrame: The same ``df`` with constant ``adf_test_statistic`` and
        ``adf_test_p_value`` columns added.
    """
    # adfuller returns a tuple whose first two entries are the test statistic
    # and the p-value; the remaining entries are not used here.
    adf_statistic, p_value = adfuller(df['close'])[:2]

    df['adf_test_statistic'] = adf_statistic
    df['adf_test_p_value'] = p_value

    return df


def calculate_trend_indicators(df,feature_name,window=5):
    """
    Calculate trend indicators for a DataFrame column using linear regression.

    For each row position ``i >= window`` a degree-1 polynomial is fitted to
    the ``window`` values strictly before row ``i`` (positions ``i - window``
    to ``i - 1``, against x = 0..window-1). The row's own value is not part of
    its fit. The first ``window`` rows are left as NaN.

    Parameters:
        df (DataFrame): The DataFrame containing the time series data; modified in place.
        feature_name (str): The name of the column to fit.
        window (int): The window size for linear regression (default: 5).

    Returns:
        DataFrame: The same ``df`` with ``f'{feature_name}_{window}_trend_slope'``
        and ``f'{feature_name}_{window}_trend_intercept'`` added.
    """
    values = df[feature_name].values
    n_rows = len(df)

    # Results are collected positionally and assigned once at the end. This
    # avoids per-row label-based writes, which are slow and would write to
    # every matching row if the index contained duplicate labels.
    slopes = np.full(n_rows, np.nan)
    intercepts = np.full(n_rows, np.nan)

    # The x axis is identical for every window, so build it only once.
    x = np.arange(window)

    for i in range(window, n_rows):
        slope, intercept = np.polyfit(x, values[i - window:i], 1)
        slopes[i] = slope
        intercepts[i] = intercept

    df[f'{feature_name}_{window}_trend_slope'] = slopes
    df[f'{feature_name}_{window}_trend_intercept'] = intercepts

    return df








In [13]:
# Source column passed to the feature-generation call in a later cell.
feature_name='FIICash'

In [None]:
def corr_rolling_feat_gen(df,feature_name,window_size,target_name='close'):
    """
    Add the rolling correlation between a feature and a target column.

    Parameters:
        df (DataFrame): The DataFrame containing both columns; modified in place.
        feature_name (str): The name of the feature column.
        window_size (int): Number of rows in each rolling window.
        target_name (str): The column to correlate against (default: 'close').

    Returns:
        DataFrame: The same ``df`` with
        ``f'{feature_name}_Rolling_Corr_{target_name}'`` added. The column name
        does not include ``window_size``, so a second call with a different
        window overwrites the previous result.
    """
    # Calculate the rolling correlation with the specified window size
    df[f'{feature_name}_Rolling_Corr_{target_name}'] = (
        df[feature_name].rolling(window=window_size).corr(df[target_name])
    )
    return df



In [17]:
# Show the last two rows, including the generated feature columns.
# NOTE(review): the saved output lists FIICash_close and FIICash_Rolling_Corr_close,
# which no cell visible here creates - possibly from cells run out of order or
# since removed; confirm with Restart Kernel -> Run All.
data_ffill.tail(2)

Unnamed: 0_level_0,FIICash,DIIcash,Percentage_above_5_dma,Percentage_above_10_dma,Above_15_dma,Percentage_above_20_dma,VIX,GoldPrice,IN10YR,IN10YR_yield,open,high,low,close,FIICash_5_trend_slope,FIICash_5_trend_intercept,FIICash_close,FIICash_Rolling_Corr_close
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
29-04-2024,169.0,692.0,72,64,58,54,12.2125,61.299999,99.871,7.2,22475.551,22655.801,22441.9,22643.4,-75.6,-2788.6,0.41048,0.41048
30-04-2024,1072.0,1430.0,62,62,60,60,12.87,60.880001,99.888,7.195,22679.65,22783.35,22568.4,22604.85,553.6,-3430.2,0.531632,0.531632


In [11]:
# Add the trend slope/intercept columns for `feature_name` using the function's
# default window of 5. The function mutates and returns the same DataFrame.
data_ffill=calculate_trend_indicators(data_ffill,feature_name)

In [12]:
# Candidate window lengths, in rows. Despite the singular name this is a list.
# NOTE(review): where it is consumed is not visible in this part of the notebook - confirm.
window = [5,10,15,20,25]