In [1]:
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import precision_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
import yfinance as yf

ticker = "TCS.NS"  # Example: TCS's stock ticker
index_ticker = "^CNXIT"  # NIFTY IT index on NSE
period = "10y"  # Last 10 years of data

# Fetch stock and index data
stock = yf.Ticker(ticker)
index = yf.Ticker(index_ticker)
data = stock.history(period=period)
index_data = index.history(period=period)

# Drop unnecessary columns
data = data.drop(columns=['Dividends', 'Stock Splits'])
index_data = index_data.drop(columns=['Dividends', 'Stock Splits'])

def run_experiment(n_stock, n_index):
    def calculate_features(prices, days):
        # Volatility: Average of percentage change over the past n days
        volatility = prices['Close'].pct_change().rolling(window=days).mean()

        # Momentum: Average directional change over the past n days
        momentum = np.sign(prices['Close'].diff()).rolling(window=days).mean()

        return volatility, momentum
    
    # Calculate stock and index features
    data['stock_volatility'], data['stock_momentum'] = calculate_features(data, n_stock)
    data['index_volatility'], data['index_momentum'] = calculate_features(index_data, n_index)

    # Create 'Tomorrow' and 'Target' columns for the stock
    data['Tomorrow'] = data['Close'].shift(-1)
    data['Target'] = (data['Tomorrow'] > data['Close']).astype(int)

    # Drop rows with NaN values due to rolling calculations
    prepared_data = data.dropna()

    # Define training/test split
    train_ratio = 0.9
    split_index = int(len(prepared_data) * train_ratio)
    train = prepared_data.iloc[:split_index]
    test = prepared_data.iloc[split_index:]

    # Separate features and target variables
    X_train = train[['stock_volatility', 'stock_momentum', 'index_volatility', 'index_momentum']]
    y_train = train['Target']
    X_test = test[['stock_volatility', 'stock_momentum', 'index_volatility', 'index_momentum']]
    y_test = test['Target']

    # Scale the features
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Initialize and train the Random Forest model
    model = RandomForestClassifier(random_state=26)
    model.fit(X_train, y_train)

    # Make predictions and evaluate accuracy
    y_pred = model.predict(X_test)
    initial_precision = precision_score(y_test, y_pred)

    # Hyperparameter tuning with GridSearchCV
    param_grid = {
        'n_estimators': [50, 100, 200],
        'max_depth': [None, 10, 20, 30],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4]
    }

    grid_search = GridSearchCV(RandomForestClassifier(random_state=26), param_grid=param_grid, cv=3, verbose=0, n_jobs=-1)
    grid_search.fit(X_train, y_train)

    # Evaluate the optimized model
    best_model = grid_search.best_estimator_
    y_pred_best = best_model.predict(X_test)
    optimized_precision = precision_score(y_test, y_pred_best)

    return {
        'n_stock': n_stock,
        'n_index': n_index,
        'Initial precision': initial_precision,
        'Optimized precision': optimized_precision
    }

# Collect results for varying n_stock and n_index
results = []
for n_stock in [5,10,20,90,270]:
    for n_index in [5,10,20,90,270]:
        result = run_experiment(n_stock, n_index)
        results.append(result)

# Create a pandas DataFrame for tabular display
results_df = pd.DataFrame(results)
print(results_df)

    n_stock  n_index  Initial precision  Optimized precision
0         5        5           0.504854             0.486239
1         5       10           0.422680             0.435897
2         5       20           0.477477             0.432432
3         5       90           0.594340             0.565217
4         5      270           0.468468             0.424000
5        10        5           0.435185             0.458333
6        10       10           0.523810             0.520000
7        10       20           0.464286             0.478992
8        10       90           0.554455             0.512000
9        10      270           0.475806             0.467213
10       20        5           0.495146             0.518182
11       20       10           0.538462             0.492308
12       20       20           0.448000             0.453901
13       20       90           0.485981             0.486239
14       20      270           0.478571             0.478571
15       90        5    

In [2]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import precision_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
import yfinance as yf

ticker = "TCS.NS"  # Example: TCS's stock ticker
index_ticker = "^CNXIT"  # NIFTY IT index on NSE
period = "10y"  # Last 10 years of data

# Fetch stock and index data
stock = yf.Ticker(ticker)
index = yf.Ticker(index_ticker)
data = stock.history(period=period)
index_data = index.history(period=period)

# Drop unnecessary columns
data = data.drop(columns=['Dividends', 'Stock Splits'])
index_data = index_data.drop(columns=['Dividends', 'Stock Splits'])

def run_experiment(n_stock, n_index):
    def calculate_features(prices, days):
        # Volatility: Average of percentage change over the past n days
        volatility = prices['Close'].pct_change().rolling(window=days).mean()

        # Momentum: Average directional change over the past n days
        momentum = np.sign(prices['Close'].diff()).rolling(window=days).mean()

        return volatility, momentum
    
    # Calculate stock and index features
    data['stock_volatility'], data['stock_momentum'] = calculate_features(data, n_stock)
    data['index_volatility'], data['index_momentum'] = calculate_features(index_data, n_index)

    # Create 'Tomorrow' and 'Target' columns for the stock
    data['Tomorrow'] = data['Close'].shift(-1)
    data['Target'] = (data['Tomorrow'] > data['Close']).astype(int)

    # Drop rows with NaN values due to rolling calculations
    prepared_data = data.dropna()

    # Define training/test split
    train_ratio = 0.9
    split_index = int(len(prepared_data) * train_ratio)
    train = prepared_data.iloc[:split_index]
    test = prepared_data.iloc[split_index:]

    # Separate features and target variables
    X_train = train[['stock_volatility', 'stock_momentum', 'index_volatility', 'index_momentum']]
    y_train = train['Target']
    X_test = test[['stock_volatility', 'stock_momentum', 'index_volatility', 'index_momentum']]
    y_test = test['Target']

    # Scale the features
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Initialize and train the Logistic Regression model
    model = LogisticRegression(max_iter=1000, random_state=16)
    model.fit(X_train, y_train)

    # Make predictions and evaluate accuracy
    y_pred = model.predict(X_test)
    initial_precision = precision_score(y_test, y_pred)

    # Hyperparameter tuning with GridSearchCV
    param_grid = {
        'C': [0.01, 0.1, 1, 10, 100],  # Regularization strength
        'solver': ['liblinear', 'lbfgs'],  # Solvers for optimization
        'penalty': ['l2'],  # Regularization type (l2 is most common)
    }

    grid_search = GridSearchCV(LogisticRegression(max_iter=1000, random_state=16), param_grid=param_grid, cv=3, verbose=0, n_jobs=-1)
    grid_search.fit(X_train, y_train)

    # Evaluate the optimized model
    best_model = grid_search.best_estimator_
    y_pred_best = best_model.predict(X_test)
    optimized_precision = precision_score(y_test, y_pred_best)

    return {
        'n_stock': n_stock,
        'n_index': n_index,
        'Initial precision': initial_precision,
        'Optimized precision': optimized_precision
    }

# Collect results for varying n_stock and n_index
results = []
for n_stock in [5,10,20,90,270]:
    for n_index in [5,10,20,90,270]:
        result = run_experiment(n_stock, n_index)
        results.append(result)

# Create a pandas DataFrame for tabular display
results_df = pd.DataFrame(results)
print(results_df)


    n_stock  n_index  Initial precision  Optimized precision
0         5        5           0.483696             0.492386
1         5       10           0.485876             0.483871
2         5       20           0.494118             0.497110
3         5       90           0.477124             0.493976
4         5      270           0.475936             0.473118
5        10        5           0.485549             0.482955
6        10       10           0.463277             0.476923
7        10       20           0.500000             0.493902
8        10       90           0.481707             0.481081
9        10      270           0.468085             0.465969
10       20        5           0.485714             0.485714
11       20       10           0.487047             0.479592
12       20       20           0.490446             0.490446
13       20       90           0.491018             0.477778
14       20      270           0.465969             0.465969
15       90        5    