In [1]:
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.metrics import precision_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
import yfinance as yf

# Symbols to download: the stock under study and the index it is compared with
ticker = "TCS.NS"  # Example: TCS's stock ticker
index_ticker = "^CNXIT"  # NIFTY IT index on NSE
period = "10y"  # Last 10 years of data

# Create the yfinance handles, then pull the price history for both symbols
stock, index = yf.Ticker(ticker), yf.Ticker(index_ticker)
data, index_data = stock.history(period=period), index.history(period=period)

# Remove the 'Dividends' and 'Stock Splits' columns from both frames
data = data.drop(columns=['Dividends', 'Stock Splits'])
index_data = index_data.drop(columns=['Dividends', 'Stock Splits'])

def run_experiment(n_stock: int, n_index: int) -> dict:
    """Train and score an RBF-kernel SVM for one pair of look-back windows.

    Four features are built from closing prices: the rolling mean of the
    daily percentage change ("volatility") and the rolling mean of the sign
    of the daily change ("momentum"), each computed for the stock over
    ``n_stock`` rows and for the index over ``n_index`` rows.  The target is
    1 when the next row's close is strictly higher than the current close.
    Rows containing any NaN are discarded, the first 90% of the remaining
    rows are used for training and the last 10% for testing.

    The module-level ``data`` (stock) and ``index_data`` (index) frames are
    read but not modified: all derived columns live on a private copy.

    Args:
        n_stock: Rolling window length, in rows, for the stock features.
        n_index: Rolling window length, in rows, for the index features.

    Returns:
        A dict with keys ``'n_stock'``, ``'n_index'``,
        ``'Initial precision'`` (default-parameter SVC) and
        ``'Optimized precision'`` (best estimator from the grid search),
        both precisions measured on the held-out test rows.
    """
    feature_columns = [
        'stock_volatility', 'stock_momentum',
        'index_volatility', 'index_momentum',
    ]

    def calculate_features(prices, days):
        # Volatility: Average of percentage change over the past n days
        volatility = prices['Close'].pct_change().rolling(window=days).mean()

        # Momentum: Average directional change over the past n days
        momentum = np.sign(prices['Close'].diff()).rolling(window=days).mean()

        return volatility, momentum

    # Work on a copy so repeated calls never write derived columns into the
    # shared module-level frame.
    frame = data.copy()

    # Calculate stock and index features; the index series are aligned to the
    # stock rows by index label on assignment.
    frame['stock_volatility'], frame['stock_momentum'] = calculate_features(frame, n_stock)
    frame['index_volatility'], frame['index_momentum'] = calculate_features(index_data, n_index)

    # Create 'Tomorrow' and 'Target' columns for the stock
    frame['Tomorrow'] = frame['Close'].shift(-1)
    frame['Target'] = (frame['Tomorrow'] > frame['Close']).astype(int)

    # Drop rows with NaN values: the rolling warm-up rows and the final row,
    # whose 'Tomorrow' is unknown.
    prepared_data = frame.dropna()

    # Chronological split: earliest 90% of rows train, the rest test
    train_ratio = 0.9
    split_index = int(len(prepared_data) * train_ratio)
    train = prepared_data.iloc[:split_index]
    test = prepared_data.iloc[split_index:]

    # Separate features and target variables
    X_train = train[feature_columns]
    y_train = train['Target']
    X_test = test[feature_columns]
    y_test = test['Target']

    # Scale the features; the scaler is fitted on the training rows only
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Initialize and train the SVM model with RBF kernel
    model = SVC(kernel='rbf', gamma='scale')
    model.fit(X_train, y_train)

    # Make predictions and evaluate precision
    y_pred = model.predict(X_test)
    initial_precision = precision_score(y_test, y_pred)

    # Hyperparameter tuning with GridSearchCV
    param_grid = {
        'C': [0.1, 1, 10, 100],
        'kernel': ['rbf'],
        'gamma': [0.001, 0.01, 0.1, 1, 'auto', 'scale']
    }

    # NOTE(review): no `scoring` is passed, so candidates are ranked by the
    # estimator's default score rather than by the precision reported below,
    # and cv=3 is not a time-ordered split -- confirm both are intended.
    grid_search = GridSearchCV(SVC(), param_grid=param_grid, cv=3, verbose=0, n_jobs=-1)
    grid_search.fit(X_train, y_train)

    # Evaluate the optimized model
    best_model = grid_search.best_estimator_
    y_pred_best = best_model.predict(X_test)
    optimized_precision = precision_score(y_test, y_pred_best)

    return {
        'n_stock': n_stock,
        'n_index': n_index,
        'Initial precision': initial_precision,
        'Optimized precision': optimized_precision
    }

# Run one experiment for every pairing of stock and index look-back windows
results = [
    run_experiment(n_stock, n_index)
    for n_stock in [5, 10, 20, 90, 270]
    for n_index in [5, 10, 20, 90, 270]
]

# Tabulate the sweep, one row per pairing, and print it
results_df = pd.DataFrame(results)
print(results_df)


    n_stock  n_index  Initial precision  Optimized precision
0         5        5           0.493056             0.475936
1         5       10           0.475936             0.484211
2         5       20           0.472826             0.469697
3         5       90           0.504587             0.486339
4         5      270           0.474576             0.453333
5        10        5           0.483444             0.476190
6        10       10           0.419643             0.475962
7        10       20           0.467105             0.479339
8        10       90           0.500000             0.489796
9        10      270           0.458824             0.467742
10       20        5           0.502924             0.502924
11       20       10           0.473684             0.481481
12       20       20           0.492228             0.481481
13       20       90           0.506579             0.483254
14       20      270           0.471910             0.465969
15       90        5    