In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

def calculate_trading_metrics(features_folder, y_test_folder, trading_metrics_folder):
    if not os.path.exists(trading_metrics_folder):
        os.makedirs(trading_metrics_folder)

    for feature_file in os.listdir(features_folder):
        if feature_file.endswith('_features.csv'):
            # Extract stock name from file name
            stock_name = feature_file.split('_features.csv')[0]
            
            print(feature_file)

            # Load feature dataset and y_test dataset
            features_data = pd.read_csv(os.path.join(features_folder, feature_file))
            y_test_data = pd.read_csv(os.path.join(y_test_folder, f'{stock_name}_y_test.csv'), index_col='Date', parse_dates=True)
             
                
            print(y_test_data)
            # Assuming 'next_day_expected_returns' is the column containing predictions
            predictions_series = y_test_data['next_day_expected_returns']

            # Convert index to datetime if needed
            predictions_series.index = pd.to_datetime(predictions_series.index)
            features_data.index = pd.to_datetime(features_data.index)

            # Align predictions with daily returns
            strategy_returns = predictions_series * features_data['daily_returns']
            strategy_returns = strategy_returns.clip(lower=-1)

            # Calculate cumulative returns
            cumulative_returns = (1 + strategy_returns / 100).cumprod() - 1
            
            print(cumulative_returns)

            # Calculate accuracy
            actual_returns = features_data['daily_returns']
            accuracy = (np.sign(predictions_series.shift(-1)) == np.sign(actual_returns)).mean()

            # Identify profitable trades
            profitable_trades = strategy_returns > 0

            # Calculate accuracy of profitable trades
            accuracy_profitable_trades = (profitable_trades.shift(-1) == (actual_returns > 0)).mean()

            # Calculate total returns
            total_returns = cumulative_returns.iloc[-1]

            # Calculate annual returns
            days = (cumulative_returns.index[-1] - cumulative_returns.index[0]).days
            years = days / 365
            annual_returns = (1 + total_returns) ** (1 / years) - 1

            # Save trading metrics to CSV file
            metrics_file_path = os.path.join(trading_metrics_folder, f'{stock_name}_trading_metrics.csv')
            trading_metrics = pd.DataFrame({
                'Cumulative Returns': cumulative_returns,
                'Accuracy': accuracy,
                'Accuracy of Profitable Trades': accuracy_profitable_trades,
                'Total Returns': total_returns,
                'Annual Returns': annual_returns
            })
            trading_metrics.to_csv(metrics_file_path)
            print(f'Trading metrics for {stock_name} saved successfully at {metrics_file_path}')

# Define folder paths
features_folder = 'features'
y_test_folder = 'y_test'
trading_metrics_folder = 'trading_metrics'

# Calculate trading metrics for each stock
calculate_trading_metrics(features_folder, y_test_folder, trading_metrics_folder)


HDFC_Bank_Limited_features.csv
            next_day_expected_returns
Date                                 
2019-06-10                        0.0
2019-06-11                       -1.0
2019-06-12                        0.0
2019-06-13                        0.0
2019-06-14                        0.0
...                               ...
2024-03-26                        1.0
2024-03-27                        0.0
2024-03-28                        1.0
2024-04-01                        0.0
2024-04-02                        0.0

[1192 rows x 1 columns]
1970-01-01 00:00:00.000000000   NaN
1970-01-01 00:00:00.000000001   NaN
1970-01-01 00:00:00.000000002   NaN
1970-01-01 00:00:00.000000003   NaN
1970-01-01 00:00:00.000000004   NaN
                                 ..
2024-03-26 00:00:00.000000000   NaN
2024-03-27 00:00:00.000000000   NaN
2024-03-28 00:00:00.000000000   NaN
2024-04-01 00:00:00.000000000   NaN
2024-04-02 00:00:00.000000000   NaN
Length: 7148, dtype: float64


ValueError: Can only compare identically-labeled Series objects

In [2]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Function to import y_test datasets from y_test folder
def import_y_test_datasets(folder_path):
    datasets = {}
    for filename in os.listdir(folder_path):
        if filename.endswith('_y_test.csv'):
            stock_name = filename.split('_y_test.csv')[0].strip('_')
            dataset_path = os.path.join(folder_path, filename)
            # Read the CSV file and set the 'Date' column as the index
            dataset = pd.read_csv(dataset_path, index_col='Date', parse_dates=True)
            datasets[stock_name] = dataset
    return datasets

# Function to import dataset for each stock from dataset folder
def import_stock_datasets(folder_path):
    datasets = {}
    for filename in os.listdir(folder_path):
        if filename.endswith('_features.csv'):
            stock_name = filename.split('_features.csv')[0].strip('_')
            dataset_path = os.path.join(folder_path, filename)
            # Read the CSV file and set the 'Date' column as the index
            dataset = pd.read_csv(dataset_path, index_col='Date', parse_dates=True)
            datasets[stock_name] = dataset
    return datasets

# Function to calculate trading metrics
def calculate_trading_metrics(y_test_datasets, stock_datasets):
    for stock_name, y_test in y_test_datasets.items():
        print(f"Trading Metrics for {stock_name}:")
        # Get the corresponding dataset for the stock
        stock_data = stock_datasets.get(stock_name)
        if stock_data is None:
            print(f"No dataset found for {stock_name}. Skipping...")
            continue

        # Assuming 'y_test' is your Series with predictions indexed by date
        predictions_series = y_test  # This is already a Series, so no need to create a DataFrame

        # Convert index to datetime if it's not already, to ensure alignment
        predictions_series.index = pd.to_datetime(predictions_series.index)

        # Align predictions with daily returns from the stock dataset
        strategy_returns = predictions_series * stock_data['daily_returns']
        strategy_returns = strategy_returns.clip(lower=-1)

        # Calculate cumulative returns
        cumulative_returns = (1 + strategy_returns / 100).cumprod() - 1

        # Calculate Accuracy
        actual_returns = stock_data.loc[predictions_series.index, 'daily_returns']
        accuracy = (np.sign(predictions_series.shift(-1)) == np.sign(actual_returns)).mean()

        # Identify profitable trades
        profitable_trades = strategy_returns > 0

        # Calculate accuracy of profitable trades
        accuracy_profitable_trades = (profitable_trades.shift(-1) == (actual_returns > 0)).mean()

        # Calculate Total Returns
        total_returns = cumulative_returns.iloc[-1]

        # Calculate Annual Returns
        days = (cumulative_returns.index[-1] - cumulative_returns.index[0]).days
        years = days / 365
        annual_returns = (1 + total_returns) ** (1 / years) - 1

        print(f"Accuracy of Profitable Trades: {accuracy_profitable_trades:.2%}")
        print(f"Total Returns: {total_returns:.2%}")
        print(f"Annual Returns: {annual_returns:.2%}")
        print("-----------------------------------")

# Path to the y_test folder
y_test_folder_path = 'y_test/'

# Path to the dataset folder
dataset_folder_path = 'Datasets/'

# Import y_test datasets
y_test_datasets = import_y_test_datasets(y_test_folder_path)

print(y_test_datasets)

# Import stock datasets
stock_datasets = import_stock_datasets(dataset_folder_path)

print(stock_datasets)
# Calculate and print trading metrics for each stock
calculate_trading_metrics(y_test_datasets, stock_datasets)


{'HDFC_Bank_Limited':             next_day_expected_returns
Date                                 
2019-06-10                        0.0
2019-06-11                       -1.0
2019-06-12                        0.0
2019-06-13                        0.0
2019-06-14                        0.0
...                               ...
2024-03-26                        1.0
2024-03-27                        0.0
2024-03-28                        1.0
2024-04-01                        0.0
2024-04-02                        0.0

[1192 rows x 1 columns], 'Hindustan_Unilever_Limited':             next_day_expected_returns
Date                                 
2019-06-10                       -1.0
2019-06-11                        0.0
2019-06-12                        0.0
2019-06-13                       -1.0
2019-06-14                        0.0
...                               ...
2024-03-26                        0.0
2024-03-27                        1.0
2024-03-28                        1.0
2024-04-01

In [3]:
import pandas as pd
import numpy as np
import os

# Function to calculate trading metrics for each stock
def calculate_trading_metrics(predictions_folder, dataset_folder, trading_metrics_folder):
    if not os.path.exists(trading_metrics_folder):
        os.makedirs(trading_metrics_folder)
    
    for filename in os.listdir(predictions_folder):
        if filename.endswith('.csv'):
            stock_name = filename.split('_predictions.csv')[0].replace('_', ' ')
            predictions_path = os.path.join(predictions_folder, filename)
            dataset_path = os.path.join(dataset_folder, f"{stock_name}.csv")
            trading_metrics_path = os.path.join(trading_metrics_folder, f"{stock_name}_trading_metrics.csv")

            # Load predictions
            predictions = pd.read_csv(predictions_path, index_col='Date', parse_dates=True)
            # Load dataset
            dataset = pd.read_csv(dataset_path, index_col='Date', parse_dates=True)

            # Align dataset with predictions
            dataset = dataset.loc[predictions.index]

            # Calculate strategy returns
            strategy_returns = predictions['next_day_expected_returns'] * dataset['daily_returns']
            strategy_returns = strategy_returns.clip(lower=-1)

            # Calculate cumulative returns
            cumulative_returns = (1 + strategy_returns / 100).cumprod() - 1

            # Calculate accuracy of profitable trades
            actual_returns = dataset['daily_returns']
            profitable_trades = strategy_returns > 0
            accuracy_profitable_trades = (profitable_trades.shift(-1) == (actual_returns > 0)).mean()

            # Calculate total returns
            total_returns = cumulative_returns.iloc[-1]

            # Calculate annual returns
            days = (cumulative_returns.index[-1] - cumulative_returns.index[0]).days
            years = days / 365
            annual_returns = (1 + total_returns) ** (1 / years) - 1

            # Save trading metrics to CSV
            trading_metrics = pd.DataFrame({
                'Accuracy of Profitable Trades': [accuracy_profitable_trades],
                'Total Returns': [total_returns],
                'Annual Returns': [annual_returns]
            })
            trading_metrics.to_csv(trading_metrics_path)
            print(f"Trading metrics for {stock_name} saved at {trading_metrics_path}")

# Define folder paths
predictions_folder = 'y_test/'
dataset_folder = 'Datasets/'
trading_metrics_folder = 'trading_metrics'

# Calculate trading metrics for each stock
calculate_trading_metrics(predictions_folder, dataset_folder, trading_metrics_folder)


FileNotFoundError: [Errno 2] No such file or directory: 'Datasets/HDFC Bank Limited y test.csv.csv'

In [None]:
import os
import pandas as pd

# Function to import dataset for each stock from a folder
def import_stock_datasets(folder_path):
    datasets = {}
    for filename in os.listdir(folder_path):
        if filename.endswith('.csv'):
            stock_name = filename.split('.csv')[0].replace('_', ' ')
            dataset_path = os.path.join(folder_path, filename)
            # Read the CSV file and set the 'Date' column as the index
            dataset = pd.read_csv(dataset_path, index_col='Date', parse_dates=True)
            datasets[stock_name] = dataset
    return datasets

# Path to the dataset folder
folder_path = 'datasets/'

# Import datasets from the folder
stock_datasets = import_stock_datasets(folder_path)

# Print the last row of each dataset
for stock_name, dataset in stock_datasets.items():
    print(f"Last row of {stock_name} dataset:")
    print(dataset.tail(1))


In [4]:
import os
import pandas as pd
import numpy as np

# Function to import y_test datasets from y_test folder
def import_y_test_datasets(folder_path):
    datasets = {}
    for filename in os.listdir(folder_path):
        if filename.endswith('_y_test.csv'):
            stock_name = filename.split('_y_test.csv')[0].strip('_')
            dataset_path = os.path.join(folder_path, filename)
            # Read the CSV file and set the 'Date' column as the index
            dataset = pd.read_csv(dataset_path, index_col='Date', parse_dates=True)
            datasets[stock_name] = dataset
    return datasets


# Function to import dataset for each stock from a folder
def import_stock_datasets(folder_path):
    datasets = {}
    for filename in os.listdir(folder_path):
        if filename.endswith('.csv'):
            stock_name = filename.split('.csv')[0].replace('_', ' ')
            dataset_path = os.path.join(folder_path, filename)
            # Read the CSV file and set the 'Date' column as the index
            dataset = pd.read_csv(dataset_path, index_col='Date', parse_dates=True)
            datasets[stock_name] = dataset
    return datasets




# Function to calculate trading metrics
def calculate_trading_metrics(y_test_datasets, stock_datasets):
    for stock_name, y_test in y_test_datasets.items():
        print(f"Trading Metrics for {stock_name}:")
        # Get the corresponding dataset for the stock
        stock_data = stock_datasets.get(stock_name)
        if stock_data is None:
            print(f"No dataset found for {stock_name}. Skipping...")
            continue

        # Assuming 'y_test' is your Series with predictions indexed by date
        predictions_series = y_test  # This is already a Series, so no need to create a DataFrame

        # Convert index to datetime if it's not already, to ensure alignment
        predictions_series.index = pd.to_datetime(predictions_series.index)

        # Align predictions with daily returns from the stock dataset
        strategy_returns = predictions_series * stock_data['daily_returns']
        strategy_returns = strategy_returns.clip(lower=-1)

        # Calculate cumulative returns
        cumulative_returns = (1 + strategy_returns / 100).cumprod() - 1

        # Calculate Accuracy
        actual_returns = stock_data.loc[predictions_series.index, 'daily_returns']
        accuracy = (np.sign(predictions_series.shift(-1)) == np.sign(actual_returns)).mean()

        # Identify profitable trades
        profitable_trades = strategy_returns > 0

        # Calculate accuracy of profitable trades
        accuracy_profitable_trades = (profitable_trades.shift(-1) == (actual_returns > 0)).mean()

        # Calculate Total Returns
        total_returns = cumulative_returns.iloc[-1]

        # Calculate Annual Returns
        days = (cumulative_returns.index[-1] - cumulative_returns.index[0]).days
        years = days / 365
        annual_returns = (1 + total_returns) ** (1 / years) - 1

        print(f"Accuracy of Profitable Trades: {accuracy_profitable_trades:.2%}")
        print(f"Total Returns: {total_returns:.2%}")
        print(f"Annual Returns: {annual_returns:.2%}")
        print("-----------------------------------")

# Path to the y_test folder
y_test_folder_path = 'y_test/'

# Path to the dataset folder
folder_path = 'datasets/'

# Import datasets from the folder
stock_datasets = import_stock_datasets(folder_path)

# Import y_test datasets
y_test_datasets = import_y_test_datasets(y_test_folder_path)



# Calculate and print trading metrics for each stock
calculate_trading_metrics(y_test_datasets, stock_datasets)


Trading Metrics for HDFC_Bank_Limited:
No dataset found for HDFC_Bank_Limited. Skipping...
Trading Metrics for Hindustan_Unilever_Limited:
No dataset found for Hindustan_Unilever_Limited. Skipping...
Trading Metrics for ITC_Limited:
No dataset found for ITC_Limited. Skipping...
Trading Metrics for Kotak_Mahindra_Bank_Limited:
No dataset found for Kotak_Mahindra_Bank_Limited. Skipping...
Trading Metrics for Larsen_&_Toubro_Limited:
No dataset found for Larsen_&_Toubro_Limited. Skipping...
Trading Metrics for Oil_and_Natural_Gas_Corporation_Limited:
No dataset found for Oil_and_Natural_Gas_Corporation_Limited. Skipping...
Trading Metrics for Reliance_Industries_Limited:
No dataset found for Reliance_Industries_Limited. Skipping...
Trading Metrics for State_Bank_of_India:
No dataset found for State_Bank_of_India. Skipping...
Trading Metrics for Tata_Consultancy_Services_Limited:
No dataset found for Tata_Consultancy_Services_Limited. Skipping...
Trading Metrics for Tata_Motors_Limited:
No

In [None]:
stock_datasets

In [None]:
y_test_datasets

In [6]:
import os
import pandas as pd
import numpy as np

# Function to calculate daily returns
def calculate_daily_returns(df):
    df['daily_returns'] = df['Close'].pct_change() * 100
    return df

# Function to import y_test datasets from y_test folder
def import_y_test_datasets(folder_path):
    datasets = {}
    for filename in os.listdir(folder_path):
        if filename.endswith('_y_test.csv'):
            stock_name = filename.split('_y_test.csv')[0].strip('_')
            dataset_path = os.path.join(folder_path, filename)
            # Read the CSV file and set the 'Date' column as the index
            dataset = pd.read_csv(dataset_path, index_col='Date', parse_dates=True)
            datasets[stock_name] = dataset
    return datasets

# Function to import dataset for each stock from a folder
def import_stock_datasets(folder_path):
    datasets = {}
    for filename in os.listdir(folder_path):
        if filename.endswith('.csv'):
            stock_name = filename.split('.csv')[0].replace('_', ' ')
            dataset_path = os.path.join(folder_path, filename)
            # Read the CSV file and set the 'Date' column as the index
            dataset = pd.read_csv(dataset_path, index_col='Date', parse_dates=True)
            # Calculate daily returns and add them as a new column
            dataset = calculate_daily_returns(dataset)
            datasets[stock_name] = dataset
    return datasets

# Function to calculate trading metrics
def calculate_trading_metrics(y_test_datasets, stock_datasets):
    for stock_name, y_test in y_test_datasets.items():
        print(f"Trading Metrics for {stock_name}:")
        
        # Replace underscores with spaces in stock_name extracted from y_test filenames
        stock_name = stock_name.replace('_', ' ')
        
        # Get the corresponding dataset for the stock
        stock_data = stock_datasets.get(stock_name)
        
        if stock_data is None:
            print(f"No dataset found for {stock_name}. Skipping...")
            continue

        # Assuming 'y_test' is your Series with predictions indexed by date
        predictions_series = y_test  # This is already a Series, so no need to create a DataFrame

        # Convert index to datetime if it's not already, to ensure alignment
        predictions_series.index = pd.to_datetime(predictions_series.index)

        # Align predictions with daily returns from the stock dataset
        strategy_returns = predictions_series * stock_data['daily_returns']
        strategy_returns = strategy_returns.clip(lower=-1)

        # Calculate cumulative returns
        cumulative_returns = (1 + strategy_returns / 100).cumprod() - 1

        # Calculate Accuracy
        actual_returns = stock_data.loc[predictions_series.index, 'daily_returns']
        # Align the indices of predictions_series and actual_returns before comparing
        predictions_series, actual_returns = predictions_series.align(actual_returns, axis=0, join='inner')
        accuracy = (np.sign(predictions_series.shift(-1)) == np.sign(actual_returns)).mean()

        # Identify profitable trades
        profitable_trades = strategy_returns > 0

        # Calculate accuracy of profitable trades
        accuracy_profitable_trades = (profitable_trades.shift(-1) == (actual_returns > 0)).mean()

        # Calculate Total Returns
        total_returns = cumulative_returns.iloc[-1]

        # Calculate Annual Returns
        days = (cumulative_returns.index[-1] - cumulative_returns.index[0]).days
        years = days / 365
        annual_returns = (1 + total_returns) ** (1 / years) - 1

        print(f"Accuracy of Profitable Trades: {accuracy_profitable_trades:.2%}")
        print(f"Total Returns: {total_returns:.2%}")
        print(f"Annual Returns: {annual_returns:.2%}")
        print("-----------------------------------")

# Path to the y_test folder
y_test_folder_path = 'y_test/'

# Path to the dataset folder
folder_path = 'datasets/'

# Import datasets from the folder
stock_datasets = import_stock_datasets(folder_path)

# Import y_test datasets
y_test_datasets = import_y_test_datasets(y_test_folder_path)

# Calculate and print trading metrics for each stock
calculate_trading_metrics(y_test_datasets, stock_datasets)


Trading Metrics for HDFC_Bank_Limited:


ValueError: Operands are not aligned. Do `left, right = left.align(right, axis=1, copy=False)` before operating.

In [None]:
stock_datasets