# Get Data

In [2]:
import pandas as pd
import os
from data_processing.process_raw_data import process_data


# Directory containing the raw input CSV files
input_directory = 'datasets/FA_StockPrices/'

# Directory that receives the processed CSV files
output_directory = 'datasets/FA_StockPrices/processed/'

# to_csv does not create missing parent directories, so make sure the target
# exists before the loop starts (no-op when it is already there).
os.makedirs(output_directory, exist_ok=True)

# os.listdir returns entries in arbitrary order; sort them so that repeated
# runs handle the files in the same sequence.
input_files = sorted(f for f in os.listdir(input_directory) if f.endswith('.csv'))

# Process each input file and write the result under the same file name
for input_file in input_files:
    input_path = os.path.join(input_directory, input_file)
    output_path = os.path.join(output_directory, input_file)

    # process_data is a project helper; its result must expose to_csv
    # (presumably a pandas DataFrame -- confirm against the helper).
    processed_data = process_data(input_path)

    # Save processed data to a new CSV file without the index column
    processed_data.to_csv(output_path, index=False)

print("All files processed successfully.")


All files processed successfully.


In [3]:
import pandas as pd
import os
from data_processing.merge_raw_data import merge_data


# Where the processed per-stock CSV files live
stock_data_directory = 'datasets/FA_StockPrices/processed/'

# Where the polarity data lives
polarity_data_directory = 'datasets/FA_DataSet_XML/'

# Where the merged files are written
output_directory = 'datasets/FA_StockPrices/processed/merged/'

# Keep only the CSV entries of the stock directory
stock_files = list(
    filter(lambda entry: entry.endswith('.csv'), os.listdir(stock_data_directory))
)

# Hand every stock file to merge_data together with the polarity and output locations
for stock_file in stock_files:
    stock_data_path = os.path.join(stock_data_directory, stock_file)
    merge_data(stock_data_path, polarity_data_directory, output_directory)


Processed data saved to: datasets/FA_StockPrices/processed/merged/khodro_merged_data_filtered.csv
Processed data saved to: datasets/FA_StockPrices/processed/merged/shabendar_merged_data_filtered.csv
Processed data saved to: datasets/FA_StockPrices/processed/merged/shapna_merged_data_filtered.csv
Processed data saved to: datasets/FA_StockPrices/processed/merged/vnaft_merged_data_filtered.csv
Processed data saved to: datasets/FA_StockPrices/processed/merged/zamyad_merged_data_filtered.csv


# Deep Q-Learning (DQL) for learning the actions

In [1]:
from models.DeepQLearningModel import DeepQLearningModel
import os

# Input directory with the merged CSV files and the directory handed to the model for its output
directory = 'datasets/FA_StockPrices/processed/merged/'
outdirectory = 'datasets/FA_StockPrices/processed/merged/out'

# One model instance is shared by all files
model = DeepQLearningModel(directory, outdirectory)

# Walk the input directory and let the model handle every CSV entry
for filename in os.listdir(directory):
    if not filename.endswith('.csv'):
        continue
    model.process_file(filename)



  super().__init__(**kwargs)


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
Processed file: khodro_merged_data_filtered.csv


  super().__init__(**kwargs)


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
Processed file: shabendar_merged_data_filtered.csv


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
Processed file: shapna_merged_data_filtered.csv


  super().__init__(**kwargs)


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step
Processed file: vnaft_merged_data_filtered.csv


  super().__init__(**kwargs)


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
Processed file: zamyad_merged_data_filtered.csv


# Building the state space for regression

In [1]:
import os
import pandas as pd
import numpy as np

# Function to calculate moving average
def calculate_moving_average(data, window_size=7, min_periods=None):
    """Return the simple moving average of ``data``.

    Parameters
    ----------
    data : pandas.Series or pandas.DataFrame
        Values to smooth; anything exposing ``.rolling`` works.
    window_size : int, default 7
        Number of consecutive observations in each window.
    min_periods : int or None, default None
        Minimum number of observations required to emit a value. ``None``
        keeps the pandas default (the full window), so the first
        ``window_size - 1`` results are NaN. Pass ``1`` to get a partial
        average during the warm-up instead of NaN.

    Returns
    -------
    Same type as ``data``
        Rolling mean aligned with the input index.
    """
    return data.rolling(window=window_size, min_periods=min_periods).mean()

# Function to calculate exponential moving average
def calculate_exponential_moving_average(data, alpha=0.2):
    """Return the exponentially weighted mean of ``data``.

    ``alpha`` is the smoothing factor passed to ``ewm``. ``adjust=False``
    selects the recursive form, where each output is
    ``(1 - alpha) * previous_output + alpha * current_value`` and the first
    output equals the first value.
    """
    smoother = data.ewm(alpha=alpha, adjust=False)
    return smoother.mean()

# Directory with the DQL output CSV files and the directory for the regression features
input_directory = 'datasets/FA_StockPrices/processed/merged/out'
output_directory = 'datasets/FA_StockPrices/processed/merged/out/RegData'

# Create the output directory if needed; exist_ok avoids the separate
# exists() check and the race between checking and creating.
os.makedirs(output_directory, exist_ok=True)

# os.listdir order is arbitrary; sort for a reproducible processing order
files = sorted(f for f in os.listdir(input_directory) if f.endswith('.csv'))

for file in files:
    # Load the CSV file
    df = pd.read_csv(os.path.join(input_directory, file))

    # Keep only the required columns. The explicit copy guarantees that the
    # column assignments below write to an independent frame rather than to
    # a possible view of the loaded data (avoids SettingWithCopyWarning).
    df = df[['DTYYYYMMDD', 'FIRST', 'HIGH', 'LOW', 'CLOSE', 'VOL', 'OPEN', 'Action']].copy()

    # Moving average of FIRST; rows before the window is full come back as
    # NaN from the rolling mean and are stored as 0 here.
    df['Moving_Average'] = calculate_moving_average(df['FIRST']).fillna(0)
    df['Exponential_Moving_Average'] = calculate_exponential_moving_average(df['FIRST'])

    # Return rate relative to OPEN.
    # NOTE(review): a row with OPEN == 0 would yield inf/NaN here -- confirm
    # the data never contains such rows.
    df['Return_Rate'] = (df['CLOSE'] - df['OPEN']) / df['OPEN']

    # Save the processed DataFrame to a new CSV file
    output_file = os.path.join(output_directory, f'RegData_{file}')
    df.to_csv(output_file, index=False)

    print(f'Processed file: {file}, Output saved to: {output_file}')


Processed file: output_khodro_merged_data_filtered.csv, Output saved to: datasets/FA_StockPrices/processed/merged/out/RegData\RegData_output_khodro_merged_data_filtered.csv
Processed file: output_shabendar_merged_data_filtered.csv, Output saved to: datasets/FA_StockPrices/processed/merged/out/RegData\RegData_output_shabendar_merged_data_filtered.csv
Processed file: output_shapna_merged_data_filtered.csv, Output saved to: datasets/FA_StockPrices/processed/merged/out/RegData\RegData_output_shapna_merged_data_filtered.csv
Processed file: output_vnaft_merged_data_filtered.csv, Output saved to: datasets/FA_StockPrices/processed/merged/out/RegData\RegData_output_vnaft_merged_data_filtered.csv
Processed file: output_zamyad_merged_data_filtered.csv, Output saved to: datasets/FA_StockPrices/processed/merged/out/RegData\RegData_output_zamyad_merged_data_filtered.csv


In [2]:
import os
import pandas as pd
import numpy as np

# Directory with the regression feature CSV files and the directory for the lagged rows
input_directory = 'datasets/FA_StockPrices/processed/merged/out/RegData'
output_directory = 'datasets/FA_StockPrices/processed/merged/out/RegData/Processed'

# Create the output directory if needed; exist_ok replaces the separate
# exists() check and removes the gap between checking and creating.
os.makedirs(output_directory, exist_ok=True)

# Function to construct the desired format for each row
def construct_row(current_row, prev_rows):
    """Flatten one row and its predecessors into a single list.

    The values of ``current_row`` come first, followed by the values of each
    element of ``prev_rows`` in the order given. Every argument element must
    expose a ``.values`` attribute (e.g. a pandas Series).
    """
    flattened = list(current_row.values)
    flattened += [value for earlier_row in prev_rows for value in earlier_row.values]
    return flattened

# CSV files found in the input directory
input_files = [f for f in os.listdir(input_directory) if f.endswith('.csv')]

# Feature columns repeated for the current row and for every lagged row
feature_columns = ['FIRST', 'HIGH', 'LOW', 'CLOSE', 'VOL', 'OPEN', 'Action',
                   'Moving_Average', 'Exponential_Moving_Average', 'Return_Rate']
# The current row additionally carries the date column in front
current_columns = ['DTYYYYMMDD'] + feature_columns
# Lags 1..6, i.e. the six rows preceding the current one
lag_offsets = range(1, 7)

for file in input_files:
    df = pd.read_csv(os.path.join(input_directory, file))

    # One flat list of 71 values per input row
    processed_data = []

    for i in range(len(df)):
        current_row = df.iloc[i][current_columns]

        # Rows that would fall before the start of the file are replaced by zeros
        prev_rows = []
        for lag in lag_offsets:
            if i >= lag:
                prev_rows.append(df.iloc[i - lag][feature_columns])
            else:
                prev_rows.append(pd.Series([0] * len(feature_columns)))

        row_values = construct_row(current_row, prev_rows)
        processed_data.append(row_values)

    # Date, then CURRENT_* features, then PREVIOUS_<lag>_* features for each lag
    column_names = ['DTYYYYMMDD']
    column_names += [f'CURRENT_{col}' for col in feature_columns]
    column_names += [f'PREVIOUS_{lag}_{col}' for lag in lag_offsets for col in feature_columns]

    processed_df = pd.DataFrame(processed_data, columns=column_names)

    # Write the widened frame next to the other processed files
    output_file = os.path.join(output_directory, f'processed_{file}')
    processed_df.to_csv(output_file, index=False)

    print(f'Processed file: {file}, Output saved to: {output_file}')


Processed file: output_output_khodro_merged_data_filtered.csv, Output saved to: datasets/FA_StockPrices/processed/merged/out/RegData/Processed\processed_output_output_khodro_merged_data_filtered.csv
Processed file: output_output_shabendar_merged_data_filtered.csv, Output saved to: datasets/FA_StockPrices/processed/merged/out/RegData/Processed\processed_output_output_shabendar_merged_data_filtered.csv
Processed file: output_output_shapna_merged_data_filtered.csv, Output saved to: datasets/FA_StockPrices/processed/merged/out/RegData/Processed\processed_output_output_shapna_merged_data_filtered.csv
Processed file: output_output_vnaft_merged_data_filtered.csv, Output saved to: datasets/FA_StockPrices/processed/merged/out/RegData/Processed\processed_output_output_vnaft_merged_data_filtered.csv
Processed file: output_output_zamyad_merged_data_filtered.csv, Output saved to: datasets/FA_StockPrices/processed/merged/out/RegData/Processed\processed_output_output_zamyad_merged_data_filtered.csv
P

In [2]:
from data_processing.stock_Pre_Processing import stock_Pre_Processing
from data_processing.calculate_reward import calculate_reward

# Build hourly, daily and weekly datasets (with rewards) for each of the five
# stocks and write one CSV per stock and interval.

# Symbols of the five stocks handled by this notebook
stocks = ["khodro", "shabendar", "shapna", "vnaft", "zamyad"]
print(f"Number of Stocks: {len(stocks)}")

# Date range passed to the preprocessing helper
start = "2016-01-01"
end = "2018-01-01"

for stock in stocks:
    # Preprocess data once per interval string; the fourth argument is
    # presumably the sampling interval ("60m", "1d", "1wk") -- confirm
    # against stock_Pre_Processing.
    Hour = stock_Pre_Processing(stock, start, end, "60m")
    Day = stock_Pre_Processing(stock, start, end, "1d")
    Week = stock_Pre_Processing(stock, start, end, "1wk")
    
    # Reset indexes in place; drop=True discards the old index instead of
    # keeping it as a column (assumes the helper returns pandas DataFrames).
    Hour.reset_index(drop=True, inplace=True)
    Day.reset_index(drop=True, inplace=True)
    Week.reset_index(drop=True, inplace=True)
    
    # Adding rewards to datasets; the value returned by calculate_reward
    # replaces each frame.
    Hour = calculate_reward(stock,Hour)
    Day = calculate_reward(stock,Day)
    Week = calculate_reward(stock,Week)

    # Save the datasets without the index column.
    # NOTE(review): PATH is the same directory the first cell scans for raw
    # '.csv' inputs, so re-running that cell would also pick up these
    # *_hour/_day/_week files -- confirm this is intended.
    PATH = "datasets/FA_StockPrices/"
    Hour.to_csv(f"{PATH}{stock}_hour.csv", index=False)
    Day.to_csv(f"{PATH}{stock}_day.csv", index=False)
    Week.to_csv(f"{PATH}{stock}_week.csv", index=False)

Number of Stocks: 5
Date                2016-08-28 00:00:00
Open                             3210.0
High                             3200.0
Low                              3105.0
Close                            3143.0
Volume                         16126938
Average_Polarity               0.009942
Reward                        -0.010931
Action                                0
Name: 68, dtype: object
Date                2016-08-27 00:00:00
Open                             3244.0
High                             3249.0
Low                              3176.0
Close                            3210.0
Volume                         10758043
Average_Polarity               0.005263
Reward                        -0.005218
Action                                0
Name: 69, dtype: object
Date                2016-08-24 00:00:00
Open                             3240.0
High                             3279.0
Low                              3201.0
Close                            3244.0
Volume      

# DQN Model

In [3]:
from math import sqrt
from models.deep_q_trading_model import DeepQTrading
from utils.preprocessing import preprocess_data

# Train the three model variants for every company and keep them for the evaluation step
deep_q_trading = DeepQTrading()
trained_models = {}
for company in ['vnaft', 'zamyad', 'khodro', 'shabendar', 'shapna']:
    # run() hands back the LSTM, GRU and combined models, in that order
    lstm_model, gru_model, combined_model = deep_q_trading.run(company)
    trained_models[company] = dict(
        lstm_model=lstm_model,
        gru_model=gru_model,
        combined_model=combined_model,
    )
    print(f"Models for {company} trained and saved.")

Processing data for vnaft...


  super().__init__(**kwargs)
  super().__init__(**kwargs)


Models for vnaft trained and saved.
Processing data for zamyad...


  super().__init__(**kwargs)


Models for zamyad trained and saved.
Processing data for khodro...


  super().__init__(**kwargs)


Models for khodro trained and saved.
Processing data for shabendar...


  super().__init__(**kwargs)


Models for shabendar trained and saved.
Processing data for shapna...




Models for shapna trained and saved.


# DQN Evaluation

In [4]:
# Evaluate every trained model of every company on the test arrays returned
# by load_data and print its metrics.
deep_q_trading = DeepQTrading()
initial_investment = 10000  # Assuming an initial investment of $10,000

# (printed label, key in trained_models[company]) in report order
model_labels = [
    ("LSTM Model", 'lstm_model'),
    ("GRU Model", 'gru_model'),
    ("Combined Model", 'combined_model'),
]

# Metrics are kept per company and per model so they remain available after
# the loop instead of being overwritten on every iteration.
evaluation_metrics = {}

for company, models in trained_models.items():
    # Only the test arrays are used below; the remaining values are unpacked
    # to match the shape of load_data's return value.
    X_train_scaled, X_val_scaled, X_test_scaled, y_train, y_val, y_test, RoR = deep_q_trading.load_data(company)

    print(f"Evaluation Metrics for {company}:")
    evaluation_metrics[company] = {}
    for label, model_key in model_labels:
        print(f"{label}:")
        metrics = deep_q_trading.evaluate_model(models[model_key], X_test_scaled, y_test, initial_investment)
        deep_q_trading.print_metrics(metrics)
        evaluation_metrics[company][model_key] = metrics


Processing data for vnaft...
Evaluation Metrics for vnaft:
LSTM Model:
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 727us/step
MSE: 0.00735753132553263
RMSE: 1.5614817745158034
MAE: 0.00022312734961870857
RoR: -0.004905732348561287
Return: -49.05732348561287
DD: 25.168120006073284
Sharpe Ratio: 1.000353380625403
MAPE: 401.1066948494391
GRU Model:
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 727us/step
MSE: 0.007101913377916208
RMSE: 0.08232287434682901
MAE: 0.0004930893562674553
RoR: -0.005102049559354782
Return: -51.02049559354782
DD: 4.228375618875576
Sharpe Ratio: 1.6355813515534547
MAPE: 425.1172120293791
Combined Model:
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 17ms/step
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 727us/step
MSE: 0.028776153360435854
RMSE: 