# Get Data

In [2]:
import pandas as pd
import os
from data_processing.process_raw_data import process_data


# Directory containing the raw input CSV files
input_directory = 'datasets/FA_StockPrices/'

# Directory that receives the processed files
output_directory = 'datasets/FA_StockPrices/processed/'

# to_csv() does not create missing parent directories, so make sure the
# destination exists before the loop starts writing into it.
os.makedirs(output_directory, exist_ok=True)

# List of input files. os.listdir() also returns sub-directory names (such as
# 'processed'), but those do not end in '.csv' and are filtered out here.
input_files = [f for f in os.listdir(input_directory) if f.endswith('.csv')]

# Process each input file
for input_file in input_files:
    # The processed file keeps the name of the raw file it was built from
    input_path = os.path.join(input_directory, input_file)
    output_path = os.path.join(output_directory, input_file)

    # process_data is a project helper; its result is only required to offer
    # to_csv() (presumably a pandas DataFrame - confirm in data_processing).
    processed_data = process_data(input_path)

    # Save processed data to a new CSV file, without the index column
    processed_data.to_csv(output_path, index=False)

print("All files processed successfully.")


All files processed successfully.


In [3]:
import pandas as pd
import os
from data_processing.merge_raw_data import merge_data


# Folder holding the processed stock price CSV files
stock_data_directory = 'datasets/FA_StockPrices/processed/'

# Folder holding the polarity data
polarity_data_directory = 'datasets/FA_DataSet_XML/'

# Folder passed to merge_data as the destination
output_directory = 'datasets/FA_StockPrices/processed/merged/'

# Names of the CSV files located directly inside the stock folder
stock_files = [name for name in os.listdir(stock_data_directory) if name.endswith('.csv')]

# Hand each stock file, together with the polarity and output folders, to merge_data
for stock_file in stock_files:
    merge_data(
        os.path.join(stock_data_directory, stock_file),
        polarity_data_directory,
        output_directory,
    )


Processed data saved to: datasets/FA_StockPrices/processed/merged/khodro_merged_data_filtered.csv
Processed data saved to: datasets/FA_StockPrices/processed/merged/shabendar_merged_data_filtered.csv
Processed data saved to: datasets/FA_StockPrices/processed/merged/shapna_merged_data_filtered.csv
Processed data saved to: datasets/FA_StockPrices/processed/merged/vnaft_merged_data_filtered.csv
Processed data saved to: datasets/FA_StockPrices/processed/merged/zamyad_merged_data_filtered.csv


# Deep Q-Learning (DQL) for learning the actions

In [1]:
from models.DeepQLearningModel import DeepQLearningModel
import os

# Folder that is scanned for CSV files, and the second folder given to the model
# (presumably where it writes its results - confirm in DeepQLearningModel)
directory = 'datasets/FA_StockPrices/processed/merged/'
outdirectory = 'datasets/FA_StockPrices/processed/merged/out'

# A single model object is reused for every file
model = DeepQLearningModel(directory, outdirectory)

# Walk the folder listing and pass each CSV file name to the model
for filename in os.listdir(directory):
    if not filename.endswith('.csv'):
        # Anything that is not a CSV file (e.g. a sub-folder name) is skipped
        continue
    model.process_file(filename)



  super().__init__(**kwargs)


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
Processed file: khodro_merged_data_filtered.csv


  super().__init__(**kwargs)


[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step
Processed file: shabendar_merged_data_filtered.csv


  super().__init__(**kwargs)


[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
Processed file: shapna_merged_data_filtered.csv


  super().__init__(**kwargs)


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step
Processed file: vnaft_merged_data_filtered.csv


  super().__init__(**kwargs)


[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
Processed file: zamyad_merged_data_filtered.csv


# Building the state space for regression

In [1]:
import os
import pandas as pd
import numpy as np

def calculate_moving_average(data, window_size=7):
    """Return the simple moving average of ``data``.

    Args:
        data: Object exposing ``rolling`` (e.g. a pandas Series).
        window_size: Number of consecutive observations per window.

    Returns:
        The rolling mean of ``data``. Positions that do not yet have a full
        window of observations are NaN.
    """
    windowed = data.rolling(window=window_size)
    return windowed.mean()

def calculate_exponential_moving_average(data, alpha=0.2):
    """Return the exponentially weighted moving average of ``data``.

    Args:
        data: Object exposing ``ewm`` (e.g. a pandas Series).
        alpha: Smoothing factor passed to ``ewm``.

    Returns:
        The exponentially weighted mean, computed with ``adjust=False``
        (the recursive form of the average).
    """
    weighted = data.ewm(alpha=alpha, adjust=False)
    return weighted.mean()

# Set the directory containing the CSV files
input_directory = 'datasets/FA_StockPrices/processed/merged/out'
output_directory = 'datasets/FA_StockPrices/processed/merged/out/RegData'

# Create the output directory if it doesn't exist. exist_ok=True does this in
# one call, with no separate existence check that could go stale in between.
os.makedirs(output_directory, exist_ok=True)

# Get the list of CSV files. The RegData sub-folder sits inside input_directory,
# but its name does not end in '.csv', so it is not picked up here.
files = [f for f in os.listdir(input_directory) if f.endswith('.csv')]

for file in files:
    # Load the CSV file
    df = pd.read_csv(os.path.join(input_directory, file))

    # Select only the required columns. The explicit copy makes the new-column
    # assignments below write to an independent frame rather than a slice.
    df = df[['DTYYYYMMDD', 'FIRST', 'HIGH', 'LOW', 'CLOSE', 'VOL', 'OPEN', 'Action']].copy()

    # Moving averages of FIRST. Rows without a full moving-average window come
    # back as NaN and are replaced by 0; the exponential average is kept as is.
    df['Moving_Average'] = calculate_moving_average(df['FIRST']).fillna(0)
    df['Exponential_Moving_Average'] = calculate_exponential_moving_average(df['FIRST'])

    # Calculate return rate as the relative change from OPEN to CLOSE.
    # NOTE(review): a row with OPEN == 0 produces inf/NaN here - confirm the
    # input never contains one.
    df['Return_Rate'] = (df['CLOSE'] - df['OPEN']) / df['OPEN']

    # Save the processed DataFrame to a new CSV file
    output_file = os.path.join(output_directory, f'RegData_{file}')
    df.to_csv(output_file, index=False)

    print(f'Processed file: {file}, Output saved to: {output_file}')


Processed file: output_khodro_merged_data_filtered.csv, Output saved to: datasets/FA_StockPrices/processed/merged/out/RegData\RegData_output_khodro_merged_data_filtered.csv
Processed file: output_shabendar_merged_data_filtered.csv, Output saved to: datasets/FA_StockPrices/processed/merged/out/RegData\RegData_output_shabendar_merged_data_filtered.csv
Processed file: output_shapna_merged_data_filtered.csv, Output saved to: datasets/FA_StockPrices/processed/merged/out/RegData\RegData_output_shapna_merged_data_filtered.csv
Processed file: output_vnaft_merged_data_filtered.csv, Output saved to: datasets/FA_StockPrices/processed/merged/out/RegData\RegData_output_vnaft_merged_data_filtered.csv
Processed file: output_zamyad_merged_data_filtered.csv, Output saved to: datasets/FA_StockPrices/processed/merged/out/RegData\RegData_output_zamyad_merged_data_filtered.csv


In [3]:
import os
import pandas as pd
import numpy as np

# Set the input and output directories
input_directory = 'datasets/FA_StockPrices/processed/merged/out/RegData'
output_directory = 'datasets/FA_StockPrices/processed/merged/out/RegData/Processed'

# Create the output directory if it doesn't exist. exist_ok=True does this in
# one call, with no separate existence check that could go stale in between.
os.makedirs(output_directory, exist_ok=True)

def construct_row(current_row, prev_rows):
    """Flatten the current row and its previous rows into one list.

    Args:
        current_row: Object exposing ``.values`` (e.g. a pandas Series).
        prev_rows: Iterable of objects exposing ``.values``.

    Returns:
        A list holding the values of ``current_row`` followed by the values
        of every entry of ``prev_rows``, in the order given.
    """
    pieces = [current_row, *prev_rows]
    return [value for piece in pieces for value in piece.values]

# Get the list of input files
input_files = [f for f in os.listdir(input_directory) if f.endswith('.csv')]

# Features carried for the current row and repeated for every previous row
feature_columns = ['FIRST', 'HIGH', 'LOW', 'CLOSE', 'VOL', 'OPEN', 'Action',
                   'Moving_Average', 'Exponential_Moving_Average', 'Return_Rate']

# How many previous rows are appended to each row
n_previous_rows = 6

# Iterate over each input file
for file in input_files:
    # Read the input CSV file
    df = pd.read_csv(os.path.join(input_directory, file))

    # Current-row block: the date column unchanged, every feature renamed to
    # CURRENT_<feature>.
    current_block = df[['DTYYYYMMDD'] + feature_columns].rename(
        columns={col: f'CURRENT_{col}' for col in feature_columns})

    # One block per lag. shift(lag) moves every feature down by `lag` rows, so
    # row i holds the values of row i - lag; rows with no such predecessor are
    # filled with 0. This is done column-wise instead of row by row, and
    # integer columns keep their integer dtype.
    previous_blocks = [
        df[feature_columns].shift(lag, fill_value=0).add_prefix(f'PREVIOUS_{lag}_')
        for lag in range(1, n_previous_rows + 1)
    ]

    # 1 date + 10 current + 6 * 10 previous = 71 columns, ordered
    # DTYYYYMMDD, CURRENT_*, PREVIOUS_1_*, ..., PREVIOUS_6_*
    processed_df = pd.concat([current_block] + previous_blocks, axis=1)

    # Save the processed DataFrame to a new CSV file
    output_file = os.path.join(output_directory, f'processed_{file}')
    processed_df.to_csv(output_file, index=False)

    print(f'Processed file: {file}, Output saved to: {output_file}')


Processed file: RegData_output_khodro_merged_data_filtered.csv, Output saved to: datasets/FA_StockPrices/processed/merged/out/RegData/Processed\processed_RegData_output_khodro_merged_data_filtered.csv
Processed file: RegData_output_shabendar_merged_data_filtered.csv, Output saved to: datasets/FA_StockPrices/processed/merged/out/RegData/Processed\processed_RegData_output_shabendar_merged_data_filtered.csv
Processed file: RegData_output_shapna_merged_data_filtered.csv, Output saved to: datasets/FA_StockPrices/processed/merged/out/RegData/Processed\processed_RegData_output_shapna_merged_data_filtered.csv
Processed file: RegData_output_vnaft_merged_data_filtered.csv, Output saved to: datasets/FA_StockPrices/processed/merged/out/RegData/Processed\processed_RegData_output_vnaft_merged_data_filtered.csv
Processed file: RegData_output_zamyad_merged_data_filtered.csv, Output saved to: datasets/FA_StockPrices/processed/merged/out/RegData/Processed\processed_RegData_output_zamyad_merged_data_filt

# DQN Model for Regression

In [1]:
from models.StockPriceModelTrainer import StockPriceModelTrainer
import os

# Folder that is scanned for CSV files, and the second folder given to the trainer
# (presumably where trained models are stored - confirm in StockPriceModelTrainer)
directory = 'datasets/FA_StockPrices/processed/merged/out/RegData/Processed'
outdirectory = 'datasets/Models'

# One trainer object handles every file
model_trainer = StockPriceModelTrainer(directory, outdirectory)

# Gather the CSV file names first, then pass them to the trainer one by one
csv_filenames = [name for name in os.listdir(directory) if name.endswith('.csv')]
for filename in csv_filenames:
    model_trainer.process_file(filename)

Epoch 1/100


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 51ms/step - loss: 0.1806 - val_loss: 0.0659
Epoch 2/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0624 - val_loss: 0.0407
Epoch 3/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0416 - val_loss: 0.0347
Epoch 4/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0319 - val_loss: 0.0276
Epoch 5/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0247 - val_loss: 0.0235
Epoch 6/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0187 - val_loss: 0.0218
Epoch 7/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0163 - val_loss: 0.0200
Epoch 8/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0146 - val_loss: 0.0185
Epoch 9/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7




LSTM Metrics:
MSE for processed_RegData_output_khodro_merged_data_filtered.csv: 0.010849927415198158
RMSE for processed_RegData_output_khodro_merged_data_filtered.csv: 0.10416298486121717
MAE for processed_RegData_output_khodro_merged_data_filtered.csv: 0.07591647050505265
MAPE for processed_RegData_output_khodro_merged_data_filtered.csv: 53.1512061042633
ROR for processed_RegData_output_khodro_merged_data_filtered.csv: 2.3965362515283983
Max Drawdown for processed_RegData_output_khodro_merged_data_filtered.csv: 0.140894827342428
Sharpe Ratio for processed_RegData_output_khodro_merged_data_filtered.csv: 1.923878139586915
Epoch 1/100


  super().__init__(**kwargs)


[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 68ms/step - loss: 0.1358 - val_loss: 0.0603
Epoch 2/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0577 - val_loss: 0.0477
Epoch 3/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.0434 - val_loss: 0.0411
Epoch 4/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0327 - val_loss: 0.0352
Epoch 5/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0282 - val_loss: 0.0307
Epoch 6/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0217 - val_loss: 0.0267
Epoch 7/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0187 - val_loss: 0.0241
Epoch 8/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0158 - val_loss: 0.0225
Epoch 9/100
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 




MSE for processed_RegData_output_shabendar_merged_data_filtered.csv: 0.01424179425752796
RMSE for processed_RegData_output_shabendar_merged_data_filtered.csv: 0.11933898884073034
MAE for processed_RegData_output_shabendar_merged_data_filtered.csv: 0.08655623135132143
MAPE for processed_RegData_output_shabendar_merged_data_filtered.csv: 62.91639929078876
ROR for processed_RegData_output_shabendar_merged_data_filtered.csv: -20.9977896039622
Max Drawdown for processed_RegData_output_shabendar_merged_data_filtered.csv: 0.5430266760206968
Sharpe Ratio for processed_RegData_output_shabendar_merged_data_filtered.csv: 1.0862715192209542
Epoch 1/100


  super().__init__(**kwargs)


[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 55ms/step - loss: 0.1009 - val_loss: 0.0509
Epoch 2/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0461 - val_loss: 0.0347
Epoch 3/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0315 - val_loss: 0.0272
Epoch 4/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0242 - val_loss: 0.0217
Epoch 5/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0180 - val_loss: 0.0187
Epoch 6/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0138 - val_loss: 0.0165
Epoch 7/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0115 - val_loss: 0.0150
Epoch 8/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0107 - val_loss: 0.0139
Epoch 9/100
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7







[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 92ms/step





LSTM Metrics:
MSE for processed_RegData_output_shapna_merged_data_filtered.csv: 0.008600468765522505
RMSE for processed_RegData_output_shapna_merged_data_filtered.csv: 0.09273871233483083
MAE for processed_RegData_output_shapna_merged_data_filtered.csv: 0.05913644838552185
MAPE for processed_RegData_output_shapna_merged_data_filtered.csv: 45.34150060668511
ROR for processed_RegData_output_shapna_merged_data_filtered.csv: -33.86908790830329
Max Drawdown for processed_RegData_output_shapna_merged_data_filtered.csv: 0.7946304520082764
Sharpe Ratio for processed_RegData_output_shapna_merged_data_filtered.csv: 1.0283668911806632
Epoch 1/100


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 44ms/step - loss: 0.1235 - val_loss: 0.0483
Epoch 2/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0472 - val_loss: 0.0339
Epoch 3/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0304 - val_loss: 0.0248
Epoch 4/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0221 - val_loss: 0.0194
Epoch 5/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0164 - val_loss: 0.0162
Epoch 6/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0135 - val_loss: 0.0147
Epoch 7/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0112 - val_loss: 0.0129
Epoch 8/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0098 - val_loss: 0.0120
Epoch 9/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7




LSTM Metrics:
MSE for processed_RegData_output_vnaft_merged_data_filtered.csv: 0.005318804473506441
RMSE for processed_RegData_output_vnaft_merged_data_filtered.csv: 0.07293013419366813
MAE for processed_RegData_output_vnaft_merged_data_filtered.csv: 0.05298987298799002
MAPE for processed_RegData_output_vnaft_merged_data_filtered.csv: 36.82308115056408
ROR for processed_RegData_output_vnaft_merged_data_filtered.csv: -65.47357030337106
Max Drawdown for processed_RegData_output_vnaft_merged_data_filtered.csv: 0.3110614431608906
Sharpe Ratio for processed_RegData_output_vnaft_merged_data_filtered.csv: 1.5583712687457087
Epoch 1/100


  super().__init__(**kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 46ms/step - loss: 0.1849 - val_loss: 0.0955
Epoch 2/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0687 - val_loss: 0.0627
Epoch 3/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0494 - val_loss: 0.0508
Epoch 4/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0393 - val_loss: 0.0429
Epoch 5/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0291 - val_loss: 0.0371
Epoch 6/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0238 - val_loss: 0.0326
Epoch 7/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0202 - val_loss: 0.0297
Epoch 8/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0174 - val_loss: 0.0274
Epoch 9/100
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7




LSTM Metrics:
MSE for processed_RegData_output_zamyad_merged_data_filtered.csv: 0.009609267113863482
RMSE for processed_RegData_output_zamyad_merged_data_filtered.csv: 0.09802686934643727
MAE for processed_RegData_output_zamyad_merged_data_filtered.csv: 0.06839690698764705
MAPE for processed_RegData_output_zamyad_merged_data_filtered.csv: 47.46400503291791
ROR for processed_RegData_output_zamyad_merged_data_filtered.csv: -19.048321662467263
Max Drawdown for processed_RegData_output_zamyad_merged_data_filtered.csv: 0.8134172883210016
Sharpe Ratio for processed_RegData_output_zamyad_merged_data_filtered.csv: 1.0685584631424132
