In [19]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, GRU
from tensorflow.keras.optimizers import SGD
from tensorflow.random import set_seed

In [10]:
# Directory holding the exported "*_final.log" files. The location is machine-specific;
# this constant is the single place to change when the data lives elsewhere.
LOG_DIR = '/Users/ibrahimuali/Documents/GitHub/ML_4Fin/Data/Log Files'

# Names of the log files to load, in load order.
LOG_FILE_NAMES = [
    '0f5b852e-5a27-4716-b162-35a8d8cce6e4_final 2.log',
    '12eeee45-089e-4e6a-8b83-6afc887d926e_final.log',
    '1a31c65b-827a-4d00-943f-9a1f33f344d1_final.log',
    '2ed227e7-def4-4baf-835d-8db512357a91_final.log',
    '82bce015-a91c-4694-8ea8-2d6a8fcf517d_final.log',
]

# Full path of every log file; the resulting strings are identical to the previously
# hard-coded list, so code that iterates over file_paths is unaffected.
file_paths = [LOG_DIR + '/' + log_name for log_name in LOG_FILE_NAMES]

In [11]:
def process_log_file(file_path):
    """Extract the "Activities log:" section of a log file as a DataFrame.

    The section starts on the line after the first line containing
    "Activities log:" and ends at the first blank (whitespace-only) line or at
    end of file. Each line of the section is split on ';'. The first line
    supplies the column names and the remaining lines the rows.

    Parameters
    ----------
    file_path : str or path-like
        Path of the log file to read.

    Returns
    -------
    pandas.DataFrame or None
        A frame whose values are all strings (no type conversion is done
        here), or None when the file has no "Activities log:" line or the
        section is empty.

    Raises
    ------
    ValueError
        Raised by pandas when a row's field count does not match the header.
    """
    section_rows = []
    in_section = False

    # Stream the file instead of loading it whole: the section of interest is
    # only part of the log, and reading can stop as soon as the section ends.
    with open(file_path, 'r') as file:
        for raw_line in file:
            if not in_section:
                # The marker line itself carries no data; capture starts on the next line.
                in_section = "Activities log:" in raw_line
                continue

            stripped = raw_line.strip()
            if not stripped:  # A blank line is taken to mark the end of the section
                break
            section_rows.append(stripped.split(';'))

    if not section_rows:
        return None

    header, *records = section_rows
    return pd.DataFrame(records, columns=header)

In [12]:
# Parse every log file. process_log_file returns None when a file has no usable
# "Activities log:" section; such files are recorded in skipped_paths rather than
# being dropped without a trace.
all_dataframes = []
skipped_paths = []
for path in file_paths:
    df = process_log_file(path)
    if df is not None:
        all_dataframes.append(df)
    else:
        skipped_paths.append(path)

# pd.concat raises an uninformative "No objects to concatenate" on an empty list,
# so fail early with a message that names the files that were tried.
if not all_dataframes:
    raise ValueError(f"No activities log could be parsed from any of: {skipped_paths}")

final_df = pd.concat(all_dataframes, ignore_index=True)

# The parsed frames hold text only; convert every column except the product name to
# numbers. errors='coerce' turns empty or unparsable fields into NaN.
numeric_columns = final_df.columns.difference(['product'])
final_df[numeric_columns] = final_df[numeric_columns].apply(pd.to_numeric, errors='coerce')

final_df

Unnamed: 0,day,timestamp,product,bid_price_1,bid_volume_1,bid_price_2,bid_volume_2,bid_price_3,bid_volume_3,ask_price_1,ask_volume_1,ask_price_2,ask_volume_2,ask_price_3,ask_volume_3,mid_price,profit_and_loss
0,1,0,AMETHYSTS,9998,9,9995.0,23.0,,,10005,23,,,,,10001.5,0.000000
1,1,0,STARFRUIT,5047,23,,,,,5054,23,,,,,5050.5,0.000000
2,1,100,STARFRUIT,5053,1,5050.0,2.0,5047.0,22.0,5054,22,,,,,5053.5,0.000000
3,1,100,AMETHYSTS,10002,1,10000.0,2.0,9998.0,4.0,10004,1,10005.0,21.0,,,10003.0,0.000000
4,1,200,AMETHYSTS,9996,1,9995.0,21.0,,,10004,1,10005.0,21.0,,,10000.0,2.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
299995,5,999900,ORCHIDS,1027,5,1026.0,2.0,1019.0,50.0,1035,5,1036.0,2.0,1043.0,50.0,1031.0,0.000000
299996,5,999900,ROSES,13624,49,,,,,13625,49,,,,,13624.5,0.000000
299997,5,999900,STRAWBERRIES,3990,57,3989.0,138.0,,,3991,191,,,,,3990.5,0.000000
299998,5,999900,STARFRUIT,4918,27,,,,,4923,4,4925.0,27.0,,,4920.5,10638.089844


In [14]:
# Replace every missing value with 0, rebinding the name to the filled frame.
final_df = final_df.fillna(0)

final_df

Unnamed: 0,day,timestamp,product,bid_price_1,bid_volume_1,bid_price_2,bid_volume_2,bid_price_3,bid_volume_3,ask_price_1,ask_volume_1,ask_price_2,ask_volume_2,ask_price_3,ask_volume_3,mid_price,profit_and_loss
0,1,0,AMETHYSTS,9998,9,9995.0,23.0,0.0,0.0,10005,23,0.0,0.0,0.0,0.0,10001.5,0.000000
1,1,0,STARFRUIT,5047,23,0.0,0.0,0.0,0.0,5054,23,0.0,0.0,0.0,0.0,5050.5,0.000000
2,1,100,STARFRUIT,5053,1,5050.0,2.0,5047.0,22.0,5054,22,0.0,0.0,0.0,0.0,5053.5,0.000000
3,1,100,AMETHYSTS,10002,1,10000.0,2.0,9998.0,4.0,10004,1,10005.0,21.0,0.0,0.0,10003.0,0.000000
4,1,200,AMETHYSTS,9996,1,9995.0,21.0,0.0,0.0,10004,1,10005.0,21.0,0.0,0.0,10000.0,2.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
299995,5,999900,ORCHIDS,1027,5,1026.0,2.0,1019.0,50.0,1035,5,1036.0,2.0,1043.0,50.0,1031.0,0.000000
299996,5,999900,ROSES,13624,49,0.0,0.0,0.0,0.0,13625,49,0.0,0.0,0.0,0.0,13624.5,0.000000
299997,5,999900,STRAWBERRIES,3990,57,3989.0,138.0,0.0,0.0,3991,191,0.0,0.0,0.0,0.0,3990.5,0.000000
299998,5,999900,STARFRUIT,4918,27,0.0,0.0,0.0,0.0,4923,4,4925.0,27.0,0.0,0.0,4920.5,10638.089844


In [15]:
# Show the dtype pandas holds for each column of final_df.
final_df.dtypes

day                  int64
timestamp            int64
product             object
bid_price_1          int64
bid_volume_1         int64
bid_price_2        float64
bid_volume_2       float64
bid_price_3        float64
bid_volume_3       float64
ask_price_1          int64
ask_volume_1         int64
ask_price_2        float64
ask_volume_2       float64
ask_price_3        float64
ask_volume_3       float64
mid_price          float64
profit_and_loss    float64
dtype: object

In [17]:
# Total of the profit_and_loss column over every row of final_df.
# NOTE(review): the rows span several products and timestamps; if profit_and_loss is a
# running total per product, this sum counts the same profit repeatedly — confirm.
final_df['profit_and_loss'].sum()

1206896057.177246