In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import mean_squared_error
from keras.layers import Dropout

2024-04-24 07:51:42.744854: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-04-24 07:51:42.746997: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-24 07:51:42.785046: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2024-04-24 07:51:42.785886: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI AVX512_BF16 AVX_VNNI AMX_TILE AMX_INT8 AMX_BF16 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
def resample_and_adjust_using_original_times(df, target_count):
    # Creating a new column 'Time_bin' to convert 'Time' values into 10-second bins
    df['Time_bin'] = (df['Time'] // 5 * 5).astype(int)
    
    all_time_bins = range(df['Time_bin'].min(), df['Time_bin'].max() + 5, 5)
    grouped = df.groupby('Time_bin')

    new_records = []
    last_known_price = None 
    
    # Iterating through all possible time bins
    for time_bin in all_time_bins:
        # If the current time bin exists within the grouped data
        if time_bin in grouped.groups:
            group = grouped.get_group(time_bin)
            times = group['Time'].values
            prices = group['Weighted_Price'].values
            # If the prices array is not empty, update the last known price
            if len(prices) > 0:
                last_known_price = prices[-1]
        else:
            group = None 
            
        # If the current time bin is empty or has only one price data point
        if group is None or len(prices) == 0:
            prices = np.full(target_count, last_known_price)
            times = np.linspace(time_bin, time_bin + 4, num=target_count)
        elif len(prices) == 1:
            times = np.linspace(time_bin, time_bin + 4, num=target_count)
            prices = np.full(target_count, last_known_price)
        else:
            # If there are fewer data points than the target count, additional points will be added
            while len(prices) < target_count:
                time_diffs = np.diff(times)
                idx_to_fill = np.argmax(time_diffs)
                new_time = (times[idx_to_fill] + times[idx_to_fill + 1]) / 2
                new_price = (prices[idx_to_fill] + prices[idx_to_fill + 1]) / 2
                times = np.insert(times, idx_to_fill + 1, new_time)
                prices = np.insert(prices, idx_to_fill + 1, new_price)
            # If there are multiple data points within the time bin, proceed with normal processing
            while len(prices) > target_count:
                time_diffs = np.diff(times)
                idx_to_merge = np.argmin(time_diffs)
                new_time = (times[idx_to_merge] + times[idx_to_merge + 1]) / 2
                new_price = (prices[idx_to_merge] + prices[idx_to_merge + 1]) / 2
                times = np.delete(times, [idx_to_merge, idx_to_merge + 1])
                times = np.insert(times, idx_to_merge, new_time)
                prices = np.delete(prices, [idx_to_merge, idx_to_merge + 1])
                prices = np.insert(prices, idx_to_merge, new_price)
         # Add the processed data to the results list          
        new_records.extend(zip(times, prices))

    new_df = pd.DataFrame(new_records, columns=['Time', 'Weighted_Price'])
    new_df.sort_values('Time', inplace=True)
    return new_df

In [3]:
import os
import pandas as pd

cleaned_files_directory = 'Cleaned_Tapes'
dataframes = {}

# Loop through each cleaned CSV file and process it
for cleaned_file in os.listdir(cleaned_files_directory):
    if cleaned_file.startswith('Cleaned_') and cleaned_file.endswith('.csv'):
        # Read the cleaned file
        file_path = os.path.join(cleaned_files_directory, cleaned_file)
        df = pd.read_csv(file_path)
        
        # Extract only the 'Time' and 'Weighted_Price' columns
        subset = df[['Time', 'Weighted_Price']].copy()
        processed_df = resample_and_adjust_using_original_times(subset, 3)
        
        # Use the original file name without 'Cleaned_' as the dictionary key
        df_key = cleaned_file[len('Cleaned_'):]
        
        # Store the subset DataFrame in the dictionary
        dataframes[df_key] = processed_df
for key in dataframes:
    print(f"{key}:")
    print(dataframes[key].head())  # Show the first few rows of each DataFrame


UoB_Set01_2025-01-02tapes.csv:
       Time  Weighted_Price
0  11.01275      267.500000
1  12.33800      270.000000
2  13.73300      267.000000
3  18.32100      265.000000
4  18.98750      264.333333
UoB_Set01_2025-01-03tapes.csv:
   Time  Weighted_Price
0   0.0           281.0
1   2.0           281.0
2   4.0           281.0
3   5.0           283.0
4   7.0           283.0
UoB_Set01_2025-01-06tapes.csv:
   Time  Weighted_Price
0   0.0      278.000000
1   2.0      278.000000
2   4.0      278.000000
3   5.0      279.142857
4   7.0      279.142857
UoB_Set01_2025-01-07tapes.csv:
   Time  Weighted_Price
0   5.0           336.0
1   7.0           336.0
2   9.0           336.0
3  10.0           337.0
4  12.0           337.0
UoB_Set01_2025-01-08tapes.csv:
   Time  Weighted_Price
0   5.0           365.0
1   7.0           365.0
2   9.0           365.0
3  10.0           365.0
4  12.0           365.0
UoB_Set01_2025-01-09tapes.csv:
    Time  Weighted_Price
0  3.131           316.0
1  3.751           3

In [4]:
def create_dataset(data, time_step, step=3):
    X, y = [], []
    for i in range(0, len(data)-time_step, step):
        X.append(data[i:(i+time_step), 0])
        y.append(data[(i+time_step):(i+time_step+3), 0])
    return np.array(X), np.array(y)

In [5]:
time_step = 108

In [6]:
training_data = []
validation_data = []

In [7]:
scaler = MinMaxScaler(feature_range=(0, 1))

In [8]:
for index, (key, df) in enumerate(dataframes.items()):
    # Select the 'Weighted_Price' column and scale it
    price = df['Weighted_Price'].values.reshape(-1, 1)
    scaled_data = scaler.fit_transform(price)

    # Create the dataset
    X, y = create_dataset(scaled_data, time_step)

    # Reshape the data as required by your model
    X = X.reshape(X.shape[0], X.shape[1], 1)
    y = y.reshape(y.shape[0], y.shape[1], 1)

    # Split the data into training and validation sets
    if index < 80:  # First 80 DataFrames for training
        training_data.append((X, y))
    elif 80 <= index < 100:  # Next 20 DataFrames for validation
        validation_data.append((X, y))

In [9]:
X_train_combined = np.concatenate([X for X, y in training_data])
y_train_combined = np.concatenate([y for X, y in training_data])
X_valid_combined = np.concatenate([X for X, y in validation_data])
y_valid_combined = np.concatenate([y for X, y in validation_data])

In [10]:
model = Sequential()
model.add(LSTM(units=50, return_sequences=True,input_shape=(X_train_combined.shape[1], X_train_combined.shape[2])))
model.add(Dropout(0.2))
model.add(LSTM(units=50))
model.add(Dense(100, activation='relu'))
model.add(Dense(units=3))  

In [11]:
model.compile(optimizer='adam', loss='mean_squared_error')

In [12]:
history = model.fit(
    X_train_combined, y_train_combined,
    epochs=50, batch_size=64,
    validation_data=(X_valid_combined, y_valid_combined)
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
 591/7604 [=>............................] - ETA: 5:05 - loss: 0.0077

KeyboardInterrupt: 

In [13]:
model.save('model32.keras')

In [14]:
predictions_dict = {}
true_values_dict = {}
for test_index, (key, df) in enumerate(dataframes.items()):
    if test_index >= 100:  # Skip if index is >= 100
        price = df['Weighted_Price'].values.reshape(-1, 1)
        scaled_data = scaler.fit_transform(price)
    
        # Create the dataset
        X, y = create_dataset(scaled_data, time_step)
    
        # Reshape the data as required by your model
        X = X.reshape(X.shape[0], X.shape[1], 1)
        predictions = model.predict(X)
        predictions = scaler.inverse_transform(predictions)
        predictions_dict[key] = predictions.flatten()
        true_values_dict[key] = y.flatten()



In [15]:
for key in true_values_dict.keys():
    values = np.array(true_values_dict[key]).reshape(-1, 1)
    true_values_dict[key] = scaler.inverse_transform(values).flatten()

In [16]:
from sklearn.metrics import r2_score
for key in predictions_dict:
    if key in true_values_dict:
        predictions = predictions_dict[key]
        true_values = true_values_dict[key]
        rmse = np.sqrt(mean_squared_error(true_values, predictions))
        r_squared = r2_score(true_values, predictions)
        print(rmse)
        print(r_squared)

19.83520008640631
-35.852820719588266
12.688442297293797
-14.296999615222985
10.01020444476438
-6.955152941738852
5.293026955068776
-1.3636056062074444
5.141291034930369
-0.9974977298543011
6.01231789507342
-1.8557193519602633
5.645657753078994
-0.7308254149420552
6.553476984022199
-1.7014809368742125
7.151489487374507
-2.913183074898993
7.108789290382777
-3.8153100626455263
5.251204970450874
-1.228598036335025
4.101765362019997
-0.0949339737846493
4.184935404294852
-0.5306979551937019
4.168470501737863
-0.3192005839742662
3.8803215465921728
-0.09735496118951681
2.746668656472422
0.25653294804181304
3.1820051537002394
0.1721636724080382
3.5960961624026084
0.2431131093896267
3.006341079734207
0.17048293799377434
3.2821098696648607
0.2309815310568093
3.102819008787536
0.2862617313025416
3.2470262045366414
0.3225476991512639
3.3733008144864045
0.2276691192644661
3.357768249252377
0.22184924843268805
2.789095559757236
0.28608126834127157


In [17]:
all_predictions = np.array([])
all_true_values = np.array([])

for key in predictions_dict:
    all_predictions = np.concatenate((all_predictions, predictions_dict[key]))
    all_true_values = np.concatenate((all_true_values, true_values_dict[key]))

rmse = np.sqrt(mean_squared_error(all_true_values, all_predictions))
r_squared = r2_score(all_true_values, all_predictions)

print('Overall Root Mean Squared Error:', rmse)
print('Overall R² Score:', r_squared)

Overall Root Mean Squared Error: 6.684507808952734
Overall R² Score: -2.22955393878563


In [57]:
all_results

Unnamed: 0,Highest_Price,Lowest_Price,Threshold,Decision,act_buy,act_sell
1970-01-01 06:49:58.593000000,281.687134,279.593140,0.063158,1,True,True
1970-01-01 06:50:08.730000000,282.037567,281.093201,0.064157,1,True,True
1970-01-01 06:50:11.303000000,283.740082,283.349152,0.064522,2,True,True
1970-01-01 06:50:23.331000000,284.867401,284.443359,0.065270,2,True,True
1970-01-01 06:50:38.521000000,284.708374,283.757538,0.066068,1,True,True
...,...,...,...,...,...,...
1970-01-01 08:29:18.281000000,291.102325,289.757324,0.070121,1,True,True
1970-01-01 08:29:24.419000000,290.940887,289.985931,0.070406,1,True,True
1970-01-01 08:29:38.896000000,291.549927,290.852631,0.070923,1,True,True
1970-01-01 08:29:49.095000000,290.971527,289.782532,0.070587,1,True,True


In [58]:
initial_capital = 10000
transaction_num = 1
hold_stocks = False
transaction_result = []
stock_num = 0


for index, row in all_results.iterrows():
    operation = row['Decision']
    highest_price = row['Highest_Price']
    lowest_price = row['Lowest_Price']
    act_buy = row['act_buy']
    act_sell = row['act_sell']
    
    if highest_price-lowest_price >= 1 and operation == 1:
        
        if  act_buy == True and act_sell == True  and initial_capital >= highest_price:

            initial_capital -= transaction_num * lowest_price
            stock_num += transaction_num
            initial_capital += stock_num*highest_price
            sell_num = stock_num
            stock_num = 0
            if stock_num != 0:
                hold_stocks = True
            else:
                hold_stocks = False
            transaction_result.append({'operation': 'buy first and sell all', 'buy price':lowest_price,'sell price':highest_price ,'sell number':sell_num ,'buy_num':transaction_num})

        if  act_buy == False and act_sell == True   and hold_stocks == True :

            initial_capital += stock_num*highest_price
            sell_num = stock_num
            stock_num = 0
            if stock_num != 0:
                hold_stocks = True
            else:
                hold_stocks = False
            transaction_result.append({'operation': 'buy faild only sell', 'buy price': 0,'sell price': highest_price,'sell number':sell_num ,'buy_num': 0 })

        if  act_buy == False and act_sell == True  and initial_capital >= highest_price :
            initial_capital -= transaction_num * lowest_price
            stock_num += transaction_num
            if stock_num != 0:
                hold_stocks = True
            else:
                hold_stocks = False
            transaction_result.append({'operation': 'sell faild only buy', 'buy price':lowest_price ,'sell price': 0,'sell number':0 ,'buy_num': transaction_num })

        if  act_buy == False and act_sell == False:
            if stock_num != 0:
                hold_stocks = True
            else:
                hold_stocks = False
            transaction_result.append({'operation': 'sell faild and buy faild', 'buy price':0 ,'sell price': 0,'sell number':0 ,'buy_num': 0 })

if hold_stocks == True:
    final_price = data_copy['Weighted_Price'].iloc[-1]
    initial_capital += final_price*stock_num
    sell_num = stock_num
    stock_num = 0
    hold_stocks = False
    transaction_result.append({'operation': 'sell all', 'buy price':final_price ,'sell price': 0,'sell number':sell_num ,'buy_num': stock_num })
    
transaction_result_df = pd.DataFrame(transaction_result)
print(transaction_result_df)

# Calculate final capital and profit
final_capital = initial_capital
profit = final_capital - 10000
profitability = profit/10000
profitability_percent = "{:.4f}%".format(profitability * 100)
print(f'capital at closing：{final_capital}')
print(f'total_profit：{profit}')
print(f'Profitability：{profitability_percent}')

# profitability_data = {
#     'Final Capital': final_capital,
#     'Profit': profit,
#     'Profitability': profitability,
#     'Profitability Percent': profitability_percent
# }

#     # Convert the dictionary to a DataFrame
# profitability_df = pd.DataFrame([profitability_data])

    
# if match:
#     date_string = match.group(0)
#     new_file_name = f'{date_string}_transaction_result.csv'
#     new_file_name1 = f'{date_string}_profitability.csv'
#     transaction_result_df.to_csv(os.path.join(output_folder, new_file_name))
#     profitability_df.to_csv(os.path.join(output_folder, new_file_name1))

                  operation   buy price  sell price  sell number  buy_num
0    buy first and sell all  279.593140  281.687134            1        1
1    buy first and sell all  282.208801  283.672333            1        1
2    buy first and sell all  280.869812  283.065247            1        1
3    buy first and sell all  280.738647  282.293060            1        1
4    buy first and sell all  277.968994  280.235382            1        1
..                      ...         ...         ...          ...      ...
288  buy first and sell all  290.608490  291.896332            1        1
289  buy first and sell all  287.731140  289.735046            1        1
290  buy first and sell all  290.583191  291.648132            1        1
291  buy first and sell all  289.757324  291.102325            1        1
292  buy first and sell all  289.782532  290.971527            1        1

[293 rows x 5 columns]
capital at closing：10445.081329345703
total_profit：445.0813293457031
Profitability：4.450