In [1]:
import os
import pandas as pd

# Set directory where CSV files are located
dataSourceFolder = "C:\\Users\\e0253700\\Desktop\\NUS\\MFE\\Semester 2\\FE5107\\Project\\TRD_Dalyr\\"

# Files this notebook itself writes into dataSourceFolder. They must not be
# read back as raw input when the notebook is re-run.
generated_files = {"momentum.csv"}

# Get a list of all the input CSV files in the folder.
# os.listdir returns names in arbitrary order, so sort them to make the row
# order of the concatenated frame reproducible between runs and machines.
csv_files = sorted(
    file
    for file in os.listdir(dataSourceFolder)
    if file.endswith('.csv') and file not in generated_files
)

# Fail with a clear message instead of pd.concat's "No objects to concatenate"
if not csv_files:
    raise FileNotFoundError(f"No input CSV files found in {dataSourceFolder}")

# Read each CSV file into its own dataframe
dfs = [pd.read_csv(os.path.join(dataSourceFolder, file)) for file in csv_files]

# Concatenate the dataframes into a single dataframe with a fresh 0..n-1 index
df = pd.concat(dfs, ignore_index=True)

print(df.shape)

(11171092, 27)


In [2]:
# Data cleaning

# Market types that are retained (Markettype = 1, 4, 16, 32)
valid_market_types = [1, 4, 16, 32]

# Source columns that are retained, and the readable names they are given
columns_to_keep = ['Stkcd', 'Trddt', 'Clsprc']
readable_names = {
    'Stkcd': 'StockCode',
    'Trddt': 'TradeDate',
    'Clsprc': 'ClosingPrice',
}

# 1) drop exact duplicate rows, 2) keep the valid market types,
# 3) keep only the useful columns, 4) rename them
df = df.drop_duplicates()
is_valid_market = df['Markettype'].isin(valid_market_types)
df = df[is_valid_market][columns_to_keep].rename(columns=readable_names)

print(df.shape)

(4925100, 3)


In [9]:
import pandas as pd
import numpy as np

def compute_weights(halflife, num_days):
    """Return exponentially decaying weights for ``num_days`` positions.

    The weight at position ``t`` (``t = 0 .. num_days - 1``) is
    ``exp(-ln(2) * t / halflife)``, i.e. ``0.5 ** (t / halflife)``:
    position 0 has weight 1 and the weight halves every ``halflife`` positions.

    Parameters
    ----------
    halflife : int or float
        Number of positions after which the weight has dropped to one half.
    num_days : int
        Number of weights to generate.

    Returns
    -------
    numpy.ndarray
        1-D float array of length ``num_days``.
    """
    # Vectorised: one NumPy expression instead of a Python-level loop that
    # fills the array element by element.
    day_offsets = np.arange(num_days)
    return np.exp(-np.log(2) * day_offsets / halflife)

# Calculate RSTR
def calculate_rstr(stock_data, halflife=120, lookback=500-21, lag=21):
    """Exponentially weighted sum of lagged daily log returns for one stock.

    The window consists of the most recent ``lookback`` log returns that end
    ``lag`` rows before the last row of ``stock_data`` (fewer if the history
    is shorter). The newest return in the window gets weight 1 and the weight
    halves every ``halflife`` rows going back in time.

    Parameters
    ----------
    stock_data : pandas.DataFrame
        Rows of a single stock in chronological order, with a
        ``'ClosingPrice'`` column. A ``'Returns'`` column holding the daily
        log returns is added to this frame in place.
    halflife : int or float, default 120
        Half-life of the exponential weights, in rows.
    lookback : int, default 479
        Maximum number of returns in the window. Stocks with fewer than
        ``lookback + 1`` rows yield NaN.
    lag : int, default 21
        Number of most recent rows excluded from the window.

    Returns
    -------
    float
        The weighted sum, or NaN when there is not enough history.
    """
    # Daily log return: ln(1 + simple return)
    stock_data['Returns'] = np.log(1 + stock_data['ClosingPrice'].pct_change())

    # Positional bounds of the window; slicing with non-negative positions
    # also stays correct for lag == 0, unlike a negative slice end.
    window_end = len(stock_data) - lag
    if len(stock_data) < lookback + 1 or window_end <= 0:  # not enough trading days
        return np.nan
    window_start = max(window_end - lookback, 0)
    window_returns = stock_data['Returns'].iloc[window_start:window_end]

    # compute_weights puts weight 1 at position 0, while the window is ordered
    # oldest -> newest. Reverse the weights so the most recent return carries
    # the largest weight and older returns decay.
    weights = compute_weights(halflife, len(window_returns))[::-1]

    # Series.sum skips NaN, so the undefined first return of a short history
    # does not poison the result.
    rstr = (window_returns * weights).sum()
    return rstr

# Calculate RSTR for every stock and attach it to each of that stock's rows
unique_stocks = df['StockCode'].unique()
results = []

# groupby splits the frame in a single pass instead of scanning all rows with
# a boolean mask once per stock; sort=False keeps first-appearance order.
for stock_code, stock_rows in df.groupby('StockCode', sort=False):
    # NOTE(review): assumes TradeDate values sort chronologically (e.g. ISO
    # 'YYYY-MM-DD' strings or datetimes) - confirm against the raw data.
    stock_data = stock_rows.sort_values('TradeDate', ascending=True).set_index('TradeDate')
    rstr = calculate_rstr(stock_data)
    results.append({'StockCode': stock_code, 'RSTR': rstr})

rstr_df = pd.DataFrame(results, columns=['StockCode', 'RSTR'])

# Drop an RSTR column left over from a previous run of this cell so that
# re-running it does not produce RSTR_x / RSTR_y duplicates.
df = df.drop(columns='RSTR', errors='ignore').merge(rstr_df, on='StockCode', how='left')


In [11]:
# Report the final table size, then write the table (without the index
# column) as momentum.csv in the data source folder.
print(df.shape)
momentum_csv_path = f"{dataSourceFolder}momentum.csv"
df.to_csv(momentum_csv_path, index=False)

(4925100, 4)
