# Capstone Group 5 Data Preprocessing
Prepared by: `Kuhgi Jotojot`

## 1. Library Prerequisites

In [None]:
# %pip install numpy pandas matplotlib

In [19]:
import pandas as pd
import numpy as np
import os
import re
import math
import calendar

# Plotting
import matplotlib.pyplot as plt

## 2. Loading Data

In [37]:
# Load every per-ticker CSV in `path`, keep only its closing price, and combine
# them into one wide frame (rows = dates, one column per ticker).
path = "raw_data/"

# Ticker = the run of capital letters between "PSE_DLY_" and the following comma.
# Defined once here instead of on every loop iteration.
pattern = r"PSE_DLY_([A-Z]+),\s*"

closing_price_list_with_time = list()

# os.listdir() returns entries in arbitrary order; sorting makes the column
# order of the combined frame reproducible across machines and runs.
for filename in sorted(os.listdir(path)):
    match = re.search(pattern, filename)
    if not match:
        print(f"No ticker in file: {filename}")
        continue  # Skip files that do not follow the naming scheme
    ticker = match.group(1)

    file_path = os.path.join(path, filename)

    # Read the CSV, index it by 'time', and keep only 'close' renamed to the ticker
    prices = pd.read_csv(file_path, parse_dates=['time'])
    prices = prices.set_index('time')[['close']].rename(columns={'close': ticker})

    closing_price_list_with_time.append(prices)

# pd.concat raises an opaque ValueError on an empty list; fail with a clear message instead
if not closing_price_list_with_time:
    raise ValueError(f"No files matching {pattern!r} were found in {path!r}")

# Combine all tickers side by side, aligned on the date index
closing_prices_with_time_df = pd.concat(closing_price_list_with_time, axis=1)

# Restrict to the analysis window (both bounds inclusive)
closing_prices_with_time_df = closing_prices_with_time_df[
    (closing_prices_with_time_df.index >= '2021-06-01') &
    (closing_prices_with_time_df.index <= '2024-12-31')
]

# Sort chronologically (reassignment rather than inplace, so re-running the cell is safe)
closing_prices_with_time_df = closing_prices_with_time_df.sort_index()

In [38]:
# Render the combined closing-price table (dates as rows, one column per ticker).
display(closing_prices_with_time_df)

Unnamed: 0_level_0,AC,ACEN,AEV,AGI,ALI,AREIT,BDO,BLOOM,BPI,CBC,...,MBT,MER,MONDE,PGOLD,SCC,SM,SMC,SMPH,TEL,URC
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-06-01,745.742852,7.126867,34.093254,9.865820,33.181935,28.876939,80.683465,6.438434,70.250094,20.011775,...,38.355134,227.273458,12.548869,32.004682,6.892854,922.658299,109.872241,35.070055,983.825397,119.078218
2021-06-02,774.903310,7.234112,34.749735,10.229097,35.030643,29.155749,81.217287,6.887173,71.416757,20.011775,...,38.394675,231.331912,12.548869,32.914151,7.047634,960.602081,113.563039,36.459444,990.631261,123.424138
2021-06-03,762.952303,7.351105,33.918192,10.171737,34.604018,28.876939,80.225903,6.955459,71.666760,19.642706,...,38.750548,230.520221,12.195118,32.610996,7.243688,958.680877,114.036219,35.836618,996.680917,122.381117
2021-06-04,770.600947,7.585093,33.830663,10.133498,34.509214,29.036261,81.598589,6.974970,71.541758,19.683713,...,38.829629,228.896840,12.474395,32.697610,7.130183,960.602081,112.143501,35.453337,1004.242988,121.685770
2021-06-07,760.084061,7.585093,34.312081,9.789341,34.414406,29.155749,81.141027,6.877418,70.958428,19.847745,...,38.631922,223.215003,12.530251,33.087384,7.223051,950.996060,111.197143,35.117969,1000.461953,122.554954
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-12-19,589.929882,3.600000,31.160000,8.639540,24.567847,36.850836,145.718833,4.448372,122.000000,60.412500,...,69.811896,451.887359,6.930843,29.209985,31.035083,863.865206,86.675470,24.398868,1227.272730,73.644428
2024-12-20,589.433308,3.780000,31.445872,8.600000,24.469181,36.604506,142.371238,4.409351,121.500000,60.266576,...,69.621414,439.632127,7.596517,29.983759,31.271271,869.775344,86.675470,24.300880,1209.878313,72.674146
2024-12-23,595.888770,4.000000,32.112905,8.600000,25.899838,37.047899,142.863531,4.380086,122.600000,61.677174,...,71.240516,472.701801,8.007668,29.983759,31.176796,886.520736,86.923398,24.643836,1212.777382,72.771174
2024-12-26,606.813397,3.950000,32.494067,9.070000,26.146504,37.392760,143.454283,4.389841,124.400000,60.315218,...,71.478620,466.865976,8.037036,30.080481,32.499447,868.790321,81.320235,24.790818,1253.364355,77.137444


## 3. Data Preprocessing

### 3.1 Returns and Volatility

#### Notes

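* Returns - mean daily percentage change in closing price over the analysis window, annualized by multiplying by 252 trading days
* Volatility - standard deviation of the daily percentage changes (not of the raw closing prices) over the same window, annualized by multiplying by √252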

In [40]:
# Create an empty dataframe
returns = pd.DataFrame()

# Define the column Returns
returns['Returns'] = closing_prices_with_time_df.pct_change().mean() * 252

# Define the column Volatility
returns['Volatility'] = closing_prices_with_time_df.pct_change().std() * math.sqrt(252)

  returns['Returns'] = closing_prices_with_time_df.pct_change().mean() * 252
  returns['Volatility'] = closing_prices_with_time_df.pct_change().std() * math.sqrt(252)


In [41]:
# Show the per-ticker table holding the 'Returns' and 'Volatility' columns.
display(returns)

Unnamed: 0,Returns,Volatility
AC,-0.016715,0.304282
ACEN,-0.087725,0.395486
AEV,0.051135,0.354283
AGI,0.019598,0.30376
ALI,-0.008708,0.35475
AREIT,0.108493,0.261918
BDO,0.205465,0.296595
BLOOM,-0.026898,0.394885
BPI,0.197432,0.28019
CBC,0.340479,0.186384


### 3.2 P/E Ratio

In [42]:
# Attach the most recent 'P/E ratio' value from each ticker's CSV to `returns`.
data_dir = "raw_data"

# Build an exact ticker -> filename lookup once.
# The previous substring test (`stock in f`) let a short ticker such as "AC"
# match another ticker's file (e.g. one containing "ACEN"), so the file that
# got read depended on directory listing order. Extracting the ticker with the
# same "PSE_DLY_<TICKER>," convention used when loading prices removes that ambiguity.
ticker_pattern = re.compile(r"PSE_DLY_([A-Z]+),")
ticker_to_file = {}
for f in sorted(os.listdir(data_dir)):
    found = ticker_pattern.search(f)
    if found and f.endswith('.csv'):
        # Keep the first file per ticker (in sorted order) if several exist
        ticker_to_file.setdefault(found.group(1), f)

pe_ratios = {}

for stock in returns.index:
    filename = ticker_to_file.get(stock)

    if filename is None:
        # Record the gap under the stock itself (previously keyed by np.nan)
        pe_ratios[stock] = np.nan
        continue

    file_path = os.path.join(data_dir, filename)

    # Read the file and take the P/E ratio from the chronologically last row.
    # NOTE(review): this is the last row of the whole file, not of the
    # 2021-06-01..2024-12-31 window used for prices - confirm that is intended.
    df_pe = pd.read_csv(file_path, parse_dates=['time'])
    pe_ratios[stock] = df_pe.sort_values('time').iloc[-1]['P/E ratio']

returns['P/E ratio'] = returns.index.map(pe_ratios)

In [43]:
# Show `returns` again, now including the 'P/E ratio' column.
display(returns)

Unnamed: 0,Returns,Volatility,P/E ratio
AC,-0.016715,0.304282,8.985726
ACEN,-0.087725,0.395486,14.889815
AEV,0.051135,0.354283,12.114948
AGI,0.019598,0.30376,2.881266
ALI,-0.008708,0.35475,11.838829
AREIT,0.108493,0.261918,15.318954
BDO,0.205465,0.296595,10.499345
BLOOM,-0.026898,0.394885,12.995808
BPI,0.197432,0.28019,11.620175
CBC,0.340479,0.186384,7.680736


### 3.3 Seasonal Indices

In [51]:
# Ensure the index is datetime
closing_prices_with_time_df.index = pd.to_datetime(closing_prices_with_time_df.index)

# Create temp dataframe then
temp_df = pd.DataFrame()

# Calculate grand mean for each stock
temp_df['Grand_Mean'] = closing_prices_with_time_df.mean()

for i in range(1,13):
    month_name = calendar.month_name[i]  # Converts 12 -> "December"

    # Calculate monthly average for the specified month
    monthly_avg = closing_prices_with_time_df[closing_prices_with_time_df.index.month == i].mean()

    # Create the seasonality column
    temp_df[f'Seasonality_Index_{month_name}'] = monthly_avg / temp_df['Grand_Mean']


In [52]:
# Show the per-ticker grand mean alongside the twelve Seasonality_Index_<Month> columns.
display(temp_df)

Unnamed: 0,Grand_Mean,Seasonality_Index_January,Seasonality_Index_February,Seasonality_Index_March,Seasonality_Index_April,Seasonality_Index_May,Seasonality_Index_June,Seasonality_Index_July,Seasonality_Index_August,Seasonality_Index_September,Seasonality_Index_October,Seasonality_Index_November,Seasonality_Index_December
AC,676.870497,1.085499,1.075501,1.016639,0.971656,0.942473,0.966019,0.928333,0.964596,1.002802,1.013113,1.025997,1.038351
ACEN,6.577332,1.039728,0.992101,0.933867,0.892726,0.881619,0.9782,1.000693,1.056918,1.031038,1.052827,1.054758,1.015996
AEV,44.380576,1.12989,1.105525,1.096346,1.037275,0.990998,0.950826,0.939544,0.93665,0.965679,0.972533,0.973864,0.998352
AGI,10.494684,1.109409,1.135397,1.103001,1.058245,1.053464,0.979276,0.955366,0.954901,0.959451,0.924096,0.921112,0.966864
ALI,29.77868,1.07741,1.094022,1.044585,0.981637,0.910649,0.957837,0.939073,0.981452,1.00833,1.014234,1.008736,1.015532
AREIT,32.736982,1.050016,1.056174,1.042062,0.994552,0.95815,0.944951,0.959055,0.982419,0.982092,0.98782,1.02226,1.054752
BDO,116.766228,0.979305,1.053018,1.029921,1.060811,1.040318,0.93444,0.962937,0.970385,0.99306,1.023168,1.009887,0.990335
BLOOM,8.03219,1.041532,1.120378,1.089848,1.056362,1.127828,0.993561,0.9919,0.961446,0.97714,0.940262,0.93041,0.873347
BPI,95.633287,0.965547,1.012886,0.996696,1.036725,1.047646,0.941633,0.984412,0.977885,0.986978,1.031243,1.02543,1.013541
CBC,28.975869,0.854247,0.912591,0.928765,0.983167,1.043833,0.929564,0.959681,0.961019,1.004698,1.123562,1.112596,1.126258


In [53]:
# Keep only the Seasonality_Index_* columns.
# errors='ignore' makes the cell safe to re-run: without it a second execution
# raises KeyError because 'Grand_Mean' has already been removed.
temp_df = temp_df.drop(columns=['Grand_Mean'], errors='ignore')

In [54]:
# Months as rows, tickers as columns; built from a copy so temp_df itself is untouched
seasonality_df = temp_df.copy().T

In [57]:
# to_csv does not create missing folders; make sure 'data/' exists so the
# notebook also runs on a fresh checkout.
os.makedirs('data', exist_ok=True)
seasonality_df.to_csv('data/seasonality_data.csv')

In [None]:
# plt.figure(figsize=(10, 6))
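# NOTE: temp_df is indexed by ticker, so temp_df['SMC'] would raise KeyError; the transposed seasonality_df has months as rows and tickers as columns.
# plt.plot(seasonality_df.index, seasonality_df['SMC'], marker='o', linestyle='-', color='blue')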
# plt.title('Seasonality Index per Month')
# plt.xlabel('Month')
# plt.ylabel('Seasonality Index')
# plt.xticks(rotation=45)
# plt.grid(True)
# plt.tight_layout()
# plt.show()

In [58]:
# Create a list of month names in order
month_names = [calendar.month_name[m] for m in range(1, 13)]

# Loop through months from February to December to calculate MoM seasonality
for i in range(1, len(month_names)):
    curr_month = month_names[i]
    prev_month = month_names[i - 1]
    
    curr_col = f'Seasonality_Index_{curr_month}'
    prev_col = f'Seasonality_Index_{prev_month}'
    
    new_col = f'Seasonality_MoM_{curr_month}'
    returns[new_col] = temp_df[curr_col] - temp_df[prev_col]

In [59]:
# Show `returns` with the Seasonality_MoM_<Month> columns added.
display(returns)

Unnamed: 0,Returns,Volatility,P/E ratio,Seasonality_MoM_February,Seasonality_MoM_March,Seasonality_MoM_April,Seasonality_MoM_May,Seasonality_MoM_June,Seasonality_MoM_July,Seasonality_MoM_August,Seasonality_MoM_September,Seasonality_MoM_October,Seasonality_MoM_November,Seasonality_MoM_December
AC,-0.016715,0.304282,8.985726,-0.009999,-0.058862,-0.044983,-0.029183,0.023546,-0.037687,0.036263,0.038206,0.010311,0.012884,0.012354
ACEN,-0.087725,0.395486,14.889815,-0.047627,-0.058235,-0.041141,-0.011107,0.096581,0.022493,0.056225,-0.02588,0.021788,0.001931,-0.038762
AEV,0.051135,0.354283,12.114948,-0.024365,-0.009179,-0.059071,-0.046277,-0.040172,-0.011282,-0.002894,0.029029,0.006855,0.00133,0.024488
AGI,0.019598,0.30376,2.881266,0.025988,-0.032396,-0.044755,-0.004781,-0.074188,-0.023911,-0.000464,0.00455,-0.035355,-0.002984,0.045752
ALI,-0.008708,0.35475,11.838829,0.016611,-0.049436,-0.062949,-0.070988,0.047188,-0.018764,0.042379,0.026878,0.005904,-0.005498,0.006796
AREIT,0.108493,0.261918,15.318954,0.006158,-0.014113,-0.04751,-0.036402,-0.013199,0.014104,0.023364,-0.000327,0.005728,0.03444,0.032492
BDO,0.205465,0.296595,10.499345,0.073713,-0.023097,0.03089,-0.020492,-0.105878,0.028497,0.007448,0.022675,0.030108,-0.013281,-0.019552
BLOOM,-0.026898,0.394885,12.995808,0.078846,-0.03053,-0.033487,0.071466,-0.134267,-0.001661,-0.030454,0.015694,-0.036878,-0.009852,-0.057063
BPI,0.197432,0.28019,11.620175,0.047339,-0.01619,0.040029,0.010921,-0.106013,0.042779,-0.006528,0.009094,0.044265,-0.005813,-0.011889
CBC,0.340479,0.186384,7.680736,0.058344,0.016174,0.054402,0.060666,-0.114269,0.030117,0.001338,0.043679,0.118864,-0.010966,0.013663


### 3.4 Moving Average Window

In [60]:
# Preview the first rows of the closing-price table before computing moving averages.
closing_prices_with_time_df.head()

Unnamed: 0_level_0,AC,ACEN,AEV,AGI,ALI,AREIT,BDO,BLOOM,BPI,CBC,...,MBT,MER,MONDE,PGOLD,SCC,SM,SMC,SMPH,TEL,URC
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-06-01,745.742852,7.126867,34.093254,9.86582,33.181935,28.876939,80.683465,6.438434,70.250094,20.011775,...,38.355134,227.273458,12.548869,32.004682,6.892854,922.658299,109.872241,35.070055,983.825397,119.078218
2021-06-02,774.90331,7.234112,34.749735,10.229097,35.030643,29.155749,81.217287,6.887173,71.416757,20.011775,...,38.394675,231.331912,12.548869,32.914151,7.047634,960.602081,113.563039,36.459444,990.631261,123.424138
2021-06-03,762.952303,7.351105,33.918192,10.171737,34.604018,28.876939,80.225903,6.955459,71.66676,19.642706,...,38.750548,230.520221,12.195118,32.610996,7.243688,958.680877,114.036219,35.836618,996.680917,122.381117
2021-06-04,770.600947,7.585093,33.830663,10.133498,34.509214,29.036261,81.598589,6.97497,71.541758,19.683713,...,38.829629,228.89684,12.474395,32.69761,7.130183,960.602081,112.143501,35.453337,1004.242988,121.68577
2021-06-07,760.084061,7.585093,34.312081,9.789341,34.414406,29.155749,81.141027,6.877418,70.958428,19.847745,...,38.631922,223.215003,12.530251,33.087384,7.223051,950.99606,111.197143,35.117969,1000.461953,122.554954


In [61]:
# Assuming your DataFrame is called `closing_prices_with_time_df`
for_ma_df = closing_prices_with_time_df.copy()

# Calculate the 20-day moving average for each stock
ma_df = for_ma_df.rolling(window=20, min_periods=1).mean()

# Optionally, rename columns to reflect that these are MAs
ma_df.columns = [f'{col}' for col in ma_df.columns]

In [35]:
# Show the 20-row moving-average table.
# NOTE(review): the saved output below spans 2021-01 to 2025-05, which is outside
# the 2021-06-01..2024-12-31 window applied when loading - it appears to be stale;
# refresh it with Restart Kernel -> Run All.
display(ma_df)

Unnamed: 0_level_0,AC,ACEN,AEV,AGI,ALI,AREIT,BDO,BLOOM,BPI,CBC,...,MBT,MER,MONDE,PGOLD,SCC,SM,SMC,SMPH,TEL,URC
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2021-01-04,799.283365,8.829264,39.892819,9.884940,39.677872,23.025747,79.852794,8.135839,67.948152,19.690223,...,34.980631,232.757937,,35.166171,6.553257,1013.534383,126.451213,37.523379,999.262174,130.830737
2021-01-05,792.112760,8.560924,40.234513,9.751102,39.205516,22.927347,78.864140,8.043164,67.824458,19.572082,...,34.469964,234.414861,,34.213394,6.351979,1008.753560,124.139237,37.642880,1004.760682,131.131592
2021-01-06,781.436527,8.304125,40.120615,9.594956,39.063809,22.881427,77.900837,7.970001,67.480869,19.545828,...,33.801235,234.914566,,34.011289,6.244318,1003.972738,123.494397,37.523379,1004.638493,131.833598
2021-01-07,783.986075,8.164905,40.255868,9.567073,38.910294,22.917507,78.769078,7.926102,67.700767,19.552391,...,34.004894,234.769914,,33.931892,6.263042,1000.148080,123.101202,37.487528,1000.911726,132.163109
2021-01-08,786.758709,8.043654,40.319936,9.642119,38.978785,22.978515,79.594223,7.893910,68.508888,19.611462,...,34.426193,234.998726,,34.066147,6.302361,1000.913011,122.903030,37.714581,1009.526055,133.065687
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-05-19,578.550000,2.670500,33.732500,6.363500,23.990000,39.612500,160.178040,3.564500,136.515000,87.328383,...,75.550000,566.275000,7.406232,31.490628,32.915000,847.922983,79.112500,23.182009,1289.850000,83.285000
2025-05-20,577.375000,2.655000,33.890000,6.412000,23.955000,39.607500,159.956472,3.622500,136.470000,87.059987,...,75.745000,565.025000,7.417605,31.592219,32.817500,851.425805,79.177500,23.249752,1288.400000,84.235000
2025-05-21,577.525000,2.640000,34.010000,6.456500,23.907500,39.657500,159.770907,3.685000,136.550000,86.507269,...,75.852500,564.475000,7.425032,31.736392,32.760000,855.532372,79.267500,23.329289,1287.100000,85.112500
2025-05-22,577.975000,2.625000,34.122500,6.492000,23.817500,39.685000,159.620252,3.747000,136.600000,85.779552,...,75.850000,563.475000,7.425000,31.863065,32.712500,858.421053,79.317500,23.386428,1283.850000,85.737500


In [63]:
# to_csv does not create missing folders; make sure 'data/' exists first
os.makedirs('data', exist_ok=True)
ma_df.to_csv('data/daily_moving_average_20.csv')

In [None]:
# temp_df_2 = pd.DataFrame()

# for year in range(2021, 2025):
#     # Calculate monthly average for the specified month
#     yearly_ma_20_avg = ma_df[ma_df.index.year == year].mean()
#     yearly_ma_20_avg = pd.DataFrame(yearly_ma_20_avg, index=None)
#     temp_df_2[f'Average_MA_20_{year}'] = yearly_ma_20_avg

#     # # Create the seasonality column
#     # returns[f'Average_MA_20{year}'] = pd.DataFrame(yearly_ma_20_avg)

In [64]:
# Re-bind ma_df to a copy whose index is guaranteed to be a DatetimeIndex
ma_df = ma_df.copy()
ma_df.index = pd.to_datetime(ma_df.index)

# Mean of the moving average within each (year, quarter) bucket; one row per bucket
quarterly_avg = ma_df.groupby([ma_df.index.year, ma_df.index.quarter]).mean()

# Turn each (year, quarter) row into a column named Average_MA_20_Q<quarter>_<year>,
# giving one row per ticker
temp_df_2 = pd.DataFrame({
    f'Average_MA_20_Q{quarter}_{year}': bucket_mean
    for (year, quarter), bucket_mean in quarterly_avg.iterrows()
})


In [65]:
# Show the per-ticker quarterly averages of the 20-row moving average.
display(temp_df_2)

Unnamed: 0,Average_MA_20_Q2_2021,Average_MA_20_Q3_2021,Average_MA_20_Q4_2021,Average_MA_20_Q1_2022,Average_MA_20_Q2_2022,Average_MA_20_Q3_2022,Average_MA_20_Q4_2022,Average_MA_20_Q1_2023,Average_MA_20_Q2_2023,Average_MA_20_Q3_2023,Average_MA_20_Q4_2023,Average_MA_20_Q1_2024,Average_MA_20_Q2_2024,Average_MA_20_Q3_2024,Average_MA_20_Q4_2024
AC,768.628921,740.927396,818.159693,820.536633,702.379723,650.357634,644.609515,673.578032,638.596609,604.454224,618.453887,679.540602,601.308559,594.402525,673.787089
ACEN,7.569759,8.698948,11.322948,9.027545,7.563727,7.880544,6.370086,6.926723,5.924312,5.134343,4.934937,4.17503,4.155828,5.110661,4.73977
AEV,35.209826,36.607679,44.447231,52.499642,47.986554,49.284052,50.879698,50.582008,49.663664,47.464827,44.605831,44.832534,38.758455,34.081335,33.89394
AGI,9.936975,9.720331,10.446196,11.994262,11.036258,9.349215,8.794565,11.982717,12.714692,12.398354,10.831536,10.813018,9.471187,8.773318,9.079592
ALI,34.759671,32.760865,33.775336,34.54702,30.567021,25.707618,25.495138,28.501691,25.69238,26.077227,28.429219,33.075556,28.391136,30.628882,32.445347
AREIT,29.103975,29.719575,35.764623,40.984138,34.520171,31.439446,28.810198,30.803822,29.628603,30.271244,29.276908,31.353132,32.209418,35.511726,37.761211
BDO,83.004259,85.211528,94.970865,99.643635,99.679828,94.309845,97.298142,112.914779,126.895904,133.407718,126.656569,137.618744,139.21296,139.676342,152.037029
BLOOM,6.805457,6.028906,6.622948,6.385627,6.162225,6.291031,6.766068,8.706719,9.795014,10.82541,9.748811,10.504482,10.279292,8.40876,6.93822
BPI,71.506555,70.651069,73.2422,81.197498,81.32547,78.322347,81.347085,90.696245,97.091342,105.36447,100.556728,108.327273,117.585675,119.965428,134.941372
CBC,19.976199,20.192914,20.25048,21.353098,22.593236,23.916475,23.186,25.4675,28.311223,28.17823,27.88656,29.929741,36.234425,39.545187,56.002169


In [66]:
# Independent copy of the quarterly table under a descriptive name
quarterly_moving_average_df = temp_df_2.copy()

# to_csv does not create missing folders; make sure 'data/' exists first
os.makedirs('data', exist_ok=True)
quarterly_moving_average_df.to_csv('data/quarterly_moving_average_data.csv')

In [22]:
# Show `returns` before it is merged into the final dataset.
# NOTE(review): the saved output below differs from the earlier display of the
# same frame and carries a lower execution count - it looks like it came from a
# previous run; refresh it with Restart Kernel -> Run All.
display(returns)

Unnamed: 0,Returns,Volatility,P/E ratio,Seasonality_MoM_February,Seasonality_MoM_March,Seasonality_MoM_April,Seasonality_MoM_May,Seasonality_MoM_June,Seasonality_MoM_July,Seasonality_MoM_August,Seasonality_MoM_September,Seasonality_MoM_October,Seasonality_MoM_November,Seasonality_MoM_December
AC,-0.030299,0.309763,8.985726,-0.021342,-0.031874,-0.03095,-0.018541,0.017356,-0.037888,0.036458,0.038411,0.010366,0.012953,0.01242
ACEN,-0.210529,0.413843,14.889815,-0.033449,-0.050408,-0.023232,0.001346,0.143547,0.023465,0.058654,-0.026998,0.02273,0.002014,-0.040437
AEV,0.03699,0.354442,12.114948,-0.032329,-0.020351,-0.044006,-0.004718,0.050971,-0.01182,-0.003032,0.030413,0.007182,0.001394,0.025656
AGI,-0.02498,0.33052,2.881266,-0.005296,-0.031253,-0.025761,0.015595,0.01446,-0.024764,-0.000481,0.004712,-0.036617,-0.003091,0.047386
ALI,-0.061195,0.356969,11.838829,-0.010085,-0.046368,-0.05001,-0.046547,0.041654,-0.018834,0.042538,0.026979,0.005926,-0.005519,0.006822
AREIT,0.162499,0.251658,15.318954,0.015981,-0.015612,-0.020351,-0.025105,-0.030554,0.014111,0.023377,-0.000327,0.005731,0.034459,0.03251
BDO,0.213812,0.302677,10.499345,0.042667,-0.001196,0.022558,-0.020565,-0.082991,0.028602,0.007475,0.022759,0.030219,-0.01333,-0.019624
BLOOM,-0.072772,0.430759,12.995808,0.027818,-0.029899,-0.046798,0.091697,0.03632,-0.001767,-0.03241,0.016702,-0.039247,-0.010485,-0.060728
BPI,0.209771,0.28474,11.620175,0.04824,-0.014347,0.025267,-0.001942,-0.096027,0.042524,-0.006489,0.009039,0.044001,-0.005778,-0.011818
CBC,0.339373,0.272354,7.680736,0.146146,-0.014727,0.03828,-0.054903,-0.28599,0.026629,0.001183,0.038621,0.105099,-0.009696,0.01208


In [23]:
# Place the per-ticker metrics and the quarterly moving-average columns side by
# side, aligned on the ticker index.
# NOTE(review): the saved output below shows yearly Average_MA_20_<year> columns,
# which the current quarterly code does not produce - the output appears stale.
feature_frames = [returns, temp_df_2]
final_df = pd.concat(feature_frames, axis=1)
display(final_df)

Unnamed: 0,Returns,Volatility,P/E ratio,Seasonality_MoM_February,Seasonality_MoM_March,Seasonality_MoM_April,Seasonality_MoM_May,Seasonality_MoM_June,Seasonality_MoM_July,Seasonality_MoM_August,Seasonality_MoM_September,Seasonality_MoM_October,Seasonality_MoM_November,Seasonality_MoM_December,Average_MA_20_2021,Average_MA_20_2022,Average_MA_20_2023,Average_MA_20_2024
AC,-0.030299,0.309763,8.985726,-0.021342,-0.031874,-0.03095,-0.018541,0.017356,-0.037888,0.036458,0.038411,0.010366,0.012953,0.01242,761.23888,704.296929,633.987442,637.056579
ACEN,-0.210529,0.413843,14.889815,-0.033449,-0.050408,-0.023232,0.001346,0.143547,0.023465,0.058654,-0.026998,0.02273,0.002014,-0.040437,8.56232,7.7235,5.742494,4.551527
AEV,0.03699,0.354442,12.114948,-0.032329,-0.020351,-0.044006,-0.004718,0.050971,-0.01182,-0.003032,0.030413,0.007182,0.001394,0.025656,37.655183,50.145875,48.118105,37.856924
AGI,-0.02498,0.33052,2.881266,-0.005296,-0.031253,-0.025761,0.015595,0.01446,-0.024764,-0.000481,0.004712,-0.036617,-0.003091,0.047386,9.99741,10.294248,11.990449,9.528324
ALI,-0.061195,0.356969,11.838829,-0.010085,-0.046368,-0.05001,-0.046547,0.041654,-0.018834,0.042538,0.026979,0.005926,-0.005519,0.006822,34.00926,29.067172,27.179854,31.142297
AREIT,0.162499,0.251658,15.318954,0.015981,-0.015612,-0.020351,-0.025105,-0.030554,0.014111,0.023377,-0.000327,0.005731,0.034459,0.03251,29.595983,33.949751,30.017557,34.227669
BDO,0.213812,0.302677,10.499345,0.042667,-0.001196,0.022558,-0.020565,-0.082991,0.028602,0.007475,0.022759,0.030219,-0.01333,-0.019624,85.502202,97.694497,124.894053,142.12812
BLOOM,-0.072772,0.430759,12.995808,0.027818,-0.029899,-0.046798,0.091697,0.03632,-0.001767,-0.03241,0.016702,-0.039247,-0.010485,-0.060728,6.707825,6.39691,9.768868,9.022507
BPI,0.209771,0.28474,11.620175,0.04824,-0.014347,0.025267,-0.001942,-0.096027,0.042524,-0.006489,0.009039,0.044001,-0.005778,-0.011818,70.533721,80.514323,98.410798,120.213673
CBC,0.339373,0.272354,7.680736,0.146146,-0.014727,0.03828,-0.054903,-0.28599,0.026629,0.001183,0.038621,0.105099,-0.009696,0.01208,19.741931,22.772877,27.434514,40.437791


In [24]:
# Write the merged per-ticker dataset to a CSV in the current working directory.
final_df.to_csv('Final_Draft_Dataset.csv')