In [19]:
%matplotlib widget

In [20]:

import pandas as pd

import matplotlib.pyplot as plt
import pandas_ta as ta

## Load parquet

In [21]:
df = pd.read_parquet("data.parquet")

In [22]:
df.groupby("name").count()

Unnamed: 0_level_0,id,figi,ts,open_price,close_price,high_price,low_price,volume
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
AT&T,101134,101134,101134,101134,101134,101134,101134,101134
Apple,119367,119367,119367,119367,119367,119367,119367,119367
JPMorgan,85536,85536,85536,85536,85536,85536,85536,85536


## Apply processing
The indicators are only meaningful within a single instrument's series, so each one is computed separately per `name` group.

In [23]:
def concat_df_with_col(df, column):
    """Return ``df`` with ``column`` appended on the right.

    ``column`` may be a Series or a DataFrame; rows are matched on the index
    labels, as ``pd.concat`` does for column-wise concatenation.
    """
    pieces = (df, column)
    return pd.concat(pieces, axis="columns")

In [24]:
def calc_metrics(df, 
    RSI_oversold_threshold = 30,
    RSI_overbouht_threshold = 70,
    MFI_oversold_threshold = 20,
    MFI_overbouht_threshold = 80):
    """Append MACD, RSI and MFI columns, each computed separately per ``name`` group.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``name`` column plus the price/volume columns that the
        ``.ta`` accessor (registered by ``pandas_ta``) reads.
    RSI_oversold_threshold, RSI_overbouht_threshold,
    MFI_oversold_threshold, MFI_overbouht_threshold : number
        Currently unused. They are kept so existing callers keep working and
        are reserved for oversold/overbought flag columns, which are disabled.

    Returns
    -------
    pandas.DataFrame
        ``df`` with the indicator columns appended, aligned on the index.

    NOTE(review): the indicators follow the row order inside each group. The
    rendered ``df.head()`` shows ``ts`` descending, so confirm the rows are in
    the intended chronological order before relying on the values.
    """
    # Applied in this order; each step sees the columns added by the previous one.
    indicators = (
        lambda group: group.ta.macd(),
        lambda group: group.ta.rsi(),
        lambda group: group.ta.mfi(),
    )

    for indicator in indicators:
        # Walk the groups explicitly instead of using groupby.apply: when the
        # applied function returns a Series and every group has the same index
        # (always true for a single group), apply reshapes the result to one
        # row per group, which no longer lines up with df's rows.
        per_group = [indicator(group) for _, group in df.groupby("name")]
        # Skip groups for which no indicator was produced.
        per_group = [result for result in per_group if result is not None]
        if not per_group:
            continue
        df = concat_df_with_col(df, pd.concat(per_group))

    return df

In [25]:
df = calc_metrics(df)

## Show result

In [26]:
df.head()

Unnamed: 0,name,id,figi,ts,open_price,close_price,high_price,low_price,volume,MACD_12_26_9,MACDh_12_26_9,MACDs_12_26_9,RSI_14,MFI_14
0,JPMorgan,1,BBG000DMBXR2,2018-01-23 21:45:00+00:00,114.02,114.02,114.02,114.02,6.0,,,,,
1,JPMorgan,1,BBG000DMBXR2,2018-01-23 21:35:00+00:00,114.02,114.02,114.02,114.02,10.0,,,,,
2,JPMorgan,1,BBG000DMBXR2,2018-01-23 20:55:00+00:00,114.2,114.23,114.26,114.16,76837.0,,,,100.0,
3,JPMorgan,1,BBG000DMBXR2,2018-01-23 20:50:00+00:00,114.37,114.21,114.39,114.19,34299.0,,,,90.697674,
4,JPMorgan,1,BBG000DMBXR2,2018-01-23 20:45:00+00:00,114.33,114.36,114.41,114.31,27240.0,,,,94.688458,


## Transform data

In [27]:
# Short names for the price columns and for the parameter-suffixed indicator
# columns; "volume" maps to itself and is listed only for completeness.
column_names = dict(
    open_price="open",
    close_price="close",
    high_price="high",
    low_price="low",
    volume="volume",
    MACD_12_26_9="MACD",
    MACDh_12_26_9="MACDh",
    MACDs_12_26_9="MACDs",
    RSI_14="RSI",
    MFI_14="MFI",
)

df = df.rename(column_names, axis="columns")

In [28]:
df.head()

Unnamed: 0,name,id,figi,ts,open,close,high,low,volume,MACD,MACDh,MACDs,RSI,MFI
0,JPMorgan,1,BBG000DMBXR2,2018-01-23 21:45:00+00:00,114.02,114.02,114.02,114.02,6.0,,,,,
1,JPMorgan,1,BBG000DMBXR2,2018-01-23 21:35:00+00:00,114.02,114.02,114.02,114.02,10.0,,,,,
2,JPMorgan,1,BBG000DMBXR2,2018-01-23 20:55:00+00:00,114.2,114.23,114.26,114.16,76837.0,,,,100.0,
3,JPMorgan,1,BBG000DMBXR2,2018-01-23 20:50:00+00:00,114.37,114.21,114.39,114.19,34299.0,,,,90.697674,
4,JPMorgan,1,BBG000DMBXR2,2018-01-23 20:45:00+00:00,114.33,114.36,114.41,114.31,27240.0,,,,94.688458,


In [29]:
df.groupby("id").count()

Unnamed: 0_level_0,name,figi,ts,open,close,high,low,volume,MACD,MACDh,MACDs,RSI,MFI
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
1,85536,85536,85536,85536,85536,85536,85536,85536,85511,85511,85528,85534,85523
2,119367,119367,119367,119367,119367,119367,119367,119367,119342,119342,119359,119365,119354
3,101134,101134,101134,101134,101134,101134,101134,101134,101109,101109,101126,101133,101121


In [30]:
# Build one wide frame: one row per timestamp shared by all instruments,
# with every instrument's columns side by side under an "<n>_" prefix.
columns = ["open", "close", "high", "low", "volume","MACD","MACDh","MACDs","RSI","MFI"]

# None marks "nothing merged yet". DataFrame.empty must not be used for this:
# it is also True when an inner merge leaves zero rows, and the next iteration
# would then overwrite the result and silently drop the earlier instruments.
result_df = None

for i, (index, dfx) in enumerate(df.groupby("id")):
    # The prefix is the group's position in iteration order (0, 1, ...), not the id.
    prefix = "%d_" % i
    columns_with_prefix = {x : prefix + x for x in columns}
    # Keep the timestamp plus the value columns, then prefix the value columns.
    dfx = dfx[["ts"] + columns].rename(columns=columns_with_prefix)
    if result_df is None:
        result_df = dfx
    else:
        # Inner join: only timestamps present for every instrument so far survive.
        result_df = pd.merge(result_df, dfx, on="ts")

if result_df is None:
    # No groups at all: there is nothing to merge and no "ts" column to drop.
    result_df = pd.DataFrame()
else:
    # The timestamp was only needed as the join key.
    result_df = result_df.drop("ts", axis=1)

In [31]:
result_df.head()

Unnamed: 0,0_open,0_close,0_high,0_low,0_volume,0_MACD,0_MACDh,0_MACDs,0_RSI,0_MFI,...,2_open,2_close,2_high,2_low,2_volume,2_MACD,2_MACDh,2_MACDs,2_RSI,2_MFI
0,114.2,114.23,114.26,114.16,76837.0,,,,100.0,,...,37.2,37.19,37.26,37.18,98644.0,,,,32.049265,
1,114.37,114.21,114.39,114.19,34299.0,,,,90.697674,,...,37.15,37.2,37.2,37.14,38002.0,,,,36.604042,
2,114.33,114.36,114.41,114.31,27240.0,,,,94.688458,,...,37.15,37.15,37.18,37.14,33423.0,,,,26.896268,
3,114.32,114.32,114.34,114.25,22342.0,,,,84.302204,,...,37.22,37.14,37.22,37.14,59713.0,,,0.0,25.442911,
4,114.36,114.32,114.37,114.29,16668.0,,,,84.302204,,...,37.26,37.22,37.26,37.22,17075.0,,,0.0,49.126458,


In [32]:
result_df.shape

(80438, 30)

## Save results

In [33]:
result_df.to_parquet("processed_data.parquet", compression="gzip")