In [None]:
import pandas as pd
import numpy as np
from sklearn import preprocessing

In [None]:
# Load the full market training table from the feather file in the input directory.
market_train_df = pd.read_feather(
    path='../input/market_train_df.feather',
)

In [None]:
# Keep only the rows for asset code 'BAC.N', then put them on a daily grid:
# resample('1D') inserts a row for every calendar day and ffill() copies the
# previous available row into the days that had no data.
is_bac = market_train_df['assetCode'] == 'BAC.N'
bac = (
    market_train_df[is_bac]
    .set_index('time')
    .resample('1D')
    .ffill()
    .reset_index()
)

In [None]:
# Cross-asset mean of 'returnsOpenPrevRaw10' per calendar day (timestamps are
# normalized to midnight before grouping), forward-filled onto a daily grid
# and exposed as a one-column frame named 'marketReturnsOpenPrevRaw10'.
day_key = market_train_df['time'].dt.normalize()
daily_mean = market_train_df['returnsOpenPrevRaw10'].groupby(day_key).mean()
market_returns = (
    daily_mean
    .resample('1D')
    .ffill()
    .to_frame(name='marketReturnsOpenPrevRaw10')
)

In [None]:
# Attach the daily market-wide return to each BAC row (left join on 'time'),
# then discard rows where the asset code or either return value is missing.
merged = pd.merge(bac, market_returns, how='left', on='time')
data = merged.dropna(
    subset=['assetCode', 'returnsOpenPrevRaw10', 'marketReturnsOpenPrevRaw10']
)

# Cast every column that also exists in the source table back to the dtype
# it has there.
source_dtypes = {
    col: market_train_df[col].dtype
    for col in data.columns
    if col in market_train_df.columns
}
data = data.astype(source_dtypes)

In [None]:
# Excess return: the asset's 10-day raw return minus the market-wide mean.
data['diffReturnOpenPrevRaw10'] = data['returnsOpenPrevRaw10'].sub(
    data['marketReturnsOpenPrevRaw10']
)

In [None]:
def _scale_magnitude(values):
    """Min-max scale the absolute values of a 1-D array into [0, 1].

    Parameters
    ----------
    values : numpy.ndarray
        1-D array of numbers that all lie on the same side of zero.

    Returns
    -------
    numpy.ndarray
        1-D array of the same length; the smallest magnitude maps to 0 and
        the largest magnitude maps to 1.
    """
    magnitudes = np.abs(values).reshape(-1, 1)
    # ravel() flattens the (n, 1) scaler output so it can be assigned to a
    # single DataFrame column without relying on 2-D -> 1-D coercion.
    return preprocessing.MinMaxScaler().fit_transform(magnitudes).ravel()


# Build the target 'y' in [-1, 1] from the difference to the market return.
# Each side of zero is scaled independently:
#   negative differences -> [-1, 0]  (largest underperformance -> -1)
#   non-negative differences -> [0, 1]  (largest outperformance -> +1)
#
# Magnitudes are scaled rather than raw values: min-max scaling the raw
# negative values and then negating would send the most negative difference
# to 0 and the least negative one to -1, reversing the ordering on that side.
diff_from_market = data['diffReturnOpenPrevRaw10']
data['y'] = np.nan
for side_mask, sign in ((diff_from_market < 0, -1.0), (diff_from_market >= 0, 1.0)):
    # Fitting the scaler on zero rows raises, so skip a side with no samples.
    if side_mask.any():
        side_values = diff_from_market[side_mask].values
        data.loc[side_mask, 'y'] = sign * _scale_magnitude(side_values)

data['y'] = data['y'].astype(np.float32)

In [None]:
# Summary statistics of the scaled target, shown as the cell output.
data.loc[:, 'y'].describe()

In [None]:
# Remove the two intermediate columns used to derive 'y' and renumber the
# rows from 0 (feather output requires a default index).
helper_columns = ['marketReturnsOpenPrevRaw10', 'diffReturnOpenPrevRaw10']
data = data.drop(columns=helper_columns)
data = data.reset_index(drop=True)

In [None]:
# Persist the prepared BAC frame (features plus target 'y') next to the notebook.
data.to_feather(
    path='bac_market_data.feather',
)