In [123]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib

In [219]:
def get_series(TICKER):
    '''
    input : a ticker 
    output : the corresponding dataframe
    '''    
    path = "/Users/edouardcuny/Desktop/quant/Carmela/data/" + TICKER + ".csv"
    df = pd.read_csv(path, index_col='Date', dtype={'Adj Close': np.float64}, na_values='null')
    df = df['Adj Close']
    df = df.rename(TICKER)
    # print str(TICKER) + ' : ' + str(len(df))
    return df

In [220]:
GLE = get_series("GLE.PA")

In [221]:
def print_info_stock(df):
    print 'min date : ' + str(min(df.index))
    print 'max date : ' + str(max(df.index))
    print 'nb dates : ' + str(len(df))
    print 'null     : ' + str(sum(df.isnull()))

In [222]:
print_info_stock(GLE)

min date : 2012-01-02
max date : 2017-12-29
nb dates : 1532
null     : 1


In [223]:
GLE[GLE.isnull()]
GLE = GLE[GLE.notnull()] # on décide d'enlever les lignes où ça déconnait (pas de valeur)

# 1. FEATURE ENGINEERING
obj = predict cumulative return in 5 trading days 

**FEATURES**
- ~~outside to inside BB~~
- ~~adjusted close/SMA~~
- ~~previous adjusted close/SMA~~
- ~~dérivée du adjusted close / SMA~~
- ~~crossed SMA up~~
- ~~crossed SMA down~~
- momentum 1
- momentum 5
- momentum 10

## BOLLINGER BANDS

In [244]:
def df_bollinger_features(stock, window_size):
    '''
    input = series, d'ajusted close d'un stock au cours du temps
    output = dataframe avec pour colonnes :
        - in_BB : bool qui vaut 1 si le spot est dans la bollinger band
        - pr_in_BB : bool qui vaut 1 si le spot était dans la bollinger band au précédent jour de trade
        - out_to_in_BB : bool qui vaut 1 si le spot vient de traverser la bollinger band
        - rolling mean : la moyenne roulante sur la taille window_size
    '''
    
    # BOLLINGER BANDS
    rolling_mean = stock.rolling(window=window_size).mean()
    rolling_std = stock.rolling(window=window_size).std()
    upper_bb = rolling_mean + 2*rolling_std
    lower_bb = rolling_mean - 2*rolling_std

    # plot pour vérification visuelle, décommenter pour voir un truc joli
    '''
    ax = stock[:100].plot()
    rolling_mean[:100].plot(ax=ax)
    upper_bb[:100].plot(ax=ax, color='c')
    lower_bb[:100].plot(ax=ax, color='c')
    plt.show()
    '''

    # inside BB
    in_BB = (stock < upper_bb) & (stock > lower_bb)
    in_BB[:window_size] = np.NaN
    
    # previous inside BB
    pr_in_BB = in_BB.shift(1) 

    # outside to inside BB
    out_to_in_BB = (pr_in_BB == 0) & (in_BB == 1)
    out_to_in_BB[:window_size+1] = np.NaN
    
    
    # Adjusted Close / SMA
    spike = stock/rolling_mean
    pr_spike = spike.shift(1)
    spike_derivative = spike - pr_spike
    crossed_RM_up = (pr_spike < 1) & (spike > 1)
    crossed_RM_down = (pr_spike > 1) & (spike < 1)
    crossed_RM_up[:window_size] = np.NaN
    crossed_RM_down[:window_size] = np.NaN
    
    # rename columns
    stock = stock.rename('Adj_Close')
    in_BB = in_BB.rename('in_BB')
    pr_in_BB = pr_in_BB.rename('pr_in_BB')
    rolling_mean = rolling_mean.rename('rolling_mean')
    out_to_in_BB = out_to_in_BB.rename('out_to_in_BB')
    spike = spike.rename('spike')
    pr_spike = pr_spike.rename('pr_spike')
    spike_derivative = spike_derivative.rename('spike_derivative')
    crossed_RM_up = crossed_RM_up.rename('crossed_RM_up')
    crossed_RM_down = crossed_RM_down.rename('crossed_RM_down')
    
    
    stock_df = pd.concat([in_BB,pr_in_BB,out_to_in_BB,rolling_mean,spike,pr_spike,spike_derivative,crossed_RM_up,crossed_RM_down], axis=1)
    return stock_df

# MOMENTUM

In [255]:
def df_momentum(stock):
    '''
    input = series, d'ajusted close d'un stock au cours du temps
    output = dataframe avec pour colonnes :
        - mom_1 : le % de cumulative return sur le dernier jour de trading
        - mom_5 : le % de cumulative return sur les 5 derniers jours de trading
        - mom_10 : le % de cumulative return sur les 1à derniers jours de trading
    '''
    mom_1 = (stoick/GLE.shift(1)-1)*100
    mom_5 = (GLE/GLE.shift(5)-1)*100
    mom_10 = (GLE/GLE.shift(10)-1)*100

    
    

Date
2012-01-02    14.964975
2012-01-03    14.893049
2012-01-04    14.385330
2012-01-05    13.606832
2012-01-06    13.171040
2012-01-09    12.692940
2012-01-10    13.323355
2012-01-11    13.454515
2012-01-12    13.788764
2012-01-13    13.826841
2012-01-16    14.000312
2012-01-17    14.266866
2012-01-18    15.028440
2012-01-19    17.004307
2012-01-20    17.774345
2012-01-23    19.293266
2012-01-24    18.252445
2012-01-25    17.770115
2012-01-26    17.778576
2012-01-27    17.829350
Name: GLE.PA, dtype: float64
Date
2012-01-02          NaN
2012-01-03          NaN
2012-01-04          NaN
2012-01-05          NaN
2012-01-06          NaN
2012-01-09          NaN
2012-01-10          NaN
2012-01-11          NaN
2012-01-12          NaN
2012-01-13          NaN
2012-01-16    -6.446138
2012-01-17    -4.204532
2012-01-18     4.470596
2012-01-19    24.968891
2012-01-20    34.950201
2012-01-23    51.999978
2012-01-24    36.995862
2012-01-25    32.075478
2012-01-26    28.935240
2012-01-27    28.947386
N

In [245]:
GLE_df = pd.concat([GLE,df_bollinger_features(GLE,10)], axis=1)
GLE_df.iloc[:70,:]

Unnamed: 0_level_0,GLE.PA,in_BB,pr_in_BB,out_to_in_BB,rolling_mean,spike,pr_spike,spike_derivative,crossed_RM_up,crossed_RM_down
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2012-01-02,14.964975,,,,,,,,,
2012-01-03,14.893049,,,,,,,,,
2012-01-04,14.385330,,,,,,,,,
2012-01-05,13.606832,,,,,,,,,
2012-01-06,13.171040,,,,,,,,,
2012-01-09,12.692940,,,,,,,,,
2012-01-10,13.323355,,,,,,,,,
2012-01-11,13.454515,,,,,,,,,
2012-01-12,13.788764,,,,,,,,,
2012-01-13,13.826841,,,,13.810764,1.001164,,,,


# résultats

- plot du return vs expected return
- % tu temps où directionnel bon
- print de 50 résultats 