In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from mylibs.jingyi_shen_paper import rfe_selection_with_all_stocks, grid_search_shen

### Prepare data

In [2]:
# Ticker codes handed to the feature-selection helper below.
stock_list = [
    '600519.SH',
    '000858.SZ',
    '000596.SZ',
    '000568.SZ',
    '600779.SH',
    '600809.SH',
    '002304.SZ',
]

# Full dataset; a later cell filters it by its 'ts_code' column.
data = pd.read_csv('./data/jingyi_shen/data.csv')

# Number of features requested from the selection helper.
# NOTE(review): the saved output of this cell reports "32 features are
# selected", not 39 -- confirm whether the output is stale or the helper
# returns fewer features than requested.
n_features_to_select = 39
final_selected_features = rfe_selection_with_all_stocks(
    stock_list,
    data,
    n_features_to_select,
)

32 features are selected


In [3]:
# Stock whose rows are extracted for the grid search.
ts_code = '000858.SZ'

# Keep only this stock's rows, index them by trade_date (sorted ascending),
# and drop the now-constant ts_code column.
target_data = (
    data.loc[data['ts_code'] == ts_code]
    .set_index('trade_date')
    .drop(columns='ts_code')
    .sort_index()
    .copy()
)

# target_data must contain three required columns: label, ret, close.
# ret and close are needed for functional purposes and are not necessarily
# among the selected features.
# NOTE(review): if final_selected_features already contains 'ret' or 'close',
# this selection produces duplicate columns -- confirm that cannot happen.
target_data = target_data[
    final_selected_features + ['ret', 'close', 'label']
]

### Hyperparameter span

In [4]:
# Before running this cell, first set start='2014' in the control panel and
# then run generate_data().
#
# Each span name ends with its position in the argument order. The search
# uses itertools.product to replace nested for-loops, so the order matters
# and is easy to get wrong; the numeric suffix keeps it explicit.
#
# The spans below multiply out to 2*1*3*3*3*2*2*2*2 = 864 combinations.
activation_function = 'relu'

split_span_0 = [0.1, 0.2]
validation_split_span_1 = [0.2]
lookback_span_2 = [2, 10, 15]
batch_size_span_3 = [32, 64, 256]
hu_span_4 = [32, 64, 256]
learning_rate_span_5 = [0.0005, 0.0001]
start_year_span_6 = ['2016', '2017']
layers_span_7 = [1, 2]
label_threshold_span_8 = [0.01, 0.005]

In [5]:
# CSV path handed to grid_search_shen (plain string; no interpolation needed).
grid_search_path = './results/jingyi_shen/grid_search_result.csv'

In [6]:
%%time
grid_search_shen(target_data, grid_search_path, split_span_0, validation_split_span_1,
                 lookback_span_2, batch_size_span_3, hu_span_4, learning_rate_span_5,
                 start_year_span_6, layers_span_7, label_threshold_span_8, activation_function)

Instructions for updating:
The `validate_indices` argument has no effect. Indices are always validated on CPU and never validated on GPU.
Instructions for updating:
The `validate_indices` argument has no effect. Indices are always validated on CPU and never validated on GPU.
1/864   0.12797492345174152 minutes
2/864   0.11927447319030762 minutes
3/864   0.16280913352966309 minutes
4/864   0.240507968266805 minutes
5/864   0.07630776166915894 minutes
6/864   0.13068257172902426 minutes
7/864   0.12724754412968953 minutes
8/864   0.13785770336786907 minutes
9/864   0.5970706582069397 minutes
10/864   1.1422918121019998 minutes
11/864   1.5506208062171936 minutes
12/864   1.564678160349528 minutes
13/864   0.46280675729115806 minutes
14/864   1.2270691394805908 minutes
15/864   1.3618256092071532 minutes
16/864   1.4961729248364766 minutes
17/864   0.0747353196144104 minutes
18/864   0.08522021373112997 minutes
19/864   0.12287695010503133 minutes
20/864   0.1274485190709432 minutes
21/86

201/864   0.2801526546478271 minutes
202/864   0.2754387855529785 minutes
203/864   0.24190850655237833 minutes
204/864   0.48130740722020465 minutes
205/864   0.19895351727803548 minutes
206/864   0.4183867891629537 minutes
207/864   0.22122387091318765 minutes
208/864   0.2928210496902466 minutes
209/864   0.044818619887034096 minutes
210/864   0.057939306894938154 minutes
211/864   0.08343734343846639 minutes
212/864   0.10856090386708578 minutes
213/864   0.04716463883717855 minutes
214/864   0.07554956674575805 minutes
215/864   0.08793505827585855 minutes
216/864   0.08809028466542562 minutes
217/864   0.08771947622299195 minutes
218/864   0.19966742197672527 minutes
219/864   0.17120126485824586 minutes
220/864   0.18356115420659383 minutes
221/864   0.1798330783843994 minutes
222/864   0.5328617930412293 minutes
223/864   0.20739795764287314 minutes
224/864   0.23669696251551312 minutes
225/864   0.04313851197560628 minutes
226/864   0.05125166972478231 minutes
227/864   0.1089

418/864   0.034438554445902506 minutes
419/864   0.06421266794204712 minutes
420/864   0.10349857807159424 minutes
421/864   0.029441467920939126 minutes
422/864   0.037372513612111406 minutes
423/864   0.06544252634048461 minutes
424/864   0.07390766938527425 minutes
425/864   0.07246869007746379 minutes
426/864   0.04763708511988322 minutes
427/864   0.09266879161198933 minutes
428/864   0.16681350469589235 minutes
429/864   0.02729302247365316 minutes
430/864   0.06908316612243652 minutes
431/864   0.2288089911142985 minutes
432/864   0.08710818688074748 minutes
433/864   0.0965635339419047 minutes
434/864   0.10658349196116129 minutes
435/864   0.8488464474678039 minutes
436/864   0.1457722783088684 minutes
437/864   0.08769263823827107 minutes
438/864   0.06931320031483969 minutes
439/864   0.14969590107599895 minutes
440/864   0.12631213267644245 minutes
441/864   0.9458739558855692 minutes
442/864   0.8594976147015889 minutes
443/864   1.3797271966934204 minutes
444/864   1.7381

636/864   0.43117720683415733 minutes
637/864   0.19313825766245524 minutes
638/864   0.2208577791849772 minutes
639/864   0.23699958324432374 minutes
640/864   0.35650627613067626 minutes
641/864   0.04323577880859375 minutes
642/864   0.05823856989542643 minutes
643/864   0.07156073649724325 minutes
644/864   0.11090412537256876 minutes
645/864   0.04650122324625651 minutes
646/864   0.05092479785283407 minutes
647/864   0.07932336330413818 minutes
648/864   0.08276417255401611 minutes
649/864   0.14051144917805988 minutes
650/864   0.11380395889282227 minutes
651/864   0.1507325251897176 minutes
652/864   0.24047977526982625 minutes
653/864   0.10643787781397501 minutes
654/864   0.124240775903066 minutes
655/864   0.22203402121861776 minutes
656/864   0.28584638436635335 minutes
657/864   0.040040123462677005 minutes
658/864   0.04984350601832072 minutes
659/864   0.10212522347768148 minutes
660/864   0.11467268069585164 minutes
661/864   0.04195726712544759 minutes
662/864   0.044

853/864   0.02660363515218099 minutes
854/864   0.03385633627573649 minutes
855/864   0.06751959323883057 minutes
856/864   0.07537986834843953 minutes
857/864   0.03363531430562337 minutes
858/864   0.03138700723648071 minutes
859/864   0.10129808187484741 minutes
860/864   0.09002043008804321 minutes
861/864   0.02955373525619507 minutes
862/864   0.033957656224568686 minutes
863/864   0.10573677221934001 minutes
864/864   0.14475291967391968 minutes
CPU times: total: 3h 43min 11s
Wall time: 2h 43min 47s


### Results

In [1]:
from mylibs.training_part import choose_best_hyper

# Both arguments are passed as None. Note that this rebinds ts_code, which an
# earlier cell sets to '000858.SZ'.
# NOTE(review): presumably None makes choose_best_hyper fall back to its
# defaults -- confirm against mylibs.training_part.
ts_code = file_name = None

# Despite its name, the saved output shows this frame holding the chosen
# hyperparameter values (split, lookback, batch_size, ...).
feature_df = choose_best_hyper(ts_code, file_name)
feature_df

Unnamed: 0,value
split,0.1
validation_split,0.2
lookback,10.0
batch_size,256.0
hu_span,256.0
learning_rate,0.0005
layers,2.0
label_threshold,0.01
start_year,2017.0
