In [1]:
import sys 
sys.path.append('adv_machine/')

import adv_machine.config as cf 
import adv_machine.get_prices as gp 
import adv_machine.universe as un 

import adv_machine.ml.validation_ml_optuna as validation_ml_optuna 
import adv_machine.ml.validation_ml as validation_ml 
import adv_machine.ml.utils_ml as ut_ml 

import pandas as pd 
from tqdm import tqdm 



In [5]:
# Load the semicolon-separated stock screener export and key every row by its ticker.
df = pd.read_csv('available_stock.csv', sep=';')
df = df.set_index('Ticker', drop=False)  # keep the 'Ticker' column in each row as well
ticker_to_screen = df.to_dict(orient='index')

# Tickers whose screener row reports 'USA' in the 'Country' column.
us_stock = [
    symbol
    for symbol in ticker_to_screen
    if ticker_to_screen[symbol]['Country'] == 'USA'
]

In [7]:
# Fetch the OHLC history for the '^GSPC' ticker and keep only rows with
# strictly positive volume.
df = gp.get_stock_ohlc('^GSPC')
has_volume = df['volume'] > 0
df = df.loc[has_volume]


In [8]:
# Display the volume-filtered '^GSPC' frame (pandas truncates the rendered rows).
df 

Unnamed: 0_level_0,open,high,low,close,volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1950-01-03,16.660000,16.660000,16.660000,16.660000,1260000
1950-01-04,16.850000,16.850000,16.850000,16.850000,1890000
1950-01-05,16.930000,16.930000,16.930000,16.930000,2550000
1950-01-06,16.980000,16.980000,16.980000,16.980000,2010000
1950-01-09,17.080000,17.080000,17.080000,17.080000,2520000
...,...,...,...,...,...
2025-01-08,5910.660156,5927.890137,5874.779785,5918.250000,4441740000
2025-01-10,5890.350098,5890.350098,5807.779785,5827.040039,4751930000
2025-01-13,5782.020020,5838.609863,5773.310059,5836.220215,4421200000
2025-01-14,5859.270020,5871.919922,5805.419922,5842.910156,4142280000


In [3]:

# Universe configuration: a single 'volume' feature with an 'average'
# transformation over 6*30 periods (method 'MA'), keeping the top 50 products.
# NOTE(review): presumably the period is in days and 'MA' is a moving average — confirm
# against adv_machine.universe.
config_features = {
    'volume': {
        'collect': {},
        'feature': {},
        'transformation': {'average': {'period': 6 * 30, 'method': 'MA'}},
    }
}
config_universe = dict(top=50, config_features=[config_features])

universe = un.Universe(
    baseline_universe='us_stock',
    config_universe=config_universe,
    n_jobs=-1,
    verbose=1,
)

# Date passed to compute_universe in the following cell.
end_date = '2024-11-01'



In [4]:
# Compute the universe up to `end_date`. On the recorded run the result was loaded
# from a JSON file under universe/ and reported as already computed for that date.
universe.compute_universe(end_date)

Loading universe from universe/rolling_universe_us_stock.json
Universe already computed for 2024-11-01
Processing 15639 dates with 50 products each
Selecting top 50 products for each date
Found first complete date: 1980-12-01 00:00:00
Filtered from 15639 to 11074 dates


In [5]:
# Summarise the computed universe: first/last date and the product list on each.
universe_dates = list(universe.date_to_product_universe.keys())
universe_products = list(universe.date_to_product_universe.values())

print(f'First date of the universe: {universe_dates[0]}')
print(f'Last date of the universe: {universe_dates[-1]}')


print(f'First universe: {universe_products[0]}')
print(f'Last universe: {universe_products[-1]}')



First date of the universe: 1980-12-01 00:00:00
Last date of the universe: 2024-11-01 00:00:00
First universe: ['AAME', 'AXP', 'ADM', 'ASH', 'AGYS', 'LNT', 'AVA', 'ABM', 'ARW', 'AEP', 'WBA', 'ASA', 'ADX', 'ALOG', 'AZZ', 'AWR', 'ADP', 'FNMA', 'AFL', 'APA', 'AE', 'ADI', 'LUV', 'BAC', 'MO', 'ABT', 'APOG', 'AIT', 'AA', 'ALK', 'DD', 'ARTW', 'APD', 'AMAT', 'AEM', 'ATRO', 'AXR', 'PEO', 'BDX', 'AIG', 'AFG', 'SEE', 'KLIC', 'AVT', 'SHLM', 'AIR', 'LLY', 'ALX', 'AMD', 'AVY']
Last universe: ['GOOG', 'ADM', 'T', 'NVO', 'ABEV', 'AMZN', 'APH', 'WBA', 'ASX', 'SAVE', 'ALYI', 'ADMA', 'AG', 'ANET', 'AMC', 'APA', 'JBLU', 'AHRO', 'LUV', 'DAL', 'AES', 'BAC', 'MO', 'ABT', 'AVGO', 'PAAS', 'APP', 'AR', 'AAL', 'AAPL', 'AA', 'AI', 'BTI', 'GOOGL', 'AZN', 'AGNC', 'ARDX', 'AMAT', 'BABA', 'AMLH', 'AIG', 'ADT', 'PANW', 'AKTS', 'AEO', 'APDN', 'ABBV', 'NLY', 'AMD', 'ARRY']


In [None]:
# NOTE(review): same call as the earlier compute_universe cell, and this cell has no
# execution count — looks like a leftover; confirm whether it can be removed.
universe.compute_universe(end_date)

## Optuna Train Pipeline 

In [None]:
# --- Universe definition ------------------------------------------------------
baseline_universe = 'BINANCE_future_USDT'
timestamp_calculation = '23:59:00'

# Feature handed to the universe configuration. It has its own name so that
# `config_features` below is only ever bound to the model feature list
# (the original cell bound the same name to this dict and then to a list).
# NOTE(review): presumably used to rank products for the 'top' selection — confirm.
config_features_universe = {'cumulative_1d_volume':{'collect':{'type':"future"},'feature':{},'transformation': {'average':{'period':6*30,'method':'MA'}}}}

config_universe = {'timestamp_calculation':timestamp_calculation,
                   'top':50,
                   'config_features':[config_features_universe]}


# --- Scoring / aggregation / execution settings -------------------------------
config_score = {}

# The same basket size is used for the 'top' and 'bottom' legs.
size_basket = 5
config_aggregation = {'top':size_basket,'bottom':size_basket}

end_date = "2024-11-20"
timestamp_execution = '23:59:00'
config_freq_execution = {'freq':'1d','timestamps':[timestamp_execution]}

fees_bps = 5

verbose = 3
notional = 10000


# --- Liquidity features -------------------------------------------------------
# Each feature is a one-key dict: {feature_name: {'collect': ..., 'feature': ...}}.

volume_range_position_60 = {'volume_range_position':{'collect':{'type':'future'},'feature': {'period':60}}}
volume_range_position_14 = {'volume_range_position':{'collect':{'type':'future'},'feature': {'period':14}}}
volume_range_position_30 = {'volume_range_position':{'collect':{'type':'future'},'feature': {'period':30}}}
volume_range_position_90 = {'volume_range_position':{'collect':{'type':'future'},'feature': {'period':90}}}

distance_from_ewma_volume_60 = {'distance_from_ewma_volume':{'collect':{'type':'future'},'feature': {'period':60}}}
distance_from_ewma_volume_14 = {'distance_from_ewma_volume':{'collect':{'type':'future'},'feature': {'period':14}}}
distance_from_ewma_volume_30 = {'distance_from_ewma_volume':{'collect':{'type':'future'},'feature': {'period':30}}}
distance_from_ewma_volume_90 = {'distance_from_ewma_volume':{'collect':{'type':'future'},'feature': {'period':90}}}

# Note: the variable is called *_pct_change_* but the feature key is 'volume_pct'.
volume_pct_change_1 = {'volume_pct':{'collect':{'type':'future'},'feature': {'period':1}}}
volume_pct_change_3 = {'volume_pct':{'collect':{'type':'future'},'feature': {'period':3}}}
volume_pct_change_7 = {'volume_pct':{'collect':{'type':'future'},'feature': {'period':7}}}


# --- Momentum / mean-reversion features ---------------------------------------

return_pct_7 = {'return_pct':{'collect':{},'feature': {'period':7}}}
return_pct_30 = {'return_pct':{'collect':{},'feature': {'period':30}}}

return_vol_ratio_90 = {'return_vol_ratio':{'collect':{},'feature': {'period':90}}}
return_vol_ratio_120 = {'return_vol_ratio':{'collect':{},'feature': {'period':120}}}

distance_from_low_5 = {'distance_from_low':{'collect':{},'feature': {'period':5}}}
distance_from_low_15 = {'distance_from_low':{'collect':{},'feature': {'period':15}}}
distance_from_low_30 = {'distance_from_low':{'collect':{},'feature': {'period':30}}}

distance_from_high_5 = {'distance_from_high':{'collect':{},'feature': {'period':5}}}
distance_from_high_15 = {'distance_from_high':{'collect':{},'feature': {'period':15}}}
distance_from_high_30 = {'distance_from_high':{'collect':{},'feature': {'period':30}}}

cci_15 = {'cci':{'collect':{},'feature': {'period':15}}}
cci_30 = {'cci':{'collect':{},'feature': {'period':30}}}
cci_60 = {'cci':{'collect':{},'feature': {'period':60}}}

range_position_30  = {'range_position':{'collect':{},'feature': {'period':30}}}


# --- Model feature list -------------------------------------------------------
# NOTE(review): the original list also named illiquid_ratio_30, illiquid_ratio_60,
# illiquid_ratio_90, return_pct_1, average_max_return_7 and average_min_return_7.
# None of them is defined anywhere in the visible notebook, so building the list
# raised NameError. They are left out here; define them next to the other
# features before adding them back.
# NOTE(review): distance_from_high_5/15/30 and range_position_30 are defined above
# but are not part of the list (as in the original) — confirm this is intended.
config_features = [volume_range_position_60,
                   volume_range_position_14,
                   volume_range_position_30,
                   volume_range_position_90,
                   distance_from_ewma_volume_60,
                   distance_from_ewma_volume_14,
                   distance_from_ewma_volume_30,
                   distance_from_ewma_volume_90,
                   volume_pct_change_1,
                   volume_pct_change_3,
                   volume_pct_change_7,
                   return_pct_7,
                   return_pct_30,
                   return_vol_ratio_90,
                   return_vol_ratio_120,
                   distance_from_low_5,
                   distance_from_low_15,
                   distance_from_low_30,
                   cci_15,
                   cci_30,
                   cci_60,
                   ]

# Target configuration: 'return_pct' with period 1, collected with type 'future'.
config_target_return = {'return_pct':{'feature':{'period':1},'collect':{'type':'future'}}}