In [16]:
import numpy as np
import pandas as pd
import sys
sys.path.append('/home/jbohn/jupyter/personal/Kernel_Learning/')
from Kernels.mkl_solver import primal_dual_opt


In [17]:

# Root directory of the Kernel_Learning checkout; the labeled CSV sits under data/.
path='/home/jbohn/jupyter/personal/Kernel_Learning/'
labeled_data=pd.read_csv(path+'data/labeled_data.csv')
# Index the rows by their 'last_interval' value while keeping the column itself.
labeled_data=labeled_data.set_index('last_interval',drop=False)
# Model inputs are the four F* columns; the target is the 'outcome' column.
feature_columns=['FB0','FA0','FB2','FA2']
features=labeled_data.loc[:,feature_columns]
outcomes=labeled_data.loc[:,'outcome']



### Perform Normalization

In [18]:
def normalize_features(features):
    """ Standardize features to zero mean and unit sample standard deviation.

    For a DataFrame the mean and standard deviation are computed per column
    (pandas defaults), so each column is scaled independently.

    A constant column has a standard deviation of exactly 0; dividing by it
    would turn the whole column into NaN. Such columns are divided by 1
    instead, so they come out as all zeros after centering.

    Parameters
    ----------
    features : pandas.DataFrame or pandas.Series
        Numeric data to standardize.

    Returns
    -------
    Same type as ``features``
        The centered and scaled data.
    """
    center=features.mean()
    scale=features.std()
    if isinstance(scale,pd.Series):
        # DataFrame input: one scale per column; neutralize zero-variance columns.
        scale=scale.replace(0,1)
    elif scale==0:
        # Series input: a single scalar scale.
        scale=1
    return (features-center)/scale



# Rebind `features` to its normalized version; later cells use the normalized frame.
features=normalize_features(features)

# Display the normalized features together with the outcomes.
features,outcomes

(                          FB0       FA0       FB2       FA2
 last_interval                                              
 2020-01-02 09:30:00 -0.757752 -0.434162 -0.602106  0.062707
 2020-01-02 09:31:00 -0.757752 -0.244125 -0.053458  0.062707
 2020-01-02 09:32:00 -0.118783 -0.244125 -0.053458  0.212742
 2020-01-02 09:33:00 -0.757752 -0.244125 -0.602106  0.062707
 2020-01-02 09:34:00 -0.118783  0.135950 -0.053458  0.212742
 ...                       ...       ...       ...       ...
 2020-01-02 15:55:00  0.520187 -0.434162 -0.053458  0.062707
 2020-01-02 15:56:00  0.839672 -0.244125 -0.053458 -0.237363
 2020-01-02 15:57:00 -0.757752 -0.054088 -0.053458  0.062707
 2020-01-02 15:58:00 -0.118783  0.325987 -0.053458 -2.337850
 2020-01-02 15:59:00  0.839672 -0.244125  2.689780  0.062707
 
 [390 rows x 4 columns],
 last_interval
 2020-01-02 09:30:00   -1
 2020-01-02 09:31:00   -1
 2020-01-02 09:32:00    1
 2020-01-02 09:33:00    1
 2020-01-02 09:34:00    1
                       ..
 2020-01-

In [19]:
def batch_features(features,outcomes,batch_size):
    """ Split features and outcomes into consecutive, aligned batches.

    Rows are taken by position, ``batch_size`` at a time. The final batch is
    shorter when ``len(features)`` is not a multiple of ``batch_size``.

    Parameters
    ----------
    features : pandas.DataFrame
        Feature rows; its index supplies the per-batch label.
    outcomes : pandas.Series (or any positionally sliceable sequence)
        Outcomes aligned row-for-row with ``features``.
    batch_size : int
        Number of rows per batch; must be positive.

    Returns
    -------
    dict
        Maps the integer batch number (0, 1, 2, ...) to a dict with keys
        ``"last_interval"`` (index label of the batch's first row),
        ``"features"`` and ``"outcomes"`` (the batch's slices).

    Raises
    ------
    ValueError
        If ``batch_size`` is not positive.
    """
    if batch_size<=0:
        raise ValueError("batch_size must be a positive integer")
    batched_dict={}
    # Integer batch numbers as keys: float keys (i/batch_size) only worked
    # because 1 == 1.0 hashes alike; ints are the idiomatic, robust choice.
    for batch_number,start in enumerate(range(0,len(features),batch_size)):
        stop=start+batch_size
        # .iloc makes the positional intent explicit regardless of index dtype.
        if hasattr(outcomes,"iloc"):
            batch_outcomes=outcomes.iloc[start:stop]
        else:
            batch_outcomes=outcomes[start:stop]
        # save the features and outcomes for each batch; labeled by the index value of its first row
        batched_dict[batch_number]={
            "last_interval":features.index[start],
            "features":features.iloc[start:stop],
            "outcomes":batch_outcomes,
        }
    return batched_dict

In [20]:
# Split the normalized data into batches of 100 rows and inspect the first batch.
batched_dict=batch_features(features,outcomes,batch_size=100)
batched_dict[0]

{'last_interval': '2020-01-02 09:30:00',
 'features':                           FB0       FA0       FB2       FA2
 last_interval                                              
 2020-01-02 09:30:00 -0.757752 -0.434162 -0.602106  0.062707
 2020-01-02 09:31:00 -0.757752 -0.244125 -0.053458  0.062707
 2020-01-02 09:32:00 -0.118783 -0.244125 -0.053458  0.212742
 2020-01-02 09:33:00 -0.757752 -0.244125 -0.602106  0.062707
 2020-01-02 09:34:00 -0.118783  0.135950 -0.053458  0.212742
 ...                       ...       ...       ...       ...
 2020-01-02 11:05:00  0.520187  0.325987 -0.053458  0.212742
 2020-01-02 11:06:00  0.520187 -0.434162  0.495190  0.062707
 2020-01-02 11:07:00 -0.438268 -0.244125 -0.602106  0.062707
 2020-01-02 11:08:00 -0.438268 -0.434162 -0.053458 -0.087328
 2020-01-02 11:09:00 -0.757752 -0.244125 -4.442640  0.062707
 
 [100 rows x 4 columns],
 'outcomes': last_interval
 2020-01-02 09:30:00   -1
 2020-01-02 09:31:00   -1
 2020-01-02 09:32:00    1
 2020-01-02 09:33:00  

### Problem Setup

### The idea is to solve the optimization problem in batches (tracking the kernel allocation weights live)

In [21]:
def batch_solve_mkl(X,y,m,batch_size,kernel_type,order,gap=10e-2,inner_tol=10e-3,weight_threshold=0.01,maxouter_iter=100,maxinner_iter=10 ,batch_verbose=True,verbose=True):
    """ Run the MKL primal-dual solver separately on each consecutive batch.

    The data are split with ``batch_features`` and ``primal_dual_opt`` is
    called once per batch on that batch's feature and outcome arrays.

    Parameters
    ----------
    X, y :
        Features and outcomes; forwarded to ``batch_features``.
    m : int
        Number of columns of the returned array; also forwarded to the solver.
    batch_size : int
        Number of rows per batch.
    kernel_type, order, gap, inner_tol, weight_threshold, maxouter_iter,
    maxinner_iter, verbose :
        Forwarded unchanged, in this order, to ``primal_dual_opt``.
    batch_verbose : bool
        When true, print one summary line after each batch is solved.

    Returns
    -------
    numpy.ndarray of shape (n, m), where n = X.shape[0]
        The row at each batch's starting offset holds that batch's weights;
        every other row stays zero.
        NOTE(review): only the first row of each batch is populated -- confirm
        whether the weights were meant to fill the whole batch's rows.
    """
    n_rows=X.shape[0]
    batches=batch_features(X,y,batch_size)
    batched_estimates=np.zeros((n_rows,m))
    for start in range(0,n_rows,batch_size):
        # batch_features keys its batches by start offset divided by batch size.
        batch=batches[start//batch_size]
        solver_args=(
            batch["features"].values,
            batch["outcomes"].values,
            m,kernel_type,order,gap,inner_tol,weight_threshold,maxouter_iter,maxinner_iter,verbose,
        )
        # The solver returns a pair; only the first element (weights) is used here.
        weights,_=primal_dual_opt(*solver_args)
        batched_estimates[start]=weights
        if batch_verbose:
            print("Batch ",start,"Last Interval", batch["last_interval"], "complete with weights ",weights)
    return batched_estimates

### Compare simulated MKL performance across each batch against single-kernel performance
- Supports either a Gaussian or a polynomial basis of kernels

In [22]:
# Solve the MKL problem per batch: 3 polynomial kernels of order 3, 100 rows per batch.
batched_estimates=batch_solve_mkl(
    features,
    outcomes,
    m=3,
    batch_size=100,
    kernel_type='polynomial',
    order=3,
    verbose=False,
)

Batch  0 Last Interval 2020-01-02 09:30:00 complete with weights  [0.33333333 0.33333333 0.33333333]
Batch  100 Last Interval 2020-01-02 11:10:00 complete with weights  [0.         0.33110055 0.66889945]
Batch  200 Last Interval 2020-01-02 12:50:00 complete with weights  [0.33333333 0.33333333 0.33333333]
Batch  300 Last Interval 2020-01-02 14:30:00 complete with weights  [0.33333333 0.33333333 0.33333333]
