In [1]:
import numpy as np
import pandas as pd


from mkl_solver import primal_dual_opt


In [12]:
# Base directory; the labeled data set is read from its data/ sub-folder.
path='/home/jbohn/jupyter/personal/Kernel_Learning/'

# Load the labeled observations and index every row by its `last_interval`
# value; the column itself is kept in the frame as well.
labeled_data=pd.read_csv(f"{path}data/labeled_data.csv").set_index('last_interval',drop=False)

# Separate the target column from the four feature columns used as model inputs.
outcomes=labeled_data['outcome']
features=labeled_data[['FB0','FA0','FB2','FA2']]



### Perform Normalization

In [13]:
def normalize_features(features):
    """Standardize features to zero mean and unit standard deviation.

    The mean (``features.mean()``) is subtracted and the result is divided by
    the standard deviation (``features.std()``). For a DataFrame this happens
    column by column; for a Series it happens over all values.

    A standard deviation that is zero (constant column) or NaN (e.g. a single
    row) would turn the whole column into NaN/inf. Such columns are divided
    by 1 instead, so after centering they come out as all zeros.

    Parameters
    ----------
    features : pandas.DataFrame or pandas.Series
        Numeric data to standardize.

    Returns
    -------
    Same kind of object as ``features``
        The standardized values, aligned with the input's index/columns.
    """
    centered=features-features.mean()
    scale=features.std()
    if np.ndim(scale)==0:
        # Series input: the spread is a single scalar.
        scale=scale if scale>0 else 1.0
    else:
        # DataFrame input: one spread per column. `NaN > 0` is False, so NaN
        # spreads are replaced together with the zero ones.
        scale=scale.where(scale>0,1.0)
    return centered/scale



# Replace the raw feature values with their standardized version; the name
# `features` is rebound, so later cells see the normalized data.
features=normalize_features(features)

# Show the normalized features next to the outcomes as this cell's output.
features,outcomes

(                          FB0       FA0       FB2       FA2
 last_interval                                              
 2020-01-02 09:30:00 -0.279873 -0.233347 -0.269437  0.023786
 2020-01-02 09:31:00 -0.462780 -0.156346 -0.002077  0.023786
 2020-01-02 09:32:00 -0.096967 -0.156346 -0.002077  0.110666
 2020-01-02 09:33:00 -0.462780 -0.156346 -0.269437  0.023786
 2020-01-02 09:34:00 -0.096967 -0.002345 -0.002077  0.110666
 ...                       ...       ...       ...       ...
 2020-01-09 15:55:00 -0.279873 -0.156346 -0.002077  0.110666
 2020-01-09 15:56:00 -0.462780 -0.002345 -0.002077  0.023786
 2020-01-09 15:57:00 -0.279873 -0.233347  0.265283 -0.236856
 2020-01-09 15:58:00  0.268846  0.459659 -0.002077 -1.540066
 2020-01-09 15:59:00 -0.279873 -0.002345 -0.002077  0.110666
 
 [3218 rows x 4 columns],
 last_interval
 2020-01-02 09:30:00   -1
 2020-01-02 09:31:00   -1
 2020-01-02 09:32:00    1
 2020-01-02 09:33:00    1
 2020-01-02 09:34:00    1
                       ..
 2020-01

In [14]:
def batch_features(features,outcomes,batch_size):
    """Split features and outcomes into consecutive batches of rows.

    Rows are taken in order, ``batch_size`` at a time; the final batch holds
    whatever rows remain and may therefore be shorter.

    Parameters
    ----------
    features : pandas.DataFrame
        Feature rows; its ``index`` supplies the per-batch timestamp label.
    outcomes : pandas.Series or sequence
        One outcome per feature row, in the same order as ``features``.
    batch_size : int
        Number of rows per batch; must be at least 1.

    Returns
    -------
    dict
        Maps the integer batch number (0, 1, 2, ...) to a dict with keys
        ``"last_interval"`` (the index label of the batch's FIRST row),
        ``"features"`` and ``"outcomes"`` (the row slices of the batch).
        Integer keys also answer float lookups such as ``result[1.0]``.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    """
    if batch_size<1:
        raise ValueError("batch_size must be a positive integer")
    batched_dict={}
    for batch_number,start in enumerate(range(0,len(features),batch_size)):
        stop=start+batch_size
        # Slice by position: use `.iloc` when the object offers it (pandas),
        # otherwise fall back to plain sequence slicing.
        batched_dict[batch_number]={
            "last_interval":features.index[start],
            "features":getattr(features,"iloc",features)[start:stop],
            "outcomes":getattr(outcomes,"iloc",outcomes)[start:stop],
        }
    return batched_dict

In [15]:
# Split the normalized features and the outcomes into batches of 100 rows,
# then inspect the first batch as this cell's output.
batched_dict=batch_features(features,outcomes,100)
batched_dict[0]

{'last_interval': '2020-01-02 09:30:00',
 'features':                           FB0       FA0       FB2       FA2
 last_interval                                              
 2020-01-02 09:30:00 -0.279873 -0.233347 -0.269437  0.023786
 2020-01-02 09:31:00 -0.462780 -0.156346 -0.002077  0.023786
 2020-01-02 09:32:00 -0.096967 -0.156346 -0.002077  0.110666
 2020-01-02 09:33:00 -0.462780 -0.156346 -0.269437  0.023786
 2020-01-02 09:34:00 -0.096967 -0.002345 -0.002077  0.110666
 ...                       ...       ...       ...       ...
 2020-01-02 11:05:00  0.268846  0.074656 -0.002077  0.110666
 2020-01-02 11:06:00  0.268846 -0.233347  0.265283  0.023786
 2020-01-02 11:07:00 -0.279873 -0.156346 -0.269437  0.023786
 2020-01-02 11:08:00 -0.279873 -0.233347 -0.002077 -0.063095
 2020-01-02 11:09:00 -0.462780 -0.156346 -2.140955  0.023786
 
 [100 rows x 4 columns],
 'outcomes': last_interval
 2020-01-02 09:30:00   -1
 2020-01-02 09:31:00   -1
 2020-01-02 09:32:00    1
 2020-01-02 09:33:00  

### Problem Setup

### The idea is to solve the optimization problem in batches (tracking the kernel allocation weights live)

In [22]:
def batch_solve_mkl(X,y,m,batch_size,kernel_type,order,gap=10e-2,inner_tol=10e-3,weight_threshold=0.01,maxouter_iter=100,maxinner_iter=10 ,batch_verbose=True,verbose=True):
    """Run ``primal_dual_opt`` on consecutive batches of the data and collect the weights.

    ``X`` and ``y`` are split with ``batch_features`` into batches of
    ``batch_size`` rows. Each batch is passed to ``primal_dual_opt`` and the
    first element of the pair it returns is recorded as that batch's weights.

    Parameters
    ----------
    X, y :
        Features and outcomes; handed to ``batch_features`` and then passed
        to the solver as ``.values`` arrays.
    m : int
        Number of columns of the result; also forwarded to the solver
        (presumably the number of kernels - confirm against ``mkl_solver``).
    batch_size : int
        Number of rows per batch.
    kernel_type, order, gap, inner_tol, weight_threshold, maxouter_iter, maxinner_iter, verbose :
        Forwarded positionally, unchanged, to ``primal_dual_opt``.
        Note that the defaults ``10e-2`` and ``10e-3`` evaluate to 0.1 and
        0.01. NOTE(review): confirm that 1e-2 / 1e-3 were not intended.
    batch_verbose : bool
        When true, print one summary line per solved batch.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(X.shape[0], m)``. Only the row at the starting
        offset of each batch holds that batch's weights; every other row
        stays zero. NOTE(review): confirm this sparse layout is intended
        rather than filling all rows of the batch.
    """
    n_rows=X.shape[0]
    batches=batch_features(X,y,batch_size)
    batched_estimates=np.zeros((n_rows,m))
    for start in range(0,n_rows,batch_size):
        # Look the batch up once; `start` is always a multiple of batch_size.
        batch=batches[start//batch_size]
        # The second element of the solver's result is not used here.
        weights,_=primal_dual_opt(
            batch["features"].values,
            batch["outcomes"].values,
            m,
            kernel_type,
            order,
            gap,
            inner_tol,
            weight_threshold,
            maxouter_iter,
            maxinner_iter,
            verbose,
        )
        batched_estimates[start,:]=weights
        if not batch_verbose:
            continue
        print("Batch ",start,"Last Interval", batch["last_interval"], "complete with weights ",weights)
    return batched_estimates

### Compare simulated MKL performance on each batch against single-kernel performance
- Supports either a Gaussian or a polynomial basis of kernels

In [23]:
# Solve batch by batch (100 rows each) with m=3 and a 'polynomial' kernel of order 3.
# verbose=False is forwarded to the solver; batch_verbose keeps its default, so one
# summary line is printed per batch.
batched_estimates=batch_solve_mkl(features,outcomes,3,100,'polynomial',3,verbose=False)

Batch  0 Last Interval 2020-01-02 09:30:00 complete with weights  [0.33333333 0.33333333 0.33333333]
Batch  100 Last Interval 2020-01-02 11:10:00 complete with weights  [0.         0.34140198 0.65859802]
Batch  200 Last Interval 2020-01-02 12:50:00 complete with weights  [0.33333333 0.33333333 0.33333333]
Batch  300 Last Interval 2020-01-02 14:30:00 complete with weights  [0.33333333 0.33333333 0.33333333]
Batch  400 Last Interval 2020-01-02 16:10:00 complete with weights  [0.41189952 0.3135032  0.27459728]
Batch  500 Last Interval 2020-01-02 18:25:00 complete with weights  [0.33333333 0.33333333 0.33333333]
Batch  600 Last Interval 2020-01-03 10:05:00 complete with weights  [0.33333333 0.33333333 0.33333333]
Batch  700 Last Interval 2020-01-03 11:45:00 complete with weights  [0.33333333 0.33333333 0.33333333]
Batch  800 Last Interval 2020-01-03 13:25:00 complete with weights  [0.33333333 0.33333333 0.33333333]
Batch  900 Last Interval 2020-01-03 15:05:00 complete with weights  [0.    