In [1]:
# Trying to fit the function of https://arxiv.org/pdf/1010.1005.pdf
# see eda_026

fname='newling_003'

n_tta = 6

seed = 0

In [2]:
import numpy as np
import pandas as pd
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import confusion_matrix
from scipy.optimize import curve_fit
import gc
import matplotlib.pyplot as plt
import seaborn as sns
import logging
from tqdm import tqdm_notebook
import itertools
import pickle as pkl

from multiprocessing import Pool

In [3]:
import random as rn
def init_seeds(seed):

    # The below is necessary for starting Numpy generated random numbers
    # in a well-defined initial state.

    np.random.seed(seed)

    # The below is necessary for starting core Python generated random numbers
    # in a well-defined state.

    rn.seed(seed)


init_seeds(seed)

In [17]:
def curve(t, A, phi, k, sigma):
    res = A * ((t - phi) / sigma)**k * np.exp(-(t - phi) / sigma) * np.exp(k) / k**k
    res = np.where(t <= phi, 0, res)
    return res

def get_newling(train, object_id):
    print(object_id)
    passbands = range(6)
    res = pd.DataFrame()
    res['object_id'] = [object_id]
    for pb in passbands:
        res['newling_A_%d' % pb] = np.NaN
        res['newling_k_%d' % pb] = np.NaN
        res['newling_sigma_%d' % pb] = np.NaN
    df0 = train[(train.object_id == object_id)]
    if df0.hostgal_photoz.mean() == 0:
        return res
    offset = 11
    t0s = []
    trises = []
    p0s = []
    bmins = []
    bmaxs = []
    xs = []
    ys = []
    y_errs = []
    mcovs = []
    for pb in passbands:
        try:
            df = df0[(df0.passband == pb)]
            mjd_delta_prev = (df.mjd - df.mjd.shift(1)).fillna(100).values.ravel()
            mjd_delta_next = (df.mjd.shift(-1) - df.mjd).fillna(100).values.ravel()
            x_min = df.mjd.min()
            x_max = df.mjd.max()
            y_err_mean = df.flux_err.mean()
            mjd_delta_prev = np.concatenate((100 * np.ones((offset,)),
                                mjd_delta_prev,
                                100 * np.ones((offset,)),
                              ))
            mjd_delta_next = np.concatenate((100 * np.ones((offset,)),
                                mjd_delta_next,
                                100 * np.ones((offset,)),
                              ))
            x = np.concatenate((np.linspace(x_min-500, x_min -450, offset),
                                df.mjd.values,
                                np.linspace(x_max+450, x_max+500, offset),
                              ))
            y = np.concatenate((np.random.randn(offset) * y_err_mean,
                                df.flux.values,
                                np.random.randn(offset) * y_err_mean
                               ))
            y_err = np.concatenate((y_err_mean * np.ones(offset),
                                    df.flux_err,
                                    y_err_mean * np.ones(offset)
                                  ))
            idxmax = np.argmax(y)
            t00 = x[np.argmax(y)]
            A = y.max() 
            Amin = A
            Amax = 2*A
            phi = -5
            k = 0.5
            sigma = 10
            if mjd_delta_prev[idxmax] > 50:
                phi = -50
                Amin = 1.5*A
                Amax = 4*A
            tmax = 20
            if mjd_delta_next[idxmax] > 50:
                Amin = 1.5*A
                Amax = 4*A
            
            p0 = [(Amin + Amax) / 2, t00 + phi, k, sigma]
            bmin = [Amin, t00 + phi -50, k/10, sigma/10]
            bmax =[Amax, t00, k*10, sigma*10]
            bounds = (bmin, bmax)

            def test_func(t, A, phi, k, sigma): 
                res = curve(t, A, phi, k, sigma)
                return res

            params, params_covariance = curve_fit(test_func, x, y, p0, y_err, 
                                                  bounds=bounds,
                                                  max_nfev=1000)
            
            median_cov = np.abs(np.median(params_covariance / A))
            
            if median_cov <= 1000:
                t0s.append(params[1])
            p0s.append([params[0], params[2], params[3]])
            bmins.append([bmin[0], bmin[2], bmin[3]])
            bmaxs.append([bmax[0], bmax[2], bmax[3]])
            xs.append(x)
            ys.append(y)
            y_errs.append(y_err)
            mcovs.append(median_cov)
        except:
            continue
    phi = np.median(t0s)
    t0s = [t for t in t0s if np.abs(t - phi) < 50]
    if len(t0s) <= 1 and len(passbands) > 1:
        return res
    phi = (np.min(t0s) + np.median(t0s)) / 2
    for pb, p0, bmin, bmax, x, y, y_err, median_cov in zip(range(0,6), p0s, bmins, bmaxs, xs, ys, y_errs, mcovs):
        try:
            bounds = (bmin, bmax)

            def test_func(t, A, k, sigma): 
                res = curve(t, A, phi, k, sigma)
                return res

            params, params_covariance = curve_fit(test_func, x, y, p0, y_err, 
                                                  bounds=bounds,
                                                  max_nfev=1000)
        
            median_cov = np.median(params_covariance / A)
            if median_cov < 1000:
                res['newling_A_%d' % pb] = params[0]
                res['newling_k_%d' % pb] = params[1]
                res['newling_sigma_%d' % pb] = params[2]
                
        except:
            continue
    plt.show()
    return res
    
def apply_newling(df, meta):
    df = df[['object_id', 'mjd', 'passband', 'flux', 'flux_err']].merge(meta[['object_id', 'hostgal_photoz']],
                                                           how='left', on='object_id')
    agg =  [get_newling(df, object_id) for object_id in tqdm_notebook(df.object_id.unique())]
    return pd.concat(agg, axis=0)
    

In [5]:
train = pd.read_csv('../input/training_set.csv')
train.head()

Unnamed: 0,object_id,mjd,passband,flux,flux_err,detected
0,615,59750.4229,2,-544.810303,3.622952,1
1,615,59750.4306,1,-816.434326,5.55337,1
2,615,59750.4383,3,-471.385529,3.801213,1
3,615,59750.445,4,-388.984985,11.395031,1
4,615,59752.407,2,-681.858887,4.041204,1


In [6]:
meta_cols = ['object_id', 'ddf', 'hostgal_photoz', 'target']
meta_train = pd.read_csv('../input/training_set_metadata.csv')[meta_cols]
meta_train.head()

Unnamed: 0,object_id,ddf,hostgal_photoz,target
0,615,1,0.0,92
1,713,1,1.6267,88
2,730,1,0.2262,42
3,745,1,0.2813,90
4,1124,1,0.2415,90


In [7]:
get_newling(train.merge(meta_train[['object_id', 'hostgal_photoz']],
                                                           how='left', on='object_id'), 713)

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


Unnamed: 0,object_id,newling_A_0,newling_k_0,newling_sigma_0,newling_A_1,newling_k_1,newling_sigma_1,newling_A_2,newling_k_2,newling_sigma_2,newling_A_3,newling_k_3,newling_sigma_3,newling_A_4,newling_k_4,newling_sigma_4,newling_A_5,newling_k_5,newling_sigma_5
0,713,14.509829,0.05,7.017297,9.129021,4.622225,3.349569,10.529041,0.125848,15.339686,11.330316,0.098584,21.11908,9.827934,5.0,3.160493,14.770886,0.05,9.747785


In [8]:
def work_tta(param):
    (i, fname) = param
    print('starting worker', i)
    train = pd.read_csv('../input/training_set.csv')
    meta_train = pd.read_csv('../input/training_set_metadata.csv')[meta_cols]
    df = train.copy()
    if i > 0:
        init_seeds(i)
        df['flux'] += df['flux_err'] * np.random.randn(*df['flux_err'].shape)
    df = apply_newling(df, meta_train)
    with open('../data/tta_%d_%s.pkl' % (i, fname), 'wb') as file:
        pkl.dump(df, file)  
    print('ending worker', i)
    return 'done'

In [9]:
params = [(i, fname) for i in range(11)]

if 1: 
    pool = Pool(processes=11, maxtasksperchild=1)
    ls   = pool.map( work_tta, params, chunksize=1 )
    pool.close()
else:
    ls = [work_tta(param) for param in params]

starting worker 1
starting worker 0
starting worker 3
starting worker 6
starting worker 2
starting worker 4
starting worker 5
starting worker 7
starting worker 9
starting worker 8
starting worker 10


HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))

HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))

  from ipykernel import kernelapp as app


HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))

  from ipykernel import kernelapp as app


HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))

HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))

HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))

HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))

  from ipykernel import kernelapp as app


HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


HBox(children=(IntProgress(value=0, max=7848), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)





ending worker 9

ending worker 6
ending worker 10
ending worker 3

ending worker 7

ending worker 1


ending worker 5

ending worker 8
ending worker 2

ending worker 4

ending worker 0


In [8]:
def work_test(param):
    (i, fname) = param
    print('starting worker', i)
    with open('../input/test_chunk_%d.csv' %i, 'rb') as file:
        test = pkl.load(file)
    meta_test = pd.read_csv('../input/training_set_metadata.csv')[meta_cols]
    df = apply_newling(test, meta_test)
    with open('../data/test_%d_%s.pkl' % (i, fname), 'wb') as file:
        pkl.dump(df, file)  
    print('ending worker', i)
    return 'done'

In [9]:
params = [(i, fname) for i in range(91)]
params.append((100, fname))

if 1: 
    pool = Pool(processes=20, maxtasksperchild=1)
    ls   = pool.map( work_test, params, chunksize=1 )
    pool.close()
else:
    ls = [work_test(param) for param in params]

starting worker 1
starting worker 0
starting worker 3
starting worker 2
starting worker 6
starting worker 7
starting worker 4
starting worker 5
starting worker 9
starting worker 8
starting worker 16
starting worker 14
starting worker 15
starting worker 17
starting worker 19
starting worker 18
starting worker 10
starting worker 11
starting worker 13
starting worker 12


HBox(children=(IntProgress(value=0, max=39078), HTML(value='')))

HBox(children=(IntProgress(value=0, max=39033), HTML(value='')))

HBox(children=(IntProgress(value=0, max=15137), HTML(value='')))

  from ipykernel import kernelapp as app


HBox(children=(IntProgress(value=0, max=39110), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


HBox(children=(IntProgress(value=0, max=39058), HTML(value='')))

HBox(children=(IntProgress(value=0, max=39087), HTML(value='')))

HBox(children=(IntProgress(value=0, max=39096), HTML(value='')))

HBox(children=(IntProgress(value=0, max=39054), HTML(value='')))

HBox(children=(IntProgress(value=0, max=39095), HTML(value='')))

HBox(children=(IntProgress(value=0, max=34964), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


HBox(children=(IntProgress(value=0, max=39078), HTML(value='')))

HBox(children=(IntProgress(value=0, max=15183), HTML(value='')))

  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


HBox(children=(IntProgress(value=0, max=39092), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


HBox(children=(IntProgress(value=0, max=39079), HTML(value='')))

HBox(children=(IntProgress(value=0, max=39048), HTML(value='')))

  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


HBox(children=(IntProgress(value=0, max=39098), HTML(value='')))

  from ipykernel import kernelapp as app


HBox(children=(IntProgress(value=0, max=39057), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  from ipykernel import kernelapp as app


HBox(children=(IntProgress(value=0, max=39054), HTML(value='')))

HBox(children=(IntProgress(value=0, max=39055), HTML(value='')))

HBox(children=(IntProgress(value=0, max=39096), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)



ending worker 0
starting worker 20


HBox(children=(IntProgress(value=0, max=39036), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app





  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


ending worker 1
starting worker 21


HBox(children=(IntProgress(value=0, max=39020), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)



ending worker 2
starting worker 22


HBox(children=(IntProgress(value=0, max=39103), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)



ending worker 19
starting worker 23


HBox(children=(IntProgress(value=0, max=39054), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)




ending worker 18
starting worker 24



HBox(children=(IntProgress(value=0, max=39084), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


ending worker 16
starting worker 25


HBox(children=(IntProgress(value=0, max=39094), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


ending worker 17
starting worker 26


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


HBox(children=(IntProgress(value=0, max=39048), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)



ending worker 14
starting worker 27


HBox(children=(IntProgress(value=0, max=39086), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app





  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


ending worker 15
starting worker 28


HBox(children=(IntProgress(value=0, max=39095), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app





  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


ending worker 13
starting worker 29


HBox(children=(IntProgress(value=0, max=39083), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)



ending worker 11
starting worker 30


HBox(children=(IntProgress(value=0, max=39084), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)




ending worker 10
starting worker 31


HBox(children=(IntProgress(value=0, max=39058), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


ending worker 12
starting worker 32


HBox(children=(IntProgress(value=0, max=39080), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)



ending worker 9
starting worker 33


HBox(children=(IntProgress(value=0, max=39072), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)



ending worker 6
starting worker 34


HBox(children=(IntProgress(value=0, max=39073), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)




ending worker 7
starting worker 35


HBox(children=(IntProgress(value=0, max=39094), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


ending worker 8
starting worker 36


HBox(children=(IntProgress(value=0, max=39106), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)



ending worker 4
starting worker 37


HBox(children=(IntProgress(value=0, max=39054), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app





  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


ending worker 5
starting worker 38


HBox(children=(IntProgress(value=0, max=39070), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)



ending worker 3
starting worker 39


HBox(children=(IntProgress(value=0, max=39041), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)



ending worker 20
starting worker 40


HBox(children=(IntProgress(value=0, max=39098), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app





  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


ending worker 21
starting worker 41


HBox(children=(IntProgress(value=0, max=39046), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)



ending worker 22
starting worker 42


HBox(children=(IntProgress(value=0, max=39022), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)



ending worker 26
starting worker 43


HBox(children=(IntProgress(value=0, max=39002), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)



ending worker 31
starting worker 44




HBox(children=(IntProgress(value=0, max=39027), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app





  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


ending worker 28
starting worker 45
ending worker 24
starting worker 46


HBox(children=(IntProgress(value=0, max=39069), HTML(value='')))

HBox(children=(IntProgress(value=0, max=39035), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


ending worker 33
starting worker 47


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)





HBox(children=(IntProgress(value=0, max=39063), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)







  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


ending worker 30
starting worker 48


HBox(children=(IntProgress(value=0, max=39090), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


ending worker 36
starting worker 49


HBox(children=(IntProgress(value=0, max=39079), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


ending worker 37
starting worker 50
ending worker 32
starting worker 51


HBox(children=(IntProgress(value=0, max=39079), HTML(value='')))

HBox(children=(IntProgress(value=0, max=39076), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)





  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


ending worker 29
starting worker 52


HBox(children=(IntProgress(value=0, max=39084), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)



ending worker 23
starting worker 53


HBox(children=(IntProgress(value=0, max=39037), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app





  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


ending worker 35
starting worker 54


HBox(children=(IntProgress(value=0, max=39054), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)




ending worker 25
starting worker 55


HBox(children=(IntProgress(value=0, max=39078), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)



ending worker 39
starting worker 56


HBox(children=(IntProgress(value=0, max=39085), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


ending worker 34
starting worker 57


HBox(children=(IntProgress(value=0, max=39076), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)



ending worker 27
starting worker 58


HBox(children=(IntProgress(value=0, max=39085), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)



ending worker 38
starting worker 59


HBox(children=(IntProgress(value=0, max=39051), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)




ending worker 41
starting worker 60


HBox(children=(IntProgress(value=0, max=39077), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


ending worker 40
starting worker 61


HBox(children=(IntProgress(value=0, max=39103), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)




ending worker 57
starting worker 62
ending worker 58
starting worker 63


HBox(children=(IntProgress(value=0, max=39051), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


HBox(children=(IntProgress(value=0, max=39050), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)



ending worker 59
starting worker 64



HBox(children=(IntProgress(value=0, max=39109), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


ending worker 55
starting worker 65


HBox(children=(IntProgress(value=0, max=39093), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)




ending worker 42
starting worker 66


HBox(children=(IntProgress(value=0, max=39086), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


ending worker 56
starting worker 67


HBox(children=(IntProgress(value=0, max=39134), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)



ending worker 53
starting worker 68


HBox(children=(IntProgress(value=0, max=39056), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app





  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)



ending worker 51
starting worker 69
ending worker 52


HBox(children=(IntProgress(value=0, max=39060), HTML(value='')))

starting worker 70


  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


HBox(children=(IntProgress(value=0, max=39083), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)



ending worker 48
starting worker 71


HBox(children=(IntProgress(value=0, max=39027), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)



ending worker 54
starting worker 72


HBox(children=(IntProgress(value=0, max=39077), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)



ending worker 49
starting worker 73


HBox(children=(IntProgress(value=0, max=39070), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)



ending worker 50
starting worker 74


HBox(children=(IntProgress(value=0, max=39061), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)



ending worker 47
starting worker 75


HBox(children=(IntProgress(value=0, max=39030), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)



ending worker 45
starting worker 76



HBox(children=(IntProgress(value=0, max=39092), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


ending worker 46
starting worker 77


HBox(children=(IntProgress(value=0, max=39061), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)



ending worker 43
starting worker 78



HBox(children=(IntProgress(value=0, max=39064), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


ending worker 44
starting worker 79


HBox(children=(IntProgress(value=0, max=39054), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)



ending worker 60
starting worker 80


HBox(children=(IntProgress(value=0, max=39073), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)



ending worker 61
starting worker 81


HBox(children=(IntProgress(value=0, max=39080), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)



ending worker 73
starting worker 82


HBox(children=(IntProgress(value=0, max=39070), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)




ending worker 72
starting worker 83



HBox(children=(IntProgress(value=0, max=39100), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app






  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


ending worker 75
starting worker 84


HBox(children=(IntProgress(value=0, max=39063), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


ending worker 62
starting worker 85
ending worker 74
starting worker 86


HBox(children=(IntProgress(value=0, max=39064), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


ending worker 78
starting worker 87


HBox(children=(IntProgress(value=0, max=39086), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


HBox(children=(IntProgress(value=0, max=39044), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)



ending worker 66
starting worker 88



HBox(children=(IntProgress(value=0, max=39036), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


ending worker 79
starting worker 89


HBox(children=(IntProgress(value=0, max=39076), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)




ending worker 71
starting worker 90



HBox(children=(IntProgress(value=0, max=28537), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app



ending worker 76
starting worker 100


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app





  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


ending worker 68
ending worker 77

ending worker 70


ending worker 64
ending worker 69

ending worker 67

ending worker 63

ending worker 65

ending worker 80

ending worker 81

ending worker 90

ending worker 85


ending worker 89
ending worker 83

ending worker 84



ending worker 82
ending worker 87
ending worker 88

ending worker 86


ValueError: All objects passed were None

In [10]:
with open('../data/tta_0_%s.pkl' % fname, 'rb') as file:
    train_newling = pkl.load(file)
train_newling.head()

Unnamed: 0,object_id,newling_A_0,newling_k_0,newling_sigma_0,newling_A_1,newling_k_1,newling_sigma_1,newling_A_2,newling_k_2,newling_sigma_2,newling_A_3,newling_k_3,newling_sigma_3,newling_A_4,newling_k_4,newling_sigma_4,newling_A_5,newling_k_5,newling_sigma_5
0,615,,,,,,,,,,,,,,,,,,
0,713,14.509829,0.05,7.017295,9.129021,4.622224,3.349569,10.529041,0.125847,15.339687,11.330316,0.098584,21.119081,9.827934,5.0,3.160493,14.770886,0.05,9.747784
0,730,5.94217,2.584637,3.260867,5.693109,2.962169,15.973989,31.492067,2.340494,26.221229,37.263821,5.0,16.182763,61.739972,2.798004,22.820865,51.340015,5.0,16.659592
0,745,72.056116,0.26729,39.706623,768.977172,4.222222,9.537317,883.180848,1.186198,20.37933,304.876053,1.860789,24.566073,275.449677,1.840142,25.133966,223.07685,1.312875,30.183803
0,1124,7.996391,0.05,82.444844,106.133606,0.05,61.384591,426.686768,0.05598,47.769715,559.27362,0.264181,35.576928,574.400756,0.6026,27.256361,436.63034,1.24593,19.833317


In [11]:
with open('../data/train_%s.pkl' % fname, 'wb') as file:
     pkl.dump(train_newling, file)


In [12]:
test_newling_l = []
for (i, fname) in params:
    with open('../data/test_%d_%s.pkl' % (i, fname), 'rb') as file:
        test_newling = pkl.load(file)
        test_newling_l.append(test_newling[train_newling.columns])
len(test_newling_l)

FileNotFoundError: [Errno 2] No such file or directory: '../data/test_100_newling_003.pkl'

In [18]:
work_test((100, fname))

starting worker 100


HBox(children=(IntProgress(value=0, max=1), HTML(value='')))

104853812

ending worker 100


  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app


'done'

In [13]:
test_newling = pd.concat(test_newling_l, axis=0)

In [14]:
test_newling.shape

(2037682, 19)

In [15]:
test_newling.head()

Unnamed: 0,object_id,newling_A_0,newling_k_0,newling_sigma_0,newling_A_1,newling_k_1,newling_sigma_1,newling_A_2,newling_k_2,newling_sigma_2,newling_A_3,newling_k_3,newling_sigma_3,newling_A_4,newling_k_4,newling_sigma_4,newling_A_5,newling_k_5,newling_sigma_5
0,13,5.81627,1.208463,17.29934,42.136626,0.05,32.664476,36.794466,0.05,51.936197,44.750648,0.05,77.229948,64.148255,0.05,78.899159,37.369808,0.123168,88.986026
0,14,12.74393,0.05,1.0,20.622435,5.0,4.96106,39.105114,0.05,17.239586,10.469539,0.05,67.694468,9.230726,0.05,100.0,14.839427,0.05,39.054852
0,17,12.112982,5.0,1.607649,12.12654,0.650409,10.571518,12.973317,0.408951,13.127777,10.891992,0.71901,13.812539,12.109291,0.514319,24.053213,16.76128,4.420174,1.0
0,23,,,,,,,8.081628,0.05,9.331049,19.289775,0.214456,3.192167,21.27269,0.05,31.211123,28.061138,0.096637,18.197838
0,34,19.611341,0.553714,8.036258,69.579277,0.0875,35.491928,124.475609,0.456569,42.73953,122.027508,2.198656,7.124429,167.285843,0.05,22.914417,136.141034,0.05,24.875872


In [19]:
test_newling = test_newling.groupby('object_id').mean().reset_index()
test_newling.shape

(2037682, 19)

In [20]:
with open('../data/test_%s.pkl' % fname, 'wb') as file:
     pkl.dump(test_newling, file)
