# Interpolator script

This script fills in the missing values in the sign-language data by interpolation, since the examples do not all have the same number of observations.

In [1]:
import numpy as np
import random
from scipy.interpolate import interp1d

In [2]:
# Load the input arrays from the working directory. `x` is the array that is
# later passed to the interpolator; `y` is loaded but not used in the code
# visible here.
# NOTE(review): presumably `x` holds NaN-padded examples and `y` the matching
# labels -- confirm against whatever produced these files.
x = np.load('x.npy')
y = np.load('y.npy')

In [3]:
# This is the version of the interpolator that uses the numpy.interp linear interpolator

def npinterpolate(data, n_vars=22, n_points=136):
    """Stretch every example to a fixed length using ``numpy.interp``.

    Each row of ``data`` is read as ``n_vars`` consecutive blocks of ``dim``
    values, where ``dim`` is the row's non-NaN count divided by ``n_vars``
    (rounded down). For every block, the ``dim`` values are dropped, in
    order, onto ``dim`` randomly chosen positions of a length-``n_points``
    scaffold, and the remaining positions are filled by linear interpolation
    (``numpy.interp`` holds the edge values constant outside the known
    range).

    The random positions come from the module-level ``random`` state, so
    call ``random.seed(...)`` beforehand for reproducible output.

    Parameters
    ----------
    data : 2-D float ndarray
        One example per row. NOTE(review): the layout (variable blocks
        first, NaN padding at the end of the row) is assumed from how the
        row is sliced -- confirm against the code that builds ``x.npy``.
    n_vars : int, optional
        Number of variables stored in each row (default 22).
    n_points : int, optional
        Length every variable is stretched to (default 136).

    Returns
    -------
    ndarray of shape ``(data.shape[0], n_vars * n_points)``
        The interpolated examples; with the defaults and a 2992-column
        input this is the same shape as ``data``.

    Raises
    ------
    ValueError
        If a row holds fewer than ``n_vars`` non-NaN values, or more
        observations per variable than ``n_points``.
    """
    n_rows = data.shape[0]
    grid = np.arange(n_points)
    result = np.empty((n_rows, n_vars * n_points), dtype=float)

    for row_idx in range(n_rows):
        row = data[row_idx]
        # Builtin int instead of np.int, which was removed in NumPy 1.24.
        dim = int(np.count_nonzero(~np.isnan(row))) // n_vars
        if dim < 1:
            raise ValueError(
                "row {} has fewer than {} non-NaN values; nothing to "
                "interpolate".format(row_idx, n_vars)
            )

        for var in range(n_vars):
            values = row[var * dim:(var + 1) * dim]
            # Sorted random slots keep the observations in their original order.
            slots = np.sort(random.sample(range(n_points), dim))

            scaffold = np.full(n_points, np.nan)
            scaffold[slots] = values

            # Anything still NaN (unfilled slot, or NaN in the source block)
            # is interpolated from the known points.
            missing = np.isnan(scaffold)
            scaffold[missing] = np.interp(
                grid[missing], grid[~missing], scaffold[~missing]
            )
            result[row_idx, var * n_points:(var + 1) * n_points] = scaffold

    return result

In [10]:
# This is the version of the interpolator script that uses the scipy interpolator with kind = slinear

def scipyinterpolate(data, n_vars=22, n_points=136):
    """Stretch every example to a fixed length using ``scipy`` ``interp1d``.

    Each row of ``data`` is read as ``n_vars`` consecutive blocks of ``dim``
    values, where ``dim`` is the row's non-NaN count divided by ``n_vars``
    (rounded down). For every block, the first and last observation are
    pinned to positions 1 and ``n_points``; the other ``dim - 2``
    observations are placed, in order, on randomly chosen interior
    positions. The remaining positions are filled with a first-order spline
    (``kind='slinear'``) through the known points.

    The random positions come from the module-level ``random`` state, so
    call ``random.seed(...)`` beforehand for reproducible output.

    Parameters
    ----------
    data : 2-D float ndarray
        One example per row. NOTE(review): the layout (variable blocks
        first, NaN padding at the end of the row) is assumed from how the
        row is sliced -- confirm against the code that builds ``x.npy``.
    n_vars : int, optional
        Number of variables stored in each row (default 22).
    n_points : int, optional
        Length every variable is stretched to (default 136).

    Returns
    -------
    ndarray of shape ``(data.shape[0], n_vars * n_points)``
        The interpolated examples; with the defaults and a 2992-column
        input this is the same shape as ``data``.

    Raises
    ------
    ValueError
        If a row has fewer than two observations per variable (both end
        points are required), or more observations than ``n_points``.
    """
    n_rows = data.shape[0]
    all_positions = np.arange(1, n_points + 1)  # 1-based, as in interp1d's x axis
    result = np.empty((n_rows, n_vars * n_points), dtype=float)

    for row_idx in range(n_rows):
        row = data[row_idx]
        # Builtin int instead of np.int, which was removed in NumPy 1.24.
        dim = int(np.count_nonzero(~np.isnan(row))) // n_vars
        if dim < 2:
            raise ValueError(
                "row {} has {} observation(s) per variable; at least 2 are "
                "needed to pin both end points".format(row_idx, dim)
            )

        for var in range(n_vars):
            values = row[var * dim:(var + 1) * dim]

            # End points are fixed; only interior positions are randomised.
            interior = sorted(random.sample(range(2, n_points), dim - 2))
            known = np.array([1] + interior + [n_points])
            missing = np.setdiff1d(all_positions, known)

            # One scaffold per variable holds n_points slots (not the full
            # row width), so the pieces concatenate back to one row.
            scaffold = np.full(n_points, np.nan)
            scaffold[known - 1] = values
            if missing.size:
                spline = interp1d(known, values, kind='slinear')
                scaffold[missing - 1] = spline(missing)

            result[row_idx, var * n_points:(var + 1) * n_points] = scaffold

    return result

In [4]:
# Interpolate every example in `x` with the numpy-based interpolator.
# The scipy-based variant is left disabled below.
x_npinterp = npinterpolate(x)
#x_scipyinterp = scipyinterpolate(x)

In [5]:
# Persist the interpolated array.
# NOTE(review): this writes under 'data/' while the inputs were loaded from the
# working directory -- confirm that the 'data' directory exists before running.
np.save('data/x_npinterp.npy',x_npinterp)
#np.save('x_scipyinterp.npy',x_scipyinterp)

In [8]:
# Observations per variable (non-NaN count / 22) for every example in `x`;
# the cell displays the largest of them.

size = [np.count_nonzero(~np.isnan(example)) / 22 for example in x]
np.max(size)

136.0