In [4]:
import pandas as pd
import random as rand

import numpy as np
from functools import singledispatch, update_wrapper

from multipledispatch import dispatch
from numbers import Real
from numbers import Number

from functools import reduce

In [5]:
import libpysat
from libpysat 

SyntaxError: invalid syntax (<ipython-input-5-6c2aa4d53ec2>, line 2)

In [6]:
@dispatch(Real, pd.Float64Index)
def _get_subindices(scalar, indices, tolerance=0):
    """
    Returns every index within the range [scalar +/- tolerance].
    
    >>> indices = pd.Float64Index([2.3, 3.55, 6.23, 9.99])
    >>> _get_subindices(10, indices, tolerance=.5)
    9.99
    
    Parameters
    ----------
    
    scalar : Number
             The scalar to access
    
    indices : pandas.Float64Index
              The index to access
              
    Returns
    -------
    
    : iterable
      the values that were inside the tolerance range for the scalar
      
    """
    
    start = scalar - tolerance
    stop = scalar + tolerance
    subindices = indices.where((indices >= start) & (indices <= stop)).dropna().astype(float)
    
    if subindices.size == 0:
        raise KeyError('{} is not in Index'.format(scalar))
    
    return subindices.__array__()


@dispatch((pd.Series, list, set, np.ndarray), pd.Float64Index)
def _get_subindices(iterable, indices, tolerance=0):
    """
    Returns every index within the range [scalar +/- tolerance] from a iterable of 
    values.
    
    >>> l = [2, 9]
    >>> indices = pd.Float64Index([2.3, 3.55, 6.23, 9.99])
    >>> _get_subindices(l, indices, tolerance=.5)
    [2.3, 9.99]
    
    Parameters
    ----------
    
    iterable : iterable
               container of values 
    
    indices : pandas.Float64Index
              The index to access
              
    Returns
    -------
    
    : list
      list of subindices within range
    
    """
    subindices = []
    for x in iterable:
        subindices.extend(_get_subindices(x, indices, tolerance=tolerance))
    return subindices


@dispatch(slice, pd.Float64Index)
def _get_subindices(s, indices, tolerance=0):
    """
    Returns every index within the range [scalar +/- tolerance] from a iterable of 
    values.
    
    >>> l = [2, 9]
    >>> indices = pd.Float64Index([2.3, 3.55, 6.23, 9.99])
    >>> _get_subindices(l, indices, tolerance=.5)
    [2.3, 9.99]
    
    Parameters
    ----------
    
    iterable : iterable
               container of values 
    
    indices : pandas.Float64Index
              The index to access
              
    Returns
    -------
    
    : list
      list of subindices within range
    
    """
    start = s.start
    stop = s.stop
    
    if start is None:
        start = -np.inf
    
    if stop is None:
        stop = np.inf
    
    subindices = indices.where((indices >= start) & (indices <= stop)).dropna().astype(float)
    
    if subindices.size == 0:
        raise KeyError('{} is not in range'.format(s))
        
    return subindices


@dispatch(object, pd.Index)
def _get_subindices(o, indices, tolerance=0):
    """
    Default function, returns the original input. This is just bookeeping
    for trying to use _get_subindices for indices that are not floating 
    point indices. 
    
    
    Parameters
    ----------
    
    o : object
               any python object
    
    indices : pandas.Index
              any pandas index
              
    Returns
    -------
    
    : object
      just returns o
    
    """
    return o


@dispatch((pd.Series, list, set, slice, np.ndarray, Real), tuple)
def _get_subindices(key, indexes, tolerance=0):
    """
    Returns 
    
    
    Parameters
    ----------
    
    o : object
               any python object
    
    indices : pandas.Index
              any pandas index
              
    Returns
    -------
    
    : object
      just returns o
    
    """
    return _get_subindices(key, indexes[0], tolerance=tolerance)


@dispatch(tuple, tuple)
def _get_subindices(keys, indexes, tolerance=0):
    """
    Unpacks a tuple of keys and and tuple of indices. Matches the indices with 
    keys and dispatches to the other _get_subindices. 
    
    
    Parameters
    ----------
    
    keys : tuple
           a tuple of keys
    
    indices : tuple
              a tuple of indices
              
    Returns
    -------
    
    : tuple
      tuple of resulting indices
    
    """
    
    num_keys = len(keys)
    num_indexes = len(indexes)
    
    if num_keys > num_indexes:
        raise KeyError('{} keys for {}-dimensional keyspace'.format(num_keys, num_indexes))
    
    dim = max(num_keys, num_indexes)
    keys = keys[:dim]
    indexes = indexes[:dim]
    
    subindexes = tuple([_get_subindices(key, index, tolerance=tolerance) for key, index in zip(keys, indexes)])
    
    return subindexes

In [179]:
class SpectrumLocIndexer(pd.core.indexing._LocIndexer):
    """
    """

    @property
    def tolerance(self):
        if not hasattr(self, '_tolerance'):
            self._tolerance = .5
        return self._tolerance
    
    
    @tolerance.setter
    def tolerance(self, val):
        self._tolerance = val
    
    
    def __getitem__(self, key):
        try:
            x,y,columns = None, None, self.obj.wavelengths
            
            if isinstance(self.obj.index, pd.MultiIndex):
                x,y = self.obj.index.levels
                indexes = x,y, self.obj.wavelengths
                subindices = _get_subindices(key, indexes, tolerance=self._tolerance)
                        
                x = subindices[0:1] if subindices[0:1] else tuple([slice(None, None)])
                y = subindices[1:2] if subindices[1:2] else tuple([slice(None, None)])
                columns = subindices[2:3] if subindices[2:3] else tuple([slice(None, None)])
                
                columns = columns[0]
                if isinstance(columns, pd.Index):
                    columns = columns.union(self.obj.metadata)
                
                subindices = tuple([[x[0], y[0]], columns])
            
            else:
                x = self.obj.index
                indexes = x,columns
                subindices = _get_subindices(key, indexes, tolerance=self._tolerance)
                
                x = subindices[0:1] if subindices[0:1] else tuple([slice(None, None)])
                columns = subindices[1:2] if subindices[1:2] else tuple([slice(None, None)])
                
                columns = columns[0]
                if isinstance(columns, pd.Index):
                    columns = columns.union(self.obj.metadata)
                
                subindices = tuple([x[0], columns])
                
            
            subframe = super(SpectrumLocIndexer, self).__getitem__(subindices)  
        
        except Exception as e:
            subframe = super(SpectrumLocIndexer, self).__getitem__(key)
        
        if isinstance(subframe, Spectrum):
            subframe.wavelengths = self.obj.wavelengths
            subframe.metadata = self.obj.metadata
        
        return subframe
        
        
class SpectrumiLocIndexer(pd.core.indexing._iLocIndexer):
    """
    """
    
    def __getitem__(self, key):
        subframe = super(SpectrumiLocIndexer, self).__getitem__(key)
        
        if isinstance(subframe, Spectrum):
            subframe.wavelengths = self.obj.wavelengths
            subframe.metadata = self.obj.metadata
        
        return subframe
    
    
class Spectrum(pd.Series):
    
    _metadata = ['_loc', 'wavelengths', 'metadata']
    
    def __init__(self, *args, **kwargs):
        wavelengths = kwargs.pop('wavelengths', None)
        metadata = kwargs.pop('metadata', None)
        _loc = kwargs.pop('loc', None)
        super(Spectrum, self).__init__(*args, **kwargs)
        
    @property
    def _constructor(self):
        return Spectrum

    @property
    def _constructor_expanddim(self):
        return pd.DataFrame

        
class Spectra(object):
    """
    """
    
    def __init__(self, df = None, wavelengths={}, metadata={}, tolerance=.5):
        if df is not None:
            self._data = df
        else:
            self._data = pd.DataFrame()
        
        self.wavelengths = pd.Float64Index(wavelengths)
        self.metadata = metadata
       
        if isinstance(df, pd.DataFrame):
            self.metadata = df.columns.difference(self.wavelengths)
        else:
            self.metadata = df.index.difference(self.wavelengths)
    
        loc_name = self._data.loc.name
        iloc_name = self._data.iloc.name
        self._iloc = SpectrumiLocIndexer(name=iloc_name, obj=self)
        self._loc = SpectrumLocIndexer(name=loc_name, obj=self)
        self._loc.tolerance = tolerance
        
        self._get_axis = self._data._get_axis
        self._get_axis_name = self._data._get_axis_name
        self._slice = self._data._slice
        self._xs = self._data._xs   
        self._ixs = self._data._ixs
        self._data._constructor_sliced = Spectrum

                          
    def __repr__(self):
        return self._data.__repr__()
        
        
    @property
    def loc(self):
        return self._loc
    
        
    @property
    def iloc(self):
        return self._iloc
    
    @property
    def take(self):
        return self._data.take
    
    
    def head(self, n=5):
        return self._data.head()
    
    
    @property
    def index(self):
        return self._data.index
    
    @property
    def columns(self):
        return self._data.columns
    
    @property
    def ndim(self):
        return self._data.ndim
    
    @property
    def sort_index(self):
        return self._data.sort_index
    
    @property
    def reindex(self):
        return self._data.reindex
    
    
    @property
    def axes(self):
        return self._data.axes
    
    
    @property
    def iterrows(self):
        return self._data.iterrows
    
    
    def apply(self, func, *args, **kwargs):
        return self._data.apply(func, *args, **kwargs)

            
    def apply_spectra(self, func, *args, **kwargs):
        self._data = self.apply(func, args, axis=1, **kwargs)
    
        
    @property
    def tolerance(self):
        if not hasattr(self._loc, '_tolerance'):
            self._loc._tolerance = .5
        return self._loc._tolerance
    
    
    @tolerance.setter
    def tolerance(self, val):
        self._loc._tolerance = val

# 2-D Index Test

In [185]:
columns = sorted([rand.random()*1000 for i in range(30)])
columns.extend(['x','y', 'data', 'data2'])

data = [dict(zip(columns, [rand.randint(10,200) for i in range(len(columns))] )) for i in range(1,100)]
qub = pd.DataFrame(data).set_index(['x','y'])

In [186]:
data = Spectra(qub, wavelengths = set(qub.columns)-{'x','y', 'data', 'data2'})
data.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,30.39824868335561,35.40090313916511,59.345471559650754,82.30337540584077,86.79736707640062,100.1424114078312,100.32668800118716,105.46765530189339,142.09893738128898,249.9545906248516,...,754.1290775629111,804.8174444521397,843.0066940393215,846.8087981282341,868.1359389246728,928.1594194198406,930.7963728572419,985.4370216691224,data,data2
x,y,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
172,146,87,78,121,114,112,28,104,168,118,58,...,193,123,167,56,137,146,90,26,40,197
126,69,147,170,197,93,92,129,191,166,189,31,...,93,28,114,22,73,157,109,30,41,155
138,45,64,101,56,45,10,146,174,54,179,34,...,41,60,95,18,130,131,40,18,79,195
192,81,185,191,40,134,39,68,173,122,142,57,...,188,28,158,187,170,26,152,169,166,88
155,100,142,118,104,113,195,174,152,145,89,27,...,66,199,117,181,80,54,76,199,85,88


In [187]:
data.metadata

Index(['data', 'data2'], dtype='object')

In [188]:
data.iloc[0].wavelengths

Float64Index([142.098937381, 537.210519539, 30.3982486834,  928.15941942,
              930.796372857, 35.4009031392, 804.817444452, 305.639693699,
              567.857863572, 59.3454715597, 700.834301399, 701.534372191,
              706.392896393, 713.179004396, 843.006694039, 846.808798128,
              82.3033754058, 86.7973670764, 985.437021669, 100.142411408,
              100.326688001, 613.344603884, 868.135938925, 105.467655302,
              619.914480695, 368.633294925, 754.129077563, 501.933533212,
              249.954590625, 378.582443915],
             dtype='float64')

In [189]:
# Slicing the spectral data also utilizes the tolerence 
data.sort_index(inplace=True)
data.loc[142:400, 135:500, 100:10000].head()

trivial
trivial
(slice(142, 400, None), slice(135, 500, None), Float64Index([142.098937381, 537.210519539,  928.15941942, 930.796372857,
              804.817444452, 305.639693699, 567.857863572, 700.834301399,
              701.534372191, 706.392896393, 713.179004396, 843.006694039,
              846.808798128, 985.437021669, 100.142411408, 100.326688001,
              613.344603884, 868.135938925, 105.467655302, 619.914480695,
              368.633294925, 754.129077563, 501.933533212, 249.954590625,
              378.582443915],
             dtype='float64'))
Index([142.09893738128898,  537.2105195389917,  928.1594194198406,
        930.7963728572419,  804.8174444521397,  305.6396936989828,
         567.857863572013,  700.8343013989669,  701.5343721908899,
        706.3928963927668,    713.17900439622,  843.0066940393215,
        846.8087981282341,  985.4370216691224,  100.1424114078312,
       100.32668800118716,  613.3446038841573,  868.1359389246728,
       105.46765530189339,  61



Unnamed: 0_level_0,Unnamed: 1_level_0,142.09893738128898,537.2105195389917,928.1594194198406,930.7963728572419,804.8174444521397,305.6396936989828,567.857863572013,700.8343013989669,701.5343721908899,706.3928963927668,...,868.1359389246728,105.46765530189339,619.9144806951776,368.6332949245175,754.1290775629111,501.9335332116035,249.9545906248516,378.58244391482464,data,data2
x,y,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
137,112,169,18,37,23,72,109,136,92,163,83,...,158,74,96,130,195,195,100,43,86,22
138,45,179,170,131,40,60,173,16,42,38,196,...,130,54,145,13,41,72,34,44,79,195
139,13,41,161,57,70,176,54,139,166,196,105,...,121,46,146,124,166,161,131,166,171,143
140,188,125,168,41,61,55,87,26,106,123,45,...,177,25,126,92,146,163,154,118,36,16
141,133,37,113,112,147,37,177,71,163,126,61,...,86,130,62,161,110,198,26,46,89,183


In [190]:
data.loc[12:50,16:200,:].head()

trivial
trivial
(slice(12, 50, None), slice(16, 200, None), Float64Index([142.098937381, 537.210519539, 30.3982486834,  928.15941942,
              930.796372857, 35.4009031392, 804.817444452, 305.639693699,
              567.857863572, 59.3454715597, 700.834301399, 701.534372191,
              706.392896393, 713.179004396, 843.006694039, 846.808798128,
              82.3033754058, 86.7973670764, 985.437021669, 100.142411408,
              100.326688001, 613.344603884, 868.135938925, 105.467655302,
              619.914480695, 368.633294925, 754.129077563, 501.933533212,
              249.954590625, 378.582443915],
             dtype='float64'))
Index([142.09893738128898,  537.2105195389917,  30.39824868335561,
        928.1594194198406,  930.7963728572419,  35.40090313916511,
        804.8174444521397,  305.6396936989828,   567.857863572013,
       59.345471559650754,  700.8343013989669,  701.5343721908899,
        706.3928963927668,    713.17900439622,  843.0066940393215,
        846



Unnamed: 0_level_0,Unnamed: 1_level_0,142.09893738128898,537.2105195389917,30.39824868335561,928.1594194198406,930.7963728572419,35.40090313916511,804.8174444521397,305.6396936989828,567.857863572013,59.345471559650754,...,868.1359389246728,105.46765530189339,619.9144806951776,368.6332949245175,754.1290775629111,501.9335332116035,249.9545906248516,378.58244391482464,data,data2
x,y,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
14,28,16,32,65,78,195,44,163,34,115,108,...,97,78,126,130,96,69,171,166,69,169
17,121,42,188,35,89,46,145,51,159,124,53,...,21,49,92,158,39,31,160,78,146,41
19,187,39,26,164,185,45,135,148,193,86,148,...,196,27,154,107,10,170,115,142,71,150
27,181,147,104,196,82,130,197,23,140,124,187,...,84,65,84,62,130,78,195,145,92,95
29,94,128,13,14,90,177,56,112,133,163,162,...,84,29,47,66,156,163,13,126,110,173


In [91]:
data.loc[12:90,100:200,118:900].head()

trivial
trivial
(slice(12, 90, None), slice(100, 200, None), Float64Index([ 711.66845142, 850.487963627, 276.957775241, 853.957743926,
              664.915572575, 281.099569044,  542.11951351, 159.871898565,
              416.625628418, 679.861747827,  810.68721216, 556.307709096,
              492.410818033, 492.865857942, 753.118854158, 244.381794582,
              437.087580131, 125.006695266, 824.711771598, 635.014351831,
               124.51298623, 445.647976897],
             dtype='float64'))
Float64Index([ 711.66845142, 850.487963627, 276.957775241, 853.957743926,
              664.915572575, 281.099569044,  542.11951351, 159.871898565,
              416.625628418, 679.861747827,  810.68721216, 556.307709096,
              492.410818033, 492.865857942, 753.118854158, 244.381794582,
              437.087580131, 125.006695266, 824.711771598, 635.014351831,
               124.51298623, 445.647976897],
             dtype='float64')


Unnamed: 0_level_0,Unnamed: 1_level_0,711.66845142,850.487963627,276.957775241,853.957743926,664.915572575,281.099569044,542.11951351,159.871898565,416.625628418,679.861747827,...,492.410818033,492.865857942,753.118854158,244.381794582,437.087580131,125.006695266,824.711771598,635.014351831,124.51298623,445.647976897
x,y,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
12,156,181,92,21,60,47,142,39,195,17,25,...,11,27,41,80,23,68,175,134,183,149
16,188,200,115,24,12,174,38,42,76,198,32,...,161,64,70,121,138,196,40,24,192,200
17,149,83,162,80,155,101,168,183,26,148,85,...,138,52,159,49,127,179,131,116,167,81
19,152,33,133,189,171,72,147,77,117,129,176,...,187,142,15,31,66,105,181,110,133,148
23,53,139,185,151,137,58,147,43,139,135,118,...,20,24,102,132,142,42,66,55,94,133


# 1-D Index test

In [592]:
columns = sorted([rand.random()*1000 for i in range(30)])

data = [dict(zip(columns, [rand.randint(10,200) for i in range(len(columns))] )) for i in range(1,20)]
qub = pd.DataFrame(data)
data = Spectra(qub, wavelengths = set(qub.columns))
data.head()

Unnamed: 0,5.66422202647,26.4229312246,27.0792774379,108.626961035,132.404701432,137.529029535,137.958335946,162.864737132,310.354462635,356.172106541,...,649.245586856,759.569084639,767.905664919,802.212809912,855.8721223,868.789169737,881.984457553,883.740529957,899.504665981,916.335919706
0,15,89,157,84,142,135,43,56,117,83,...,183,26,165,12,20,134,89,118,31,12
1,153,172,172,171,151,103,170,24,25,183,...,71,98,37,18,20,40,149,70,99,144
2,83,39,29,196,33,155,122,61,107,44,...,143,182,76,56,85,147,153,56,37,134
3,106,137,125,85,13,177,190,27,64,62,...,153,56,16,183,70,146,126,34,79,196
4,35,121,91,183,156,31,90,145,173,55,...,119,114,20,138,144,91,177,36,83,92


In [591]:
data.loc[3:10, [61]].head()

trivial
(slice(3, 10, None), [61.490218005108119])
(slice(3, 10, None),) ([61.490218005108119],)


Unnamed: 0,61.4902180051
3,197
4,38
5,168
6,38
7,177


In [524]:
# test = data.loc[0]

In [525]:
# test.loc[6]

In [526]:
from timeit import timeit
import time
import numpy as np




In [574]:
df = pd.DataFrame(np.arange(20).reshape(2,10))

In [577]:
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,0,1,2,3,4,5,6,7,8,9
1,10,11,12,13,14,15,16,17,18,19


In [535]:
type(df.iloc[0])

pandas.core.series.Series

In [536]:
# df._constructor_sliced = Spectra

In [537]:
df.iloc[0].iloc

<pandas.core.indexing._iLocIndexer at 0x10d896c50>

In [538]:
arr = np.array([1,2,3,4])

In [573]:
df.loc[0:4, 5:10]

Unnamed: 0,5,6,7,8,9
0,5,6,7,8,9
1,15,16,17,18,19


In [540]:
tuple([1,2,3])[3:4]

()