Skip to content

Commit

Permalink
More unauthorized work
Browse files Browse the repository at this point in the history
  • Loading branch information
bashtage committed Feb 23, 2017
1 parent 1d47d1e commit 9d48c8f
Show file tree
Hide file tree
Showing 7 changed files with 350 additions and 145 deletions.
4 changes: 4 additions & 0 deletions doc/source/_static/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Ignore everything in this directory
*
# Except this file
!.gitignore
2 changes: 1 addition & 1 deletion panel/iv/covariance.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,7 +347,7 @@ class IVGMMCovariance(HomoskedasticCovariance):
Weighting matrix used in GMM estimation
"""

def __init__(self, x, y, z, params, w):
def __init__(self, x, y, z, params, w, **cov_config):
super(IVGMMCovariance, self).__init__(x, y, z, params, False)
self.w = w

Expand Down
95 changes: 95 additions & 0 deletions panel/iv/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
import numpy as np
import pandas as pd
import xarray as xr

# Error-message templates shared by the data-handling code below.
# dim_err is formatted with two positional arguments (variable name, actual
# number of dimensions), so its placeholders must be {0} and {1}.
dim_err = '{0} has too many dims. Maximum is 2, actual is {1}'
type_err = 'Only ndarrays, DataArrays and Series and DataFrames are permitted'


def convert_columns(s):
    """
    Expand a categorical Series into dummy (indicator) columns.

    Parameters
    ----------
    s : Series
        Column to convert.

    Returns
    -------
    out : {Series, DataFrame}
        ``s`` itself, unchanged, when its dtype is not categorical.
        Otherwise the DataFrame produced by ``pd.get_dummies`` with the
        first category dropped and columns renamed to
        ``<series name>.<category>``.
    """
    # Check the dtype directly; pd.api.types.is_categorical is no longer
    # available in current pandas releases.
    if isinstance(getattr(s, 'dtype', None), pd.CategoricalDtype):
        out = pd.get_dummies(s, drop_first=True)
        # format() stringifies both parts, so non-string category labels
        # (e.g. integers) or names do not break the concatenation.
        out.columns = ['{0}.{1}'.format(s.name, c) for c in out]
        return out
    return s


def expand_categoricals(x):
    """
    Replace categorical columns of pandas input with dummy columns.

    Parameters
    ----------
    x : {Series, DataFrame}
        Data whose categorical columns are expanded via ``convert_columns``.

    Returns
    -------
    out : {Series, DataFrame, None}
        For a Series, the result of ``convert_columns``.  For a DataFrame,
        the column-wise concatenation of ``convert_columns`` applied to each
        column (a DataFrame with no columns is returned as-is).  ``None``
        for any other input type.

    Raises
    ------
    NotImplementedError
        If ``x`` is a ``pd.Panel`` (only possible on pandas versions that
        still provide that class).
    """
    if isinstance(x, pd.Series):
        return convert_columns(x)
    if isinstance(x, pd.DataFrame):
        if x.shape[1] == 0:
            # pd.concat raises on an empty list; nothing to expand anyway.
            return x
        return pd.concat([convert_columns(x[c]) for c in x.columns], axis=1)
    # pd.Panel does not exist in current pandas; look it up defensively so
    # that unsupported inputs do not fail with an AttributeError here.
    panel_type = getattr(pd, 'Panel', None)
    if panel_type is not None and isinstance(x, panel_type):
        raise NotImplementedError
    return None


class DataHandler(object):
    """
    Present array-like input as both a 2-d ndarray and a DataFrame.

    Parameters
    ----------
    x : {ndarray, Series, DataFrame, DataHandler}
        Data to wrap.  At most 2 dimensions.  When a DataHandler is passed,
        its ``original`` input is re-processed.
    var_name : str, optional
        Name used to build column labels for ndarray input
        (``var_name.0``, ``var_name.1``, ...) and as the column name for a
        non-categorical Series.  Also used in the dimension error message.

    Raises
    ------
    TypeError
        If ``x`` is not one of the supported types.
    ValueError
        If ``x`` has more than 2 dimensions, or pandas input contains a
        column that is neither numeric nor categorical.
    NotImplementedError
        If ``x`` is an xarray DataArray.
    """

    def __init__(self, x, var_name='x'):

        if isinstance(x, DataHandler):
            x = x.original

        self.original = x
        # Objects without ``ndim`` (lists, scalars, ...) cannot be any of the
        # supported types, so report them with the intended TypeError rather
        # than letting the attribute access fail.
        xndim = getattr(x, 'ndim', None)
        if xndim is None:
            raise TypeError(type_err)
        if xndim > 2:
            raise ValueError(dim_err.format(var_name, xndim))

        if isinstance(x, np.ndarray):
            if xndim == 1:
                # Explicit trailing dimension of 1; using -1 is ambiguous for
                # zero-length input.  The input array itself is not modified.
                x = x.reshape((x.shape[0], 1))
            else:
                x = x.view()

            self._ndarray = x
            index = list(range(x.shape[0]))
            cols = [var_name + '.{0}'.format(i) for i in range(x.shape[1])]
            self._pandas = pd.DataFrame(x, index=index, columns=cols)
            self._labels = {0: index,
                            1: cols}

        elif isinstance(x, (pd.Series, pd.DataFrame)):
            dts = [x.dtype] if xndim == 1 else x.dtypes
            for dt in dts:
                if not (pd.api.types.is_numeric_dtype(dt)
                        or isinstance(dt, pd.CategoricalDtype)):
                    raise ValueError('Only numeric or categorical data permitted')

            # Categorical columns become dummy columns; a categorical Series
            # therefore comes back as a DataFrame.
            x = expand_categoricals(x)
            if x.ndim == 1:
                x = pd.DataFrame({var_name: x})

            self._pandas = x
            self._ndarray = self._pandas.values
            self._labels = {i: list(label) for i, label in enumerate(x.axes)}

        elif isinstance(x, xr.DataArray):
            raise NotImplementedError('Not implemented yet.')
        else:
            raise TypeError(type_err)

    @property
    def pandas(self):
        """DataFrame view of the data."""
        return self._pandas

    @property
    def ndarray(self):
        """2-d ndarray view of the data."""
        return self._ndarray

    @property
    def shape(self):
        """Shape of the 2-d ndarray."""
        return self._ndarray.shape

    @property
    def ndim(self):
        """Number of dimensions of the stored ndarray."""
        return self._ndarray.ndim

    @property
    def cols(self):
        """List of column labels."""
        return self._labels[1]

    @property
    def rows(self):
        """List of row labels."""
        return self._labels[0]

    @property
    def labels(self):
        """Dict mapping axis number (0: rows, 1: columns) to label lists."""
        return self._labels

0 comments on commit 9d48c8f

Please sign in to comment.