/
array.py
219 lines (182 loc) · 6.08 KB
/
array.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
"""
Utility functions that do not explicitly relate to Volatility modeling
"""
from __future__ import absolute_import, division, print_function
from arch.compat.pandas import is_datetime64_dtype
from arch.compat.python import long
import datetime as dt
import numpy as np
from pandas import DataFrame, NaT, Series, Timestamp, to_datetime
__all__ = ['ensure1d', 'parse_dataframe', 'DocStringInheritor',
'date_to_index']
deprecation_doc = """
{func} has been moved. Please use {new_location}.{func}.
"""
def ensure1d(x, name, series=False):
if isinstance(x, Series):
if not isinstance(x.name, str):
x.name = str(x.name)
if series:
return x
else:
return np.asarray(x)
if isinstance(x, DataFrame):
if x.shape[1] != 1:
raise ValueError(name + ' must be squeezable to 1 dimension')
else:
x = Series(x.iloc[:, 0], x.index)
if not isinstance(x.name, str):
x.name = str(x.name)
if series:
return x
else:
return np.asarray(x)
if not isinstance(x, np.ndarray):
x = np.asarray(x)
if x.ndim == 0:
x = x[None]
elif x.ndim != 1:
x = np.squeeze(x)
if x.ndim != 1:
raise ValueError(name + ' must be squeezable to 1 dimension')
if series:
return Series(x, name=name)
else:
return np.asarray(x)
def ensure2d(x, name):
if isinstance(x, Series):
return DataFrame(x)
elif isinstance(x, DataFrame):
return x
elif isinstance(x, np.ndarray):
if x.ndim == 0:
return np.asarray([[x]])
elif x.ndim == 1:
return x[:, None]
elif x.ndim == 2:
return x
else:
raise ValueError('Variable ' + name + 'must be 2d or reshapable to 2d')
else:
raise TypeError('Variable ' + name + 'must be a Series, DataFrame or ndarray.')
def parse_dataframe(x, name):
if x is None:
return [name], np.empty(0)
if isinstance(x, DataFrame):
return x.columns, x.index
elif isinstance(x, Series):
return [x.name], x.index
else:
if not isinstance(name, list):
name = [name]
return name, np.arange(np.squeeze(x).shape[0])
class DocStringInheritor(type):
"""
A variation on
http://groups.google.com/group/comp.lang.python/msg/26f7b4fcb4d66c95
by Paul McGuire
"""
def __new__(mcs, name, bases, clsdict):
if not ('__doc__' in clsdict and clsdict['__doc__']):
for mro_cls in (mro_cls for base in bases for mro_cls in
base.mro()):
doc = mro_cls.__doc__
if doc:
clsdict['__doc__'] = doc
break
for attr, attribute in clsdict.items():
if not attribute.__doc__:
for mro_cls in (mro_cls for base in bases for mro_cls in
base.mro()
if hasattr(mro_cls, attr)):
doc = getattr(getattr(mro_cls, attr), '__doc__')
if doc:
attribute.__doc__ = doc
break
return type.__new__(mcs, name, bases, clsdict)
def date_to_index(date, date_index):
"""
Looks up a date in an array of dates
Parameters
----------
date : string, datetime or datetime64
Date to use when returning the index
date_index : {list, ndarray}
Index data containing datetime64 values
Returns
-------
index : int
Index location
Notes
-----
Assumes dates are increasing and unique.
"""
if not is_datetime64_dtype(date_index):
raise ValueError('date_index must be a datetime64 array')
if not np.all((np.diff(date_index.values).astype(dtype=np.int64)) > 0):
raise ValueError('date_index is not monotonic and unique')
if not isinstance(date, (dt.datetime, np.datetime64, str)):
raise ValueError("date must be a datetime, datetime64 or string")
elif isinstance(date, dt.datetime):
date = np.datetime64(date)
elif isinstance(date, str):
orig_date = date
try:
date = np.datetime64(to_datetime(date, errors='coerce'))
except (ValueError, TypeError):
raise ValueError('date:' + orig_date + ' cannot be parsed to a date.')
date_index = np.asarray(date_index)
locs = np.nonzero(date_index <= date)[0]
if locs.shape[0] == 0:
return 0
loc = locs.max()
in_array = np.any(date_index == date)
if not in_array:
loc += 1
return loc
def cutoff_to_index(cutoff, index, default):
"""
Converts a cutoff to a numerical index
Parameters
----------
cutoff : {None, str, datetime, datetime64, Timestamp)
The cutoff point to use
index : Pandas index
Pandas index
default : int
The value to return if cutoff is None
Returns
-------
val : int
Integer value of
"""
int_index = default
if isinstance(cutoff, (str, dt.datetime, np.datetime64, Timestamp)):
int_index = date_to_index(cutoff, index)
elif isinstance(cutoff, (int, long)) or issubclass(cutoff.__class__,
np.integer):
int_index = cutoff
return int_index
def find_index(s, index):
"""
Returns the numeric index for a string or datetime
Parameters
----------
s : Series or DataFrame
Series or DataFrame to use in lookup
index : datetime-like, str
Index value, either a string convertible to a datetime or a datetime
Returns
-------
loc : int
Integer location of index value
"""
if isinstance(index, (int, long, np.int, np.int64)):
return index
date_index = to_datetime(index, errors='coerce')
if date_index is NaT:
raise ValueError(index + ' cannot be converted to datetime')
loc = np.argwhere(s.index == date_index).squeeze()
if loc.size == 0:
raise ValueError('index not found')
return loc