forked from pandas-dev/pandas
-
Notifications
You must be signed in to change notification settings - Fork 0
/
list.py
149 lines (115 loc) · 3.83 KB
/
list.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
import warnings
import numpy as np
from pandas.core.base import PandasObject
from pandas.formats.printing import pprint_thing
from pandas.types.common import is_scalar
from pandas.sparse.array import SparseArray
import pandas._sparse as splib
class SparseList(PandasObject):
"""
Data structure for accumulating data to be converted into a
SparseArray. Has similar API to the standard Python list
Parameters
----------
data : scalar or array-like
fill_value : scalar, default NaN
"""
def __init__(self, data=None, fill_value=np.nan):
# see gh-13784
warnings.warn("SparseList is deprecated and will be removed "
"in a future version", FutureWarning, stacklevel=2)
self.fill_value = fill_value
self._chunks = []
if data is not None:
self.append(data)
def __unicode__(self):
contents = '\n'.join(repr(c) for c in self._chunks)
return '%s\n%s' % (object.__repr__(self), pprint_thing(contents))
def __len__(self):
return sum(len(c) for c in self._chunks)
def __getitem__(self, i):
if i < 0:
if i + len(self) < 0: # pragma: no cover
raise ValueError('%d out of range' % i)
i += len(self)
passed = 0
j = 0
while i >= passed + len(self._chunks[j]):
passed += len(self._chunks[j])
j += 1
return self._chunks[j][i - passed]
def __setitem__(self, i, value):
raise NotImplementedError
@property
def nchunks(self):
return len(self._chunks)
@property
def is_consolidated(self):
return self.nchunks == 1
def consolidate(self, inplace=True):
"""
Internally consolidate chunks of data
Parameters
----------
inplace : boolean, default True
Modify the calling object instead of constructing a new one
Returns
-------
splist : SparseList
If inplace=False, new object, otherwise reference to existing
object
"""
if not inplace:
result = self.copy()
else:
result = self
if result.is_consolidated:
return result
result._consolidate_inplace()
return result
def _consolidate_inplace(self):
new_values = np.concatenate([c.sp_values for c in self._chunks])
new_index = _concat_sparse_indexes([c.sp_index for c in self._chunks])
new_arr = SparseArray(new_values, sparse_index=new_index,
fill_value=self.fill_value)
self._chunks = [new_arr]
def copy(self):
"""
Return copy of the list
Returns
-------
new_list : SparseList
"""
new_splist = SparseList(fill_value=self.fill_value)
new_splist._chunks = list(self._chunks)
return new_splist
def to_array(self):
"""
Return SparseArray from data stored in the SparseList
Returns
-------
sparr : SparseArray
"""
self.consolidate(inplace=True)
return self._chunks[0]
def append(self, value):
"""
Append element or array-like chunk of data to the SparseList
Parameters
----------
value: scalar or array-like
"""
if is_scalar(value):
value = [value]
sparr = SparseArray(value, fill_value=self.fill_value)
self._chunks.append(sparr)
self._consolidated = False
def _concat_sparse_indexes(indexes):
all_indices = []
total_length = 0
for index in indexes:
# increment by offset
inds = index.to_int_index().indices + total_length
all_indices.append(inds)
total_length += index.length
return splib.IntIndex(total_length, np.concatenate(all_indices))