-
Notifications
You must be signed in to change notification settings - Fork 116
/
managers.py
184 lines (146 loc) · 6.89 KB
/
managers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
from django.db.models.query import QuerySet
import numpy as np
import pandas as pd
from model_utils.managers import PassThroughManager
class DataFrameQuerySet(QuerySet):
def to_pivot_table(self, *fields, **kwargs):
"""
A convenience method for creating a time series i.e the
DataFrame index is instance of a DateTime or PeriodIndex
Parameters
----------
fields: The model fields to utilise in creating the frame.
to span a relationship, just use the field name of related
fields across models, separated by double underscores,
values : column to aggregate, optional
rows : list of column names or arrays to group on
Keys to group on the x-axis of the pivot table
cols : list of column names or arrays to group on
Keys to group on the y-axis of the pivot table
aggfunc : function, default numpy.mean, or list of functions
If list of functions passed, the resulting pivot table will have
hierarchical columns whose top level are the function names
(inferred from the function objects themselves)
fill_value : scalar, default None
Value to replace missing values with
margins : boolean, default False
Add all row / columns (e.g. for subtotal / grand totals)
dropna : boolean, default True
Do not include columns whose entries are all NaN
"""
df = self.to_dataframe(*fields)
values = kwargs.pop('values')
rows = kwargs.pop('rows')
cols = kwargs.pop('cols')
aggfunc = kwargs.pop('aggfunc', np.mean)
fill_value = kwargs.pop('fill_value', None)
margins = kwargs.pop('margins', False)
dropna = kwargs.pop('dropna', False)
return pd.pivot_table(df, values=values,
fill_value=fill_value,
rows=rows, cols=cols,
aggfunc=aggfunc,
margins=margins,
dropna=dropna)
def to_timeseries(self, *fields, **kwargs):
"""
A convenience method for creating a time series i.e the
DataFrame index is instance of a DateTime or PeriodIndex
Parameters
----------
fields: The model fields to utilise in creating the frame.
to span a relationship, just use the field name of related
fields across models, separated by double underscores,
index: specify the field to use for the index. If the index
field is not in the field list it will be appended. This
is mandatory.
storage: Specify if the queryset uses the `wide` or `long` format
for data.
pivot_column: Required once the you specify `long` format
storage. This could either be a list or string identifying
the field name or combination of field. If the pivot_column
is a single column then the unique values in this column become
a new columns in the DataFrame
If the pivot column is a list the values in these columns are
concatenated (using the '-' as a separator)
and these values are used for the new timeseries columns
values: Also required if you utilize the `long` storage the
values column name is use for populating new frame values
freq: the offset string or object representing a target conversion
rs_kwargs: Arguments based on pandas.DataFrame.resample
"""
index = kwargs.pop('index', None)
if not index:
raise AssertionError('You must supply an index field')
storage = kwargs.get('storage', 'wide')
if storage not in ['wide', 'long']:
raise AssertionError('storage must be wide or long')
if storage == 'wide':
df = self.to_dataframe(*fields, index=index)
else:
df = self.to_dataframe(*fields)
values = kwargs.get('values', None)
if values is None:
raise AssertionError('You must specify a values field')
pivot_columns = kwargs.get('pivot_columns', None)
if pivot_columns is None:
raise AssertionError('You must specify pivot_columns')
if isinstance(pivot_columns, list):
df['combined_keys'] = ''
for c in pivot_columns:
df['combined_keys'] += df[c].str.upper() + '.'
df['combined_keys'] += values.lower()
df = df.pivot(index=index,
columns='combined_keys',
values=values)
else:
df = df.pivot(index=index,
columns=pivot_columns,
values=values)
rule = kwargs.get('freq', None)
if rule:
rs_kwargs = kwargs.get('rs_kwargs', None)
if rs_kwargs:
df = df.resample(rule, **rs_kwargs)
else:
df = df.resample(rule)
return df
def to_dataframe(self, *fields, **kwargs):
"""
Returns a DataFrame from the queryset
Paramaters
-----------
fields: The model fields to utilise in creating the frame.
to span a relationship, just use the field name of related
fields across models, separated by double underscores,
index: specify the field to use for the index. If the index
field is not in the field list it will be appended
fill_na: fill in missing observations using one of the following
this is a string specifying a pandas fill method
{'backfill, 'bill', 'pad', 'ffill'} or a scalar value
coerce_float: Attempt to convert the numeric non-string fields
like object, decimal etc. to float if possible
"""
index = kwargs.pop('index', None)
fill_na = kwargs.pop('fill_na', None)
coerce_float = kwargs.pop('coerce_float', False)
if not fields:
fields = tuple(self.model._meta.get_all_field_names())
if index is not None:
# add it to the fields if not already there
if index not in fields:
fields = fields + (index,)
qs = self.values_list(*fields)
recs = np.core.records.fromrecords(qs, names=qs.field_names)
df = pd.DataFrame.from_records(recs, coerce_float=coerce_float)
if index is not None:
df = df.set_index(index)
if fill_na is not None:
if fill_na not in ['backfill', 'bfill', 'pad', 'ffill']:
df = df.fillna(value=fill_na)
else:
df = df.fillna(method=fill_na)
return df
class DataFrameManager(PassThroughManager):
def get_query_set(self):
return DataFrameQuerySet(self.model)