forked from santosjorge/cufflinks
/
pandastools.py
62 lines (52 loc) · 1.49 KB
/
pandastools.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
import pandas as pd
import re
def _screen(self, include=True, **kwargs):
    """
    Filters the rows of a DataFrame by matching string columns
    against one or more patterns.

    Every keyword names a column; its value is a pattern (or a
    collection of patterns) searched for inside that column's
    strings. Patterns are treated as regular expressions, matched
    case-insensitively, and several patterns for the same column
    are alternatives (any of them may match). Filters for several
    columns are applied one after another, so a row must satisfy
    all of them.

    Parameters:
    -----------
        include : bool
            If True (default) keep the rows that match.
            If False keep the rows that do NOT match.
        kwargs :
            Key value pairs that indicate the column and
            value(s) to screen for. A value may be a single
            string or a list / tuple / set of strings.

    Example:
        df.screen(col1='string_to_match',col2=['string1','string2'])

    Returns : DataFrame
        A filtered copy; the original DataFrame is not modified.
    """
    df = self.copy()
    for column, wanted in kwargs.items():
        # Accept any common collection of patterns, not only ``list``;
        # anything else is treated as a single pattern.
        if isinstance(wanted, (list, tuple, set, frozenset)):
            patterns = list(wanted)
        else:
            patterns = [wanted]
        # ``na=False`` makes missing / non-string cells count as
        # "no match" and yields a clean boolean mask directly.
        mask = df[column].str.contains(
            '|'.join(patterns), flags=re.IGNORECASE, na=False
        )
        df = df[mask if include else ~mask]
    return df
def _swapcolumns(self):
    """
    Swaps first and second columns.
    Useful for inverting axis when plotting.

    Only the first two columns are present in the result, in
    swapped order; the original DataFrame is not modified.

    Example:
        df.swapcolumns()

    Returns : DataFrame
    """
    # ``reindex_axis`` no longer exists in current pandas;
    # ``reindex(columns=...)`` is the equivalent call.
    first, second = self.columns[0], self.columns[1]
    return self.reindex(columns=[second, first])
def bestfit(self):
    """
    Returns a series with the bestfit values.

    A straight line is fitted by least squares to the values of
    the series, using the positions 1, 2, ..., len(series) as the
    x variable. Missing values are left out of the fit and of the
    returned series.

    Example:
        Series.bestfit()

    Returns: series
        Fitted values, indexed like the non-missing entries of
        the original series.
        The returned series contains a parameter
        called 'formula' which includes the string representation
        of the bestfit line.

    Raises: ValueError
        If the series has fewer than two non-missing values.
    """
    # Local import keeps the fix self-contained; numpy is always
    # available wherever pandas is installed.
    import numpy as np

    positions = np.arange(1, len(self) + 1, dtype=float)
    observed = np.asarray(self, dtype=float)
    valid = ~np.isnan(observed)
    if valid.sum() < 2:
        raise ValueError('bestfit requires at least two non-missing values')

    # Degree-1 polynomial fit: coefficients come back highest power first.
    slope, intercept = np.polyfit(positions[valid], observed[valid], 1)

    best_fit = pd.Series(slope * positions[valid] + intercept,
                         index=self.index[valid])
    best_fit.formula = '%.2f*x+%.2f' % (slope, intercept)
    return best_fit
# Expose the helpers above as methods on the pandas classes, so they
# can be called as df.screen(...), df.swapcolumns() and series.bestfit().
setattr(pd.DataFrame, 'screen', _screen)
setattr(pd.DataFrame, 'swapcolumns', _swapcolumns)
setattr(pd.Series, 'bestfit', bestfit)