In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Example: Random Sampling and Permutation

In [15]:
# Build a toy deck: 7 ranks in each of 4 suits (28 cards), stored as a Series
# mapping card label -> card value.
suits = ['A', 'B', 'C', 'D']
# Values for one suit's seven ranks, repeated once per suit so the values stay
# aligned with the labels even if `suits` is edited.
card_val = (list(range(1, 5)) + [6] * 3) * len(suits)
# Rank labels for one suit, in the same order as the per-suit values above.
base_names = ['M'] + list(range(2, 5)) + ['X', 'Y', 'Z']
cards = []

for suit in suits:
    # list.extend() appends every element of an iterable to the end of the list;
    # each label is the rank followed by the suit letter, e.g. 'MA', '2A', ...
    cards.extend(str(num) + suit for num in base_names)

deck = pd.Series(card_val, index=cards)

In [16]:
# Peek at the first eight cards: the seven cards of suit A plus the first card of suit B.
deck[:8]

MA    1
2A    2
3A    3
4A    4
XA    6
YA    6
ZA    6
MB    1
dtype: int64

In [17]:
def draw(deck, n=5, random_state=None):
    """Return `n` entries sampled at random from `deck`.

    Parameters
    ----------
    deck : pandas Series (or DataFrame)
        The object to sample from.
    n : int, default 5
        Number of entries to draw. Sampling is without replacement (the
        pandas default), so pandas raises ValueError if `n` exceeds the
        number of entries.
    random_state : int, numpy Generator/RandomState or None, default None
        Forwarded to ``sample``. Pass a seed to make the draw reproducible;
        None keeps the previous behaviour (a fresh random draw each call).

    Returns
    -------
    An object of the same type as `deck` holding the sampled entries.
    """
    return deck.sample(n, random_state=random_state)

In [18]:
# Draw a random hand using draw's default size (n=5); the result changes on every run.
draw(deck)

MA    1
ZB    6
XC    6
YD    6
YC    6
dtype: int64

In [19]:
def get_suit(card):
    """Return the suit of a card label, i.e. its last character."""
    return card[-1]
# Group the cards by suit (get_suit is called on each index label) and draw
# one random card from every suit; extra keyword n=1 is forwarded to draw.
deck.groupby(get_suit).apply(draw, n=1)

A  3A    3
B  3B    3
C  3C    3
D  MD    1
dtype: int64

In [20]:
# Draw two random cards per suit; group_keys=False leaves the suit key out of
# the result index, so only the card labels remain.
deck.groupby(get_suit, group_keys=False).apply(draw, n=2)

XA    6
YA    6
MB    1
XB    6
2C    2
ZC    6
XD    6
ZD    6
dtype: int64

# Example: Group Weighted Average and Correlation

In [21]:
# Toy frame for the weighted-average example: two categories of four rows each,
# with a random 'data' column and a random 'weights' column.
df = pd.DataFrame(
    {
        'category': ['a'] * 4 + ['b'] * 4,
        'data': np.random.randn(8),    # eight standard-normal draws
        'weights': np.random.rand(8),  # eight uniform draws on [0, 1)
    }
)
df

Unnamed: 0,category,data,weights
0,a,0.798795,0.43139
1,a,-0.671584,0.137247
2,a,-0.083814,0.364682
3,a,-2.41444,0.647392
4,b,-0.171887,0.529625
5,b,0.673453,0.994477
6,b,0.736119,0.603345
7,b,2.6257,0.269649


In [22]:
# Split df into one group per distinct value of the 'category' column.
grouped = df.groupby('category')
def w_avg(g):
    """Return the average of g['data'] weighted by g['weights']."""
    values = g['data']
    weights = g['weights']
    return np.average(values, weights=weights)
# Call w_avg on each category's sub-frame: one weighted average per category.
grouped.apply(w_avg)

category
a   -0.848501
b    0.722061
dtype: float64

In [23]:
# Download the price table; the first CSV column becomes the index and is
# parsed as dates.
close_px = pd.read_csv(
    'https://raw.githubusercontent.com/BrambleXu/pydata-notebook/master/examples/stock_px_2.csv',
    index_col=0,
    parse_dates=True,
)

In [24]:
# Preview the first five rows of the price table.
close_px.head(5)

Unnamed: 0,AAPL,MSFT,XOM,SPX
2003-01-02,7.4,21.11,29.22,909.03
2003-01-03,7.45,21.14,29.24,908.59
2003-01-06,7.45,21.52,29.96,929.01
2003-01-07,7.43,21.93,28.95,922.93
2003-01-08,7.28,21.31,28.83,909.93


In [25]:
 close_px.info()  # summary: index range, per-column dtype and non-null count, memory usage

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2214 entries, 2003-01-02 to 2011-10-14
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   AAPL    2214 non-null   float64
 1   MSFT    2214 non-null   float64
 2   XOM     2214 non-null   float64
 3   SPX     2214 non-null   float64
dtypes: float64(4)
memory usage: 86.5 KB


In [26]:
close_px[-4:] # last four rows of the price table

Unnamed: 0,AAPL,MSFT,XOM,SPX
2011-10-11,400.29,27.0,76.27,1195.54
2011-10-12,402.19,26.96,77.16,1207.25
2011-10-13,408.43,27.18,76.37,1203.66
2011-10-14,422.0,27.27,78.11,1224.58


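**`DataFrame.corrwith(other, axis=0, drop=False, method='pearson')`**: compute the pairwise correlation between the rows or columns of a DataFrame and those of `other` (a DataFrame or Series). `axis` is 0 (column-wise, the default) or 1 (row-wise).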

**`DataFrame.pct_change(periods=1, fill_method='pad', limit=None, freq=None, **kwargs)`**: percentage change between the current and a prior element.

In [27]:
def spx_corr(x):
    """Correlate every column of `x` with its 'SPX' column."""
    benchmark = x['SPX']
    return x.corrwith(benchmark)

In [28]:
# Percentage change of each column relative to the previous row; dropna()
# then removes rows containing NaN (the first row has no prior element).
rets = close_px.pct_change().dropna()

In [29]:
def get_year(x):
    """Return the `year` attribute of `x` (e.g. a date or timestamp)."""
    return x.year

In [30]:
# Group the return rows by year: get_year is applied to each index label.
by_year = rets.groupby(get_year)

In [31]:
# For each year, the correlation of every column's returns with SPX.
by_year.apply(spx_corr)

Unnamed: 0,AAPL,MSFT,XOM,SPX
2003,0.541124,0.745174,0.661265,1.0
2004,0.374283,0.588531,0.557742,1.0
2005,0.46754,0.562374,0.63101,1.0
2006,0.428267,0.406126,0.518514,1.0
2007,0.508118,0.65877,0.786264,1.0
2008,0.681434,0.804626,0.828303,1.0
2009,0.707103,0.654902,0.797921,1.0
2010,0.710105,0.730118,0.839057,1.0
2011,0.691931,0.800996,0.859975,1.0


In [32]:
# For each year, the correlation between the AAPL and MSFT return columns.
by_year.apply(lambda g: g['AAPL'].corr(g['MSFT']))

2003    0.480868
2004    0.259024
2005    0.300093
2006    0.161735
2007    0.417738
2008    0.611901
2009    0.432738
2010    0.571946
2011    0.581987
dtype: float64

# Example: Group-Wise Linear Regression

In [33]:
import statsmodels.api as sm
def regress(data, yvar, xvars):
    """Fit an ordinary-least-squares regression of `yvar` on `xvars` plus an intercept.

    Parameters
    ----------
    data : DataFrame
        Frame holding both the response and the regressor columns.
    yvar : column label
        Name of the response column.
    xvars : list of column labels, or a single string label
        Names of the regressor columns.

    Returns
    -------
    The fitted ``params`` of the OLS result: one coefficient per regressor,
    followed by 'intercept'.
    """
    # Accept a bare column name so data[xvars] is always a DataFrame.
    if isinstance(xvars, str):
        xvars = [xvars]
    Y = data[yvar]
    # assign() returns a new frame with the constant column appended, so
    # nothing is written into a slice of `data` (the previous in-place
    # X['intercept'] = 1. could raise SettingWithCopyWarning).
    X = data[xvars].assign(intercept=1.)
    result = sm.OLS(Y, X).fit()  # Ordinary Least Squares
    return result.params

In [34]:
# For each year, regress AAPL returns on SPX returns; the extra positional
# arguments are forwarded to regress as yvar and xvars.
by_year.apply(regress, 'AAPL', ['SPX'])

Unnamed: 0,SPX,intercept
2003,1.195406,0.00071
2004,1.363463,0.004201
2005,1.766415,0.003246
2006,1.645496,8e-05
2007,1.198761,0.003438
2008,0.968016,-0.00111
2009,0.879103,0.002954
2010,1.052608,0.001261
2011,0.806605,0.001514
