### Permutation

In [9]:
import pandas as pd
suits = "H,S,C,D".split(",")
card_numbers = ["A"] + list(range(2,11)) + "J,Q,K".split(",")
card_values = list(range(1,11)) + [10]*3

cards = []
for suit in suits:
    cards.extend(str(num) + "_" + suit for num in card_numbers)

deck = pd.Series(card_values*4, cards)
deck.sample(5)


3_S     3
J_C    10
A_C     1
K_C    10
A_S     1
dtype: int64

In [13]:
def get_suit(card):
    return card[-1]

def draw(cards, size=2):
    return cards.sample(size)

deck.groupby(get_suit).apply(draw)

C  9_C     9
   J_C    10
D  7_D     7
   Q_D    10
H  K_H    10
   A_H     1
S  6_S     6
   2_S     2
dtype: int64

### Groupwise Weighted Average

In [3]:
import numpy as np
import pandas as pd

data = {
    "category": np.random.choice(list("abcd"), 100),
    "value": np.random.randn(100),
    "weight": np.random.rand(100)
}

df_data = pd.DataFrame(data)
grouped = df_data.groupby("category")

weighted_average = lambda g: np.average(g["value"], weights=g["weight"])

grouped.apply(weighted_average)

category
a   -0.309472
b   -0.261428
c    0.218933
d    0.156017
dtype: float64

### Group Wise Correlation

In [22]:
from pandas_datareader import data as pdata
stock_data = {ticker: pdata.get_data_yahoo(ticker) for ticker in ['AAPL', 'IBM', 'MSFT', 'GOOG']}
df_stock_data = pd.DataFrame({ticker:a["Close"] for ticker,a in stock_data.items()})
df_stock_data.head()

Unnamed: 0_level_0,AAPL,IBM,MSFT,GOOG
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2009-12-31,30.104286,130.899994,30.48,307.986847
2010-01-04,30.572857,132.449997,30.950001,311.349976
2010-01-05,30.625713,130.850006,30.959999,309.978882
2010-01-06,30.138571,130.0,30.77,302.164703
2010-01-07,30.082857,129.550003,30.450001,295.130463


In [33]:
def correlation(g, col="MSFT"):
    return g.corrwith(g[col])

get_year = lambda x: x.year
    
correlation(df_stock_data, "AAPL")
pct_change = df_stock_data.pct_change().dropna()
grouped_by_year = pct_change.groupby(get_year)
grouped_by_year.apply(correlation)

Unnamed: 0,AAPL,IBM,MSFT,GOOG
2010,0.571708,0.669974,1.0,0.521277
2011,0.575583,0.682193,1.0,0.616243
2012,0.277431,0.572112,1.0,0.204794
2013,0.083793,0.184057,1.0,0.23138
2014,0.228143,0.385934,1.0,0.469504
2015,0.52207,0.550173,1.0,0.521514
2016,0.488847,0.435455,1.0,0.697408
2017,0.438259,0.064651,1.0,0.61883
2018,0.695287,0.592144,1.0,0.822209
2019,0.754762,0.516972,1.0,0.706914


### Regress

In [37]:
import statsmodels.api as sm
def regress(data, yvar_col, xvar_cols):
    y = data[yvar_col]
    x = data[xvar_cols]
    x['intercept'] = 1.
    result = sm.OLS(x,y).fit()
    return result.params

grouped_by_year.apply(regress, "AAPL", ["MSFT"])
    

Unnamed: 0,Unnamed: 1,0,1
2010,AAPL,0.461871,6.396786
2011,AAPL,0.511457,3.801129
2012,AAPL,0.197188,3.649948
2013,AAPL,0.074893,1.157898
2014,AAPL,0.20465,7.269445
2015,AAPL,0.551677,-0.166938
2016,AAPL,0.475362,2.256277
2017,AAPL,0.376087,12.491371
2018,AAPL,0.683949,-0.356881
2019,AAPL,0.539258,7.223102
