In [1]:
import pandas as pd
import numpy as np
from dplython import (DplyFrame, X, diamonds, select, sift, sample_n,
    sample_frac, head, arrange, mutate, group_by, summarize, DelayFunction) 

In [5]:
combine = DplyFrame(pd.read_csv('./combine.csv'))

In [6]:
combine.head()

Unnamed: 0,id,Year,Name,Position,HeightFeet,HeightInches,Weight,Arms,Hands,FortyYD,...,Bench,Round,College,Pick,PickRound,PickTotal,FirstName,LastName,HeightInchesTotal,Wonderlic
0,8984,2013,Quanterus Smith,DE,6,5.0,250,33.25,10.375,0.0,...,0,5,Western Kentucky,13(146),13,146,Quanterus,Smith,77.0,0
1,9002,2013,Abry Jones,DT,6,3.0,313,35.0,9.75,0.0,...,30,0,,,0,0,Abry,Jones,75.0,0
2,9004,2013,Bennie Logan,DT,6,2.0,309,34.0,10.25,0.0,...,30,3,LSU,5(67),5,67,Bennie,Logan,74.0,0
3,9012,2013,John Boyett,FS,5,10.0,204,30.5,8.5,0.0,...,27,6,Oregon,24(192),24,192,John,Boyett,70.0,0
4,9018,2013,Bacarri Rambo,FS,6,0.0,211,31.0,9.25,0.0,...,17,6,Georgia,23(191),23,191,Bacarri,Rambo,72.0,0


In [9]:
# Select specific columns of the DataFrame using select, and 
# get the first few using head
tmp = combine >> select(X.Position, X.Weight, X.Hands)
print tmp.shape
tmp.head()

(4625, 3)
  Position  Weight   Hands
0       DE     250  10.375
1       DT     313   9.750
2       DT     309  10.250
3       FS     204   8.500
4       FS     211   9.250


In [11]:
# Filter out rows using sift
tmp = combine >> sift(X.Weight < 200) >> select(X.Position, X.Weight, X.Hands, X.PickTotal)
print tmp.shape
tmp.head()

(797, 4)


Unnamed: 0,Position,Weight,Hands,PickTotal
15,CB,182,9.375,123
49,RB,193,0.0,0
55,WR,192,9.0,54
56,CB,183,9.0,0
58,CB,190,0.0,156


In [13]:
# Sample with sample_n or sample_frac,
tmp = (combine >> 
       sample_n(10) >>
       select(X.Position, X.Weight, X.Hands, X.PickTotal))

print tmp.shape
tmp

(10, 4)


Unnamed: 0,Position,Weight,Hands,PickTotal
95,OT,316,0.0,0
29,OG,319,0.0,178
4405,OT,309,0.0,156
2815,OLB,258,0.0,0
3079,TE,264,0.0,181
2990,DE,269,0.0,0
3986,OC,302,0.0,0
811,RB,210,9.0,0
155,CB,193,9.625,24
3501,DT,300,0.0,139


In [15]:
# sort with arrange
tmp = (combine >> 
       sample_n(10) >>
       arrange(X.Weight) >> 
       select(X.Position, X.Weight, X.Hands, X.PickTotal))
tmp

Unnamed: 0,Position,Weight,Hands,PickTotal
129,RB,197,0.0,24
1071,WR,204,9.375,128
949,RB,204,0.0,172
119,SS,210,0.0,0
13,QB,231,9.125,234
3154,TE,238,0.0,0
3143,TE,249,0.0,104
3047,ILB,251,0.0,0
6,OC,302,10.25,121
4611,OG,367,0.0,0


In [27]:
# You can: 
# add columns with mutate (referencing other columns!)
# group rows into dplyr-style groups with group_by
# collapse rows into single rows using sumarize
tmp = (combine >> 
       sample_n(10) >>
       mutate(mean_of_weight_hands=(X.Weight+X.Hands)/2.) >> 
       select(X.Position, X.Weight, X.Hands, X.PickTotal, X.mean_of_weight_hands))
tmp


Unnamed: 0,Position,Weight,Hands,PickTotal,mean_of_weight_hands
811,RB,210,9.0,0,109.5
312,CB,178,0.0,31,89.0
2975,DE,249,0.0,7,124.5
2586,DE,264,0.0,103,132.0
3805,DT,305,0.0,154,152.5
3971,DT,315,0.0,190,157.5
1182,FS,199,0.0,114,99.5
873,ILB,245,0.0,154,122.5
4371,OT,290,0.0,0,145.0
1030,FS,190,0.0,0,95.0


In [30]:
# group by columns
tmp = (combine >>
       group_by(X.Position, X.Year) >> 
       summarize(avg_weight=X.Weight.mean()) >>
       arrange(X.Year, X.Position))
tmp.head(10)

Unnamed: 0,Position,Year,avg_weight
1,CB,1999,184.807692
17,DE,1999,269.15625
33,DT,1999,295.733333
49,FB,1999,236.583333
63,FS,1999,199.882353
79,ILB,1999,242.84
98,OC,1999,304.733333
113,OG,1999,311.724138
129,OLB,1999,239.923077
145,OT,1999,321.53125


In [35]:
# To pass the DataFrame or columns into functions, apply @DelayFunction
@DelayFunction
def MeanOfColumns(df):
    meancol = df.mean(axis=1)
    return meancol

tmp = (combine >>
       select(X.Year, X.Weight, X.Hands) >>
       mutate(Mean_of_cols = (combine >> 
                              select(X.Year, X.Weight, X.Hands) >> 
                              MeanOfColumns(X))))
tmp

Unnamed: 0,Year,Weight,Hands,Mean_of_cols
0,2013,250,10.375,757.791667
1,2013,313,9.750,778.583333
2,2013,309,10.250,777.416667
3,2013,204,8.500,741.833333
4,2013,211,9.250,744.416667
5,2013,243,10.500,755.500000
6,2013,302,10.250,775.083333
7,2013,332,9.875,784.958333
8,2013,320,9.750,780.916667
9,2013,243,9.375,755.125000
