# dplyrの実践

In [2]:
library(dplyr)
        

## データフレームを作成


In [3]:
# Patrol distances covered by Anpanman, Currypanman and Shokupanman
# on day 1 and day 2; rows are labelled a-j.
# NOTE(review): the values sample() draws for a given seed depend on the
# sampling algorithm of the R version in use, so column `s` may differ
# from the table printed below -- confirm against your R version.
set.seed(1)
d <- data.frame(
  day1 = 1:10,
  day2 = 11:20,
  s = sample(c("anpan", "currypan", "syokupan"), 10, replace = TRUE),
  row.names = letters[1:10]
)

# Print the data frame.
d

Unnamed: 0,day1,day2,s
a,1,11,anpan
b,2,12,currypan
c,3,13,currypan
d,4,14,syokupan
e,5,15,anpan
f,6,16,syokupan
g,7,17,syokupan
h,8,18,currypan
i,9,19,currypan
j,10,20,anpan


### 数値フィルタ

In [4]:
# Rows where day1 equals 4.
d %>%
  filter(day1 == 4)
# Rows where day1 is anything other than 4.
d %>%
  filter(day1 != 4)
# Rows where day1 is less than 4.
d %>%
  filter(day1 < 4)
# Rows where day1 is 4 or greater.
d %>%
  filter(day1 >= 4)
# Rows where day1 lies between 3 and 7 (both ends appear in the output).
d %>%
  filter(between(day1, 3, 7))



day1,day2,s
4,14,syokupan


day1,day2,s
1,11,anpan
2,12,currypan
3,13,currypan
5,15,anpan
6,16,syokupan
7,17,syokupan
8,18,currypan
9,19,currypan
10,20,anpan


day1,day2,s
1,11,anpan
2,12,currypan
3,13,currypan


day1,day2,s
4,14,syokupan
5,15,anpan
6,16,syokupan
7,17,syokupan
8,18,currypan
9,19,currypan
10,20,anpan


day1,day2,s
3,13,currypan
4,14,syokupan
5,15,anpan
6,16,syokupan
7,17,syokupan


### 複数条件

In [5]:
# Both conditions must hold: day1 below 8 AND day2 above 12.
d %>%
  filter(day1 < 8 & day2 > 12)

day1,day2,s
3,13,currypan
4,14,syokupan
5,15,anpan
6,16,syokupan
7,17,syokupan


In [6]:
# `|` is logical OR: keep rows where day1 is above 8 or day2 is below 15.
d %>%
  filter(day1 > 8 | day2 < 15)

day1,day2,s
1,11,anpan
2,12,currypan
3,13,currypan
4,14,syokupan
9,19,currypan
10,20,anpan


In [7]:
# Exclusive OR: keep rows where exactly one of the two conditions is TRUE.
d %>%
  filter(xor(day1 < 8, day2 > 12))

day1,day2,s
1,11,anpan
2,12,currypan
8,18,currypan
9,19,currypan
10,20,anpan


### 文字列フィルタ

In [8]:
# Keep only the currypan rows.
d %>%
  filter(s == "currypan")

day1,day2,s
2,12,currypan
3,13,currypan
8,18,currypan
9,19,currypan


In [9]:
# Keep every row except the currypan ones.
d %>%
  filter(s != "currypan")

day1,day2,s
1,11,anpan
4,14,syokupan
5,15,anpan
6,16,syokupan
7,17,syokupan
10,20,anpan


### 正規表現を利用した一致条件の指定

In [10]:
# stringr provides str_detect() for regular-expression matching.
library(stringr)
# Keep rows whose s value matches the pattern "^c" (starts with "c").
d %>%
  filter(str_detect(s, "^c"))

“package ‘stringr’ was built under R version 3.2.5”

day1,day2,s
2,12,currypan
3,13,currypan
8,18,currypan
9,19,currypan


In [11]:
# Keep rows whose s value contains the letter "u".
d %>%
  filter(str_detect(s, "u"))

day1,day2,s
2,12,currypan
3,13,currypan
4,14,syokupan
6,16,syokupan
7,17,syokupan
8,18,currypan
9,19,currypan


In [12]:
# Keep rows whose s value is one of the listed names.
d %>%
  filter(s %in% c("currypan", "syokupan"))

day1,day2,s
2,12,currypan
3,13,currypan
4,14,syokupan
6,16,syokupan
7,17,syokupan
8,18,currypan
9,19,currypan


In [13]:
# For each element of 1:10, report whether it matches any element on the
# right-hand side (3:5).
1:10 %in% 3:5

# select （列を抽出）

In [14]:
# Extract the day1 column only.
d %>%
  dplyr::select(day1)



Unnamed: 0,day1
a,1
b,2
c,3
d,4
e,5
f,6
g,7
h,8
i,9
j,10


In [15]:
# Extract the day1 and s columns.
d %>%
  dplyr::select(day1, s)

Unnamed: 0,day1,s
a,1,anpan
b,2,currypan
c,3,currypan
d,4,syokupan
e,5,anpan
f,6,syokupan
g,7,syokupan
h,8,currypan
i,9,currypan
j,10,anpan


# summarise　（集約する）

In [16]:
# Collapse the table to a single row holding the mean of each day column.
d %>%
  dplyr::summarize(mean(day1), mean(day2))

mean(day1),mean(day2)
5.5,15.5


## summarise_each

In [17]:
# Apply max, min and mean to both day1 and day2 in a single call.
# NOTE(review): summarise_each() and funs() are deprecated in newer dplyr
# releases -- confirm the installed version still provides them.
d %>%
  summarise_each(funs(max, min, mean), day1, day2)

day1_max,day2_max,day1_min,day2_min,day1_mean,day2_mean
10,20,1,11,5.5,15.5


## group_by　（グルーピング）

In [18]:
# Group the rows by character name. On its own this changes no values,
# as the unchanged table printed below shows.
dplyr::group_by(d, s)


Unnamed: 0,day1,day2,s
a,1,11,anpan
b,2,12,currypan
c,3,13,currypan
d,4,14,syokupan
e,5,15,anpan
f,6,16,syokupan
g,7,17,syokupan
h,8,18,currypan
i,9,19,currypan
j,10,20,anpan


## arrange （並び替え）

In [19]:
# Sort by day1 in descending order and store the result as d_arrange.
# The assignment target is written first with `<-` so the binding is
# visible at the start of the statement rather than hidden at its end.
d_arrange <- d %>%
  dplyr::arrange(desc(day1))

# Print the sorted table.
d_arrange

day1,day2,s
10,20,anpan
9,19,currypan
8,18,currypan
7,17,syokupan
6,16,syokupan
5,15,anpan
4,14,syokupan
3,13,currypan
2,12,currypan
1,11,anpan


In [20]:
# Sort back into ascending order of day1.
d_arrange %>%
  dplyr::arrange(day1)

day1,day2,s
1,11,anpan
2,12,currypan
3,13,currypan
4,14,syokupan
5,15,anpan
6,16,syokupan
7,17,syokupan
8,18,currypan
9,19,currypan
10,20,anpan


## mutate （列の追加）

In [21]:
# Add new_column: 1 where day2 is greater than 15, otherwise 0.
d %>%
  dplyr::mutate(new_column = ifelse(day2 > 15, 1, 0))

day1,day2,s,new_column
1,11,anpan,0
2,12,currypan,0
3,13,currypan,0
4,14,syokupan,0
5,15,anpan,0
6,16,syokupan,1
7,17,syokupan,1
8,18,currypan,1
9,19,currypan,1
10,20,anpan,1


In [22]:
# A column created earlier in the same mutate() call can be used to define
# a later one: `new` is derived from the freshly added `new_column`.
d %>%
  dplyr::mutate(
    new_column = ifelse(day2 > 15, 1, 0),
    new = ifelse(new_column == 1, 2, 0)
  )

day1,day2,s,new_column,new
1,11,anpan,0,0
2,12,currypan,0,0
3,13,currypan,0,0
4,14,syokupan,0,0
5,15,anpan,0,0
6,16,syokupan,1,2
7,17,syokupan,1,2
8,18,currypan,1,2
9,19,currypan,1,2
10,20,anpan,1,2


***
# 組み合わせて使う
## キャラごとのパトロール距離を求める

In [23]:
# Total patrol distance per character for day1 and day2.
# Only ASCII whitespace is used around the pipe: a full-width space
# (U+3000) in code can trigger an "unexpected input" parse error,
# depending on platform and locale.
d %>%
  dplyr::group_by(s) %>%
  dplyr::summarize(sum(day1), sum(day2))

s,sum(day1),sum(day2)
anpan,16,46
currypan,22,62
syokupan,17,47


In [24]:
# Add day3 (twice the day1 value), then total all three days per character.
d %>%
  dplyr::mutate(day3 = day1 * 2) %>%
  dplyr::group_by(s) %>%
  dplyr::summarize(sum(day1), sum(day2), sum(day3))

s,sum(day1),sum(day2),sum(day3)
anpan,16,46,32
currypan,22,62,44
syokupan,17,47,34
