# basic functions

### order using more than two columns
- sort rows with 1st col and then sort with 2nd col

In [None]:
my_frame_num[order(my_frame_num[,2], -my_frame_num[,3]),]

### rosSums & colSums

In [None]:
temp <- matrix(seq(1:10), 2, 5)
rowSums(temp)
colSums(temp)

### combn function

In [None]:
labels <- paste("Sample", 1:5, sep="")
combn(labels, m=2, FUN=paste, collapse="-")

### aggregate function

In [None]:
aggregate(iris[,1:4], by=list(iris$Species), FUN=mean, na.rm=T)

### missing function
- 'missing()' is used to test whether a value was specified as an argument
- like 'None idea' in Python

In [None]:
myfct2 <- function(x1=5, opt_arg) {
if(missing(opt_arg)) { 
  z1 <- 1:10 
} else {
  z1 <- opt_arg 
}   
cat("my function returns:", "\n")
return(z1/x1)
}

### stop and warning

In [None]:
myfct <- function(x1) {
if (x1>=0) print(x1) else stop("This function did not finish, because x1 < 0")
warning("Value needs to be > 0")
}
myfct(x1=2)

# dplyr

In [5]:
library(dplyr)


Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union



### summarise

In [9]:
summarise(mtcars, mpg_mean=mean(mpg))

mpg_mean
20.09062


In [10]:
summarise_all(mtcars, mean,  na.rm=T)

mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
20.09062,6.1875,230.7219,146.6875,3.596563,3.21725,17.84875,0.4375,0.40625,3.6875,2.8125


In [11]:
summarise_all(mtcars, c("min", "max"))

mpg_min,cyl_min,disp_min,hp_min,drat_min,wt_min,qsec_min,vs_min,am_min,gear_min,...,cyl_max,disp_max,hp_max,drat_max,wt_max,qsec_max,vs_max,am_max,gear_max,carb_max
10.4,4,71.1,52,2.76,1.513,14.5,0,0,3,...,8,472,335,4.93,5.424,22.9,1,1,5,8


In [12]:
summarise_all(mtcars, funs(mean=mean(., na.rm=T), max=max(., na.rm=T)))

mpg_mean,cyl_mean,disp_mean,hp_mean,drat_mean,wt_mean,qsec_mean,vs_mean,am_mean,gear_mean,...,cyl_max,disp_max,hp_max,drat_max,wt_max,qsec_max,vs_max,am_max,gear_max,carb_max
20.09062,6.1875,230.7219,146.6875,3.596563,3.21725,17.84875,0.4375,0.40625,3.6875,...,8,472,335,4.93,5.424,22.9,1,1,5,8


In [13]:
summarise_at(iris, c("Sepal.Width", "Petal.Width"), mean)

Sepal.Width,Petal.Width
3.057333,1.199333


In [6]:
summarise_at(iris, vars(Petal.Width), mean, trim = 1)

Petal.Width
1.3


In [14]:
summarise_at(iris, vars(Petal.Width), mean)

Petal.Width
1.199333


In [15]:
summarise_at(iris, vars(Petal.Width, Sepal.Width), funs(min, max))

Petal.Width_min,Sepal.Width_min,Petal.Width_max,Sepal.Width_max
0.1,2,2.5,4.4


In [16]:
summarise_at(iris, vars(Petal.Width, Sepal.Width), funs(mean=mean(., na.rm=T), max=max(., na.rm=T)))

Petal.Width_mean,Sepal.Width_mean,Petal.Width_max,Sepal.Width_max
1.199333,3.057333,2.5,4.4


In [17]:
summarise_if(iris,  is.numeric, mean )

Sepal.Length,Sepal.Width,Petal.Length,Petal.Width
5.843333,3.057333,3.758,1.199333


### group_by

In [19]:
iris %>% group_by(Species) %>% summarise_all(mean)

Species,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width
setosa,5.006,3.428,1.462,0.246
versicolor,5.936,2.77,4.26,1.326
virginica,6.588,2.974,5.552,2.026


### n & tally & count

In [21]:
iris %>% group_by(Species) %>% summarise(cnt=n())

Species,cnt
setosa,50
versicolor,50
virginica,50


In [22]:
# tally() is short-hand for summarise()
tally(mtcars)

n
32


In [24]:
mtcars %>% group_by(cyl) %>% tally()

cyl,n
4,11
6,7
8,14


In [25]:
# count() is a short-hand for group_by() + tally()
mtcars %>% count(cyl, sort=TRUE)

cyl,n
8,14
4,11
6,7


### select

In [45]:
iris %>% select(Sepal.Length, Species) %>% head

Sepal.Length,Species
5.1,setosa
4.9,setosa
4.7,setosa
4.6,setosa
5.0,setosa
5.4,setosa


In [46]:
select(iris, starts_with("Sepal")) %>% head(1)

Sepal.Length,Sepal.Width
5.1,3.5


In [47]:
select(iris, ends_with("Width")) %>% head(1)

Sepal.Width,Petal.Width
3.5,0.2


In [48]:
select(iris, contains("Length")) %>% head(1)

Sepal.Length,Petal.Length
5.1,1.4


In [49]:
# you can select columns continuously by name
select(iris, Petal.Length:Species ) %>% head(1)

Petal.Length,Petal.Width,Species
1.4,0.2,setosa


In [50]:
select(iris, matches("th")) %>% head(1)

Sepal.Length,Sepal.Width,Petal.Length,Petal.Width
5.1,3.5,1.4,0.2


In [65]:
select(iris, one_of(c("Petal.Length", "Petal.Width"))) %>% head(1)

Petal.Length,Petal.Width
1.4,0.2


###  filter & distinct

In [26]:
iris %>% filter(Sepal.Length > 7) %>% head()

Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
7.1,3.0,5.9,2.1,virginica
7.6,3.0,6.6,2.1,virginica
7.3,2.9,6.3,1.8,virginica
7.2,3.6,6.1,2.5,virginica
7.7,3.8,6.7,2.2,virginica
7.7,2.6,6.9,2.3,virginica


In [27]:
distinct(iris, Species)

Species
setosa
versicolor
virginica


### sampling

In [29]:
iris %>% sample_frac(0.5, replace=TRUE) %>%  NROW

In [30]:
iris %>% sample_n(6, replace = TRUE)

Unnamed: 0,Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
131,7.4,2.8,6.1,1.9,virginica
16,5.7,4.4,1.5,0.4,setosa
51,7.0,3.2,4.7,1.4,versicolor
69,6.2,2.2,4.5,1.5,versicolor
113,6.8,3.0,5.5,2.1,virginica
116,6.4,3.2,5.3,2.3,virginica


### slice & top_n & arrange
- slice: select rows by position
- arrange: order rows

In [32]:
head(iris)

Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
5.1,3.5,1.4,0.2,setosa
4.9,3.0,1.4,0.2,setosa
4.7,3.2,1.3,0.2,setosa
4.6,3.1,1.5,0.2,setosa
5.0,3.6,1.4,0.2,setosa
5.4,3.9,1.7,0.4,setosa


In [33]:
slice(iris, 2:4)

Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
4.9,3.0,1.4,0.2,setosa
4.7,3.2,1.3,0.2,setosa
4.6,3.1,1.5,0.2,setosa


In [42]:
iris %>% arrange(desc(Sepal.Width)) %>% head(5)

Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
5.7,4.4,1.5,0.4,setosa
5.5,4.2,1.4,0.2,setosa
5.2,4.1,1.5,0.1,setosa
5.8,4.0,1.2,0.2,setosa
5.4,3.9,1.7,0.4,setosa


In [44]:
# 정렬 중에 값이 같아서 2개가 동시에 추가된 것.
iris %>% top_n(5, Sepal.Width)

Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
5.4,3.9,1.7,0.4,setosa
5.8,4.0,1.2,0.2,setosa
5.7,4.4,1.5,0.4,setosa
5.4,3.9,1.3,0.4,setosa
5.2,4.1,1.5,0.1,setosa
5.5,4.2,1.4,0.2,setosa


### mutate
- mutate: compute new columns
- transmutate: compute new columns and drop others
- mutate_all: apply funs to every column
- mutate_if/ mutate_at: apply funs to specific columns

In [67]:
mtcars %>% mutate(gpm=1/mpg) %>% head(5)

mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb,gpm
21.0,6,160,110,3.9,2.62,16.46,0,1,4,4,0.04761905
21.0,6,160,110,3.9,2.875,17.02,0,1,4,4,0.04761905
22.8,4,108,93,3.85,2.32,18.61,1,1,4,1,0.04385965
21.4,6,258,110,3.08,3.215,19.44,1,0,3,1,0.04672897
18.7,8,360,175,3.15,3.44,17.02,0,0,3,2,0.05347594


In [68]:
transmute(mtcars, gpm=1/mpg) %>% head(5)

gpm
0.04761905
0.04761905
0.04385965
0.04672897
0.05347594


In [83]:
# funs이 하나일 때, 이름 없이 쓰면 기존 변수에 바로 적용
mutate_all(faithful, funs(log(.))) %>% head(1)

eruptions,waiting
1.280934,4.369448


In [84]:
# funcs이 하나일 때, 이름 지정하면 기존 변수 이외에 추가로 변수 생성
mutate_all(faithful, funs(log=log(.))) %>% head(1)

eruptions,waiting,eruptions_log,waiting_log
3.6,79,1.280934,4.369448


In [79]:
mutate_all(faithful, funs(log(.), log2(.))) %>% head(1)

eruptions,waiting,eruptions_log,waiting_log,eruptions_log2,waiting_log2
3.6,79,1.280934,4.369448,1.847997,6.303781


In [69]:
mutate_all(faithful, funs(A=log(.), B=log2(.))) %>% head(1)

eruptions,waiting,eruptions_A,waiting_A,eruptions_B,waiting_B
3.6,79,1.280934,4.369448,1.847997,6.303781


In [70]:
mutate_if(faithful, is.numeric, log) %>% head(2)

eruptions,waiting
1.2809338,4.369448
0.5877867,3.988984


In [4]:
mutate_if(faithful, is.numeric, funs(log=log(.))) %>% head(2)

ERROR: Error in mutate_if(faithful, is.numeric, funs(log = log(.))) %>% head(2): 함수 "%>%"를 찾을 수 없습니다


In [71]:
mutate_if(faithful, is.numeric, funs(log(.))) %>% head(2)

eruptions,waiting
1.2809338,4.369448
0.5877867,3.988984


In [76]:
mutate_if(faithful, is.numeric, funs(log(.), log2(.) )) %>% head(2)

eruptions,waiting,eruptions_log,waiting_log,eruptions_log2,waiting_log2
3.6,79,1.2809338,4.369448,1.8479969,6.303781
1.8,54,0.5877867,3.988984,0.8479969,5.754888


In [77]:
mutate_if(faithful, is.numeric, funs(A=log(.), B=log2(.) )) %>% head(2)

eruptions,waiting,eruptions_A,waiting_A,eruptions_B,waiting_B
3.6,79,1.2809338,4.369448,1.8479969,6.303781
1.8,54,0.5877867,3.988984,0.8479969,5.754888


In [78]:
mutate_at(iris, vars(-Species), funs(log(.))) %>% head()

Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
1.629241,1.252763,0.3364722,-1.6094379,setosa
1.589235,1.098612,0.3364722,-1.6094379,setosa
1.547563,1.163151,0.2623643,-1.6094379,setosa
1.526056,1.131402,0.4054651,-1.6094379,setosa
1.609438,1.280934,0.3364722,-1.6094379,setosa
1.686399,1.360977,0.5306283,-0.9162907,setosa


In [85]:
mutate_at(iris, vars(-Species), funs(log=log(.))) %>% head()

Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species,Sepal.Length_log,Sepal.Width_log,Petal.Length_log,Petal.Width_log
5.1,3.5,1.4,0.2,setosa,1.629241,1.252763,0.3364722,-1.6094379
4.9,3.0,1.4,0.2,setosa,1.589235,1.098612,0.3364722,-1.6094379
4.7,3.2,1.3,0.2,setosa,1.547563,1.163151,0.2623643,-1.6094379
4.6,3.1,1.5,0.2,setosa,1.526056,1.131402,0.4054651,-1.6094379
5.0,3.6,1.4,0.2,setosa,1.609438,1.280934,0.3364722,-1.6094379
5.4,3.9,1.7,0.4,setosa,1.686399,1.360977,0.5306283,-0.9162907


In [74]:
mutate_at(iris, vars(-Species), log) %>% head()

Sepal.Length,Sepal.Width,Petal.Length,Petal.Width,Species
1.629241,1.252763,0.3364722,-1.6094379,setosa
1.589235,1.098612,0.3364722,-1.6094379,setosa
1.547563,1.163151,0.2623643,-1.6094379,setosa
1.526056,1.131402,0.4054651,-1.6094379,setosa
1.609438,1.280934,0.3364722,-1.6094379,setosa
1.686399,1.360977,0.5306283,-0.9162907,setosa


### add_column
- mutate + 새로 생성한 변수 위치 지정 + 기존 변수명 사용하면 error
- mutate는 기존 변수명 사용하면 덮어쓰기

In [12]:
library(tibble)

ERROR: Error in library(tibble): there is no package called 'tibble'


In [112]:
mtcars %>% add_column(car_name=rownames(mtcars), .before="cyl") %>% head()

ERROR: Error in add_column(., car_name = rownames(mtcars), .before = "cyl"): 함수 "add_column"를 찾을 수 없습니다


### rename

In [14]:
library(dplyr)

"package 'dplyr' was built under R version 3.5.1"

ERROR: Error: package 'dplyr' was installed by an R version with different internals; it needs to be reinstalled for use with this R version


In [None]:
rename(iris, Length=Sepal.Length) %>% head()

In [13]:
a <- iris %>% rownames_to_column(var="rowname") %>% head()

ERROR: Error in iris %>% rownames_to_column(var = "rowname") %>% head(): 함수 "%>%"를 찾을 수 없습니다


In [None]:
has_rownames(a)

In [None]:
a<-remove_rownames(a)
column_to_rownames(a, var="rowname")

In [None]:
A<-select(iris, 1)
B<-select(iris, 2)

bind_cols(A,B) %>% head()

In [None]:
iris_rownames <- rownames_to_column(iris, var="rowname")
A<-slice(iris_rownames, 1:5)
B<-slice(iris_rownames, 11:15)

bind_rows(A,B) 