## Download R & RStudio

* R https://cran.r-project.org/
* RStudio https://www.rstudio.com/
* Jupyter (IPython) notebook with R https://www.continuum.io/blog/developer/jupyter-and-conda-r

## Why R?

* http://www.kdnuggets.com/2015/05/r-vs-python-data-science.html
* http://www.kdnuggets.com/2015/05/poll-r-rapidminer-python-big-data-spark.html

## Application of R:

* statistical analysis
* linear regression
* facial keypoint detection https://www.kaggle.com/c/facial-keypoints-detection/details/getting-started-with-r
* text mining http://rstudio-pubs-static.s3.amazonaws.com/12422_b2b48bb2da7942acaca5ace45bd8c60c.html

## Atomic class

* numeric: 1, 1.2
* integer: 1L
* character: "1"
* logical: TRUE
* complex: 1+4i

## data structure

* vector
 * components of different types are not allowed: they are automatically coerced to a common type
* matrix
* factor
* data frame
* list

## Basic computing

In [None]:
# Numeric operators: + - * / and %% (modulo)
nums <- 1:9
nums
# Summary functions need data to work on; mean() and median() stop with an
# error when called without any argument.
sum(nums)
max(nums)
mean(nums)
median(nums)
# Strings: paste() joins its arguments element-wise using `sep`;
# `collapse` then folds the resulting vector into one single string.
paste("paste", "me")
paste("paste", "me", sep = "")
paste("paste", "me", sep = ";")
paste0("paste", "me")  # shorthand for sep = ""
paste(c("A", "B"), c("C", "D"), sep = "")
paste(c("A", "B"), c("C", "D"), collapse = "")
paste(c("A", "B"), c("C", "D"), sep = "", collapse = "")
# Look up the data type / structure of an object
typeof(nums)
class(nums)
str(nums)

## Vector

In [None]:
# Empty vector
x <- vector()
# Vectors created with a given type and length
vector("character", length = 10)
character(5)  # character vector of length 5
numeric(5)
logical(5)
x1 <- c(1L, 2L, 3L)
# Mixed element types are coerced to one common type (character here)
x2 <- c(0, 1L, "a", TRUE, 1 + (1 + 2i))

# Growing a vector: the vector being extended has to exist first
a <- c(0, 1)
append(a, 2)
c(a, 2)  # use this

## Calculate BMI

In [None]:
# Naive approach: one variable per person and one formula per person
h_brian <- 180
h_tony <- 169
h_sherry <- 173
w_brian <- 73
w_tony <- 87
w_sherry <- 43
# BMI = weight (kg) / height (m)^2; heights above are in cm, hence the / 100
b_brian <- w_brian / (h_brian / 100)^2
b_tony <- w_tony / (h_tony / 100)^2
b_sherry <- w_sherry / (h_sherry / 100)^2

# for loop
weight <- c(73, 87, 43)
height <- c(180, 169, 173)
for (i in seq_along(weight)) {
  print(weight[i] / (height[i] / 100)^2)
}

# smart: arithmetic is vectorized, so no loop is needed
weight / (height / 100)^2

## "=" vs "<-"

In [None]:
# A function with one required argument
func_name <- function(a) {
  print(a)
}

# Inside a parameter list, `=` supplies the default value of the argument
func_name2 <- function(a = 1) {
  print(a)
}

# `<-` is NOT accepted inside a parameter list: the definition below is a
# syntax error, so it is kept as a comment to leave the rest of the code
# parseable.
# func_name3 <- function(a <- 1) {
#   print(a)
# }

# `<<-` assigns outside the function's own frame: it updates `b` in an
# enclosing environment, or creates it in the global one if none has it.
func_name4 <- function(a = 1) {
  b <<- 5
  print(a)
}

## Matrix

In [None]:
# Building matrices: values fill column by column unless byrow = TRUE
matrix(1:9, nrow = 3, byrow = TRUE)
matrix(1:9, nrow = 3)
kevin <- c(85, 73)
marry <- c(72, 64)
jerry <- c(59, 66)
mat <- matrix(c(kevin, marry, jerry), byrow = TRUE, nrow = 3)
colnames(mat) <- c("first", "second")
rownames(mat) <- c("kevin", "marry", "jerry")

# Dimensions, indexing and margin sums
dim(mat)
nrow(mat)
ncol(mat)
mat[1, ]    # first row
mat[, 1]    # first column
mat[1:2, ]  # first two rows
rowSums(mat)
colSums(mat)

# Appending a row: a named argument to rbind() becomes the new row name
mat2 <- rbind(mat, sam = c(78, 63))
mat2

# Appending a column works the same way through cbind()
mat3 <- cbind(mat2, third = c(82, 77, 70, 64))
mat3

# Element-wise arithmetic on two matrices of the same shape
m1 <- matrix(1:4, byrow = TRUE, nrow = 2)
m2 <- matrix(5:8, byrow = TRUE, nrow = 2)
m1 + m2
m1 - m2
m1 * m2  # element-wise product, not the matrix product
m1 / m2

# Matrix multiplication: the column count of m1 must equal the row count of m2
ncol(m1) == nrow(m2)
m1 %*% m2

## Factor

In [None]:
# A factor stores categorical data; its levels are the distinct values
weather <- c("sunny", "rainy", "cloudy", "rainy", "cloudy")
weather_category <- factor(weather)
weather_category

# Ordered factor: `levels` gives the ranking from lowest to highest, which
# makes comparisons between elements meaningful. The argument is spelled out
# as `ordered` instead of relying on partial matching of `order`.
temperature <- c("Low", "High", "High", "Medium", "Low", "Medium")
temperature_category <- factor(
  temperature,
  ordered = TRUE,
  levels = c("Low", "Medium", "High")
)
temperature_category
temperature_category[3] > temperature_category[1]
temperature_category[4] > temperature_category[3]

# Renaming levels: the new names are matched to the existing levels by
# position, and the existing levels are sorted ("c", "r", "s")
weather <- c("s", "r", "c", "r", "c")
weather_factor <- factor(weather)
levels(weather_factor) <- c("cloudy", "rainy", "sunny")
weather_factor

## Data Frame

In [None]:
# Build a data frame, then give its columns meaningful names
df <- data.frame(
  a = c(201, 196, 213, 196, 218),
  b = c(108.9, 94.8, 115.7, 95.3, 112.5)
)
colnames(df) <- c("height", "weight")
df_name <- data.frame(
  name = c("Quincy", "Jordan", "Steven", "Arron", "Alexis"),
  stringsAsFactors = FALSE
)
df_with_name <- cbind(df_name, df)

# merge: join two data frames on a key column
df_team <- data.frame(
  name = c("Quincy", "Jordan", "Steven", "Arron", "Alexis"),
  team = c("kings", "Grizzlies", "Thunder", "knicks", "Hornets")
)
df_with_team <- merge(df_with_name, df_team, by = "name")

# Key column named differently on each side: use by.x / by.y
df_team2 <- df_team
colnames(df_team2)[1] <- "nam"
merge(df_with_name, df_team2, by.x = "name", by.y = "nam")

# A non-key column ("height") that exists on both sides of the join
df_team3 <- cbind(df_team, df_with_name[, 2])
colnames(df_team3)[3] <- "height"
merge(df_with_name, df_team3, by = "name")

# pick value
# Row selection with a logical mask
bool <- c(TRUE, TRUE, FALSE, FALSE, FALSE)
df_with_name[bool, ]
which(bool)  # positions of the TRUE entries
which(df_with_name$name == "Jordan")

# Three equivalent ways to select the rows whose name is "Jordan"
df_with_name[df_with_name["name"] == "Jordan", ]
df_with_name[which(df_with_name$name == "Jordan"), ]
with(df_with_name, df_with_name[name == "Jordan", ])

# First / last rows
head(df_with_name, 1)
tail(df_with_name, 1)

# Column access: `[` keeps a data frame, `[[` and `$` extract the vector
df_with_name[1]
df_with_name["name"]
df_with_name[[1]]
df_with_name$name
df_with_name[1:2]

# Two-index form [rows, columns]
df_with_name[1, ]
df_with_name[, 1]
df_with_name[, 1:2]
df_with_name[1, "name"]

# order
order(df_with_name$name)  # row positions that would sort by name
df_with_name[order(df_with_name$name), ]
df_with_name[1:3, ]
df_with_name[c(1, 3, 5), ]

# library vs require
# library() stops with an error when the package is missing, whereas
# require() only warns and returns FALSE, so its result can drive an
# install-then-load fallback. install.packages() takes the package name as a
# character string.
if (!require(dplyr)) {
  install.packages("dplyr")
  library(dplyr)
}

# dplyr: each verb is shown right after its base R equivalent
# select(): keep columns
df_with_name[c("name", "height")]
select(df_with_name, name, height)
##################################
# filter(): keep rows matching a condition
df_with_name[df_with_name["name"] == "Jordan", ]
filter(df_with_name, name == "Jordan")
##################################
# arrange(): sort rows. order() is given the column vector itself rather
# than a one-column data frame.
df_with_name[order(df_with_name$name), ]
arrange(df_with_name, name)
##################################
# mutate(): add a computed column
mutate(df_with_name, BMI = weight / (height / 100)^2)

## Flow Control

In [None]:
# Two-way branch. print and its argument must sit on the same line: a bare
# `print` on its own line is a complete expression and calls nothing.
x <- 5
if (x > 3) {
  print("x > 3")
} else {
  print("x <= 3")
}

# Three-way branch: the final else is only reached when x is below 3
x <- 5
if (x > 3) {
  print("x > 3")
} else if (x == 3) {
  print("x == 3")
} else {
  print("x < 3")
}

# Plain counting loop
for (i in seq_len(10)) {
  print(i)
}

# Accumulating a total inside a loop ...
sum <- 0
for (i in seq_len(100)) {
  sum <- sum + i
}
sum

# ... compared with the vectorized built-in
sum(1:100)

# Filling a vector that was allocated up front
ary <- rep(NA, 100)
for (i in seq_len(100)) {
  ary[i] <- i
}
ary

# Growing a vector one element per iteration
ary2 <- c()
for (i in seq_len(100)) {
  ary2 <- c(ary2, i)
}
ary2

# seq_len(n) produces the sequence 1..n
seq_len(5)

# Walk the matrix cell by cell and print it one row per line
mat <- matrix(1:9, nrow = 3, byrow = TRUE)
for (i in seq_len(nrow(mat))) {
  for (j in seq_len(ncol(mat))) {
    cat(mat[i, j], " ")
  }
  cat("\n")
}

# while loop: add up 0..100 using an explicit counter
sum <- 0
cnt <- 0
while (cnt <= 100) {
  sum <- sum + cnt
  cnt <- cnt + 1
}
sum

## 9*9

In [None]:
## Naive approach: build the 9x9 table row by row with nested loops
m99 <- matrix(NA, nrow = 9, ncol = 9)
for (i in seq_len(9)) {
  r99 <- vector()
  for (j in seq_len(9)) {
    r99 <- c(r99, i * j)
  }
  m99[i, ] <- r99
}
# The first column and the first row both hold 1..9; reuse them as labels
colnames(m99) <- m99[, 1]
rownames(m99) <- m99[1, ]

## use %*%: a 9x1 column times its 1x9 transpose gives the 9x9 table
x <- 1:9
x <- matrix(x, nrow = 9, byrow = TRUE)
m99 <- x %*% t(x)
dimnames(m99) <- list(as.character(x), as.character(x))

## use %o%: the outer product of a named vector carries the names over as
## row and column labels
x <- setNames(1:9, 1:9)
x %o% x