# Introduction

This is a syntax and usage summary for R. The content is mainly from https://www.tutorialspoint.com/r/index.htm

## Hello World

In [1]:
my_str <- "Hello World!"
print(my_str)

[1] "Hello World!"


In [2]:
cat(my_str, 123)

Hello World! 123

# Data Types, Variables and Objects

## Basic Data Types

In [3]:
v <- TRUE 
print(class(v))

[1] "logical"


In [4]:
v <- 23.5
print(class(v))

[1] "numeric"


In [5]:
v <- 2L
print(class(v))

[1] "integer"


In [6]:
v <- 2+5i
print(class(v))

[1] "complex"


In [7]:
v <- "TRUE"
print(class(v))

[1] "character"


In [8]:
v <- charToRaw("Hello")
print(class(v))

[1] "raw"


##  Objects 

### Vector

* An ordered collection of values that all have the **same** type.

#### Create a vector

In [9]:
# Create a vector.
v <- c(0,1,2,3)
print(v)

# Get the class of the vector.
print(class(v))

[1] 0 1 2 3
[1] "numeric"


#### Getting the Length of a Vector

In [10]:
# Print the length of a vector
print(length(v))

[1] 4


#### Create a sequence

In [11]:
# Create a vector of a sequence
v <- 1:10
print(v)

v <- 1.2:10.1
print(v)

 [1]  1  2  3  4  5  6  7  8  9 10
[1] 1.2 2.2 3.2 4.2 5.2 6.2 7.2 8.2 9.2


#### Accessing vector elements 
* Note that **indices in R start from 1**.

In [12]:
# Accessing vector elements using position.
t <- c("Sun","Mon","Tue","Wed","Thurs","Fri","Sat")
u <- t[c(2,3,6)]
print(u)

# Accessing vector elements using negative indexing.
# A negative index drops the element at that position; unlike in Python, it does not count from the end!
x <- t[c(-2,-5)]
print(x)

# Accessing vector element by an index
print(t[2])

[1] "Mon" "Tue" "Fri"
[1] "Sun" "Tue" "Wed" "Fri" "Sat"
[1] "Mon"


#### Vector Manipulation

In [13]:
# Vector arithmetic
u <- 1:6
v <- 3:8

print(u+v)

[1]  4  6  8 10 12 14


In [14]:
# Vector Element Recycling

v1 <- c(3,8,4,5,0,11)
v2 <- c(4,11)
# v2 is recycled to c(4,11,4,11,4,11) to match the length of v1

add.result <- v1+v2
print(add.result)

sub.result <- v1-v2
print(sub.result)

[1]  7 19  8 16  4 22
[1] -1 -3  0 -6 -4  0


#### Sorting a Vector

In [15]:
# Create a size-10 vector by sampling from 1:100 with replacement
v <- sample(1:100, 10, replace=TRUE)
print(v)

v_inc <- sort(v, decreasing = FALSE)
print(v_inc)

v_dec <- sort(v, decreasing = TRUE)
print(v_dec)

 [1] 97 35 98 86 61 86 25  8  7 85
 [1]  7  8 25 35 61 85 86 86 97 98
 [1] 98 97 86 86 85 61 35 25  8  7


### List
* An ordered collection of values that may have **different** types.

#### Create a list

In [16]:
# Create a list.
list1 <- list(c(1,2,3),18L,"ABC")

# Print the list.
print(list1)

# Print the length of a list
print(length(list1))

[[1]]
[1] 1 2 3

[[2]]
[1] 18

[[3]]
[1] "ABC"

[1] 3


#### Giving Names to Elements

In [17]:
# Providing names to elements
names(list1) <- c("A vector", "The integer", "A_string")

print(list1)

$`A vector`
[1] 1 2 3

$`The integer`
[1] 18

$A_string
[1] "ABC"



#### Accessing Elements

In [18]:
# By indices
print(list1[2])

# By names
print(list1$`A vector`)

$`The integer`
[1] 18

[1] 1 2 3


#### Adding and Removing Elements

In [19]:
# Add an element
list1[length(list1)+1] <- "HI."

print(list1)

$`A vector`
[1] 1 2 3

$`The integer`
[1] 18

$A_string
[1] "ABC"

[[4]]
[1] "HI."



In [20]:
# Remove an element
list1[3] <- NULL

print(list1)

$`A vector`
[1] 1 2 3

$`The integer`
[1] 18

[[3]]
[1] "HI."



#### Combine Two Lists
* Function `c` here is **combine**. Check `?c` for more detail.

In [21]:
# Define a new list
list2 <- list("Sun", "Moon", 3.14, 20L)

# Combine two lists using function c
combined_list <- c(list1, list2)

print(combined_list)

$`A vector`
[1] 1 2 3

$`The integer`
[1] 18

[[3]]
[1] "HI."

[[4]]
[1] "Sun"

[[5]]
[1] "Moon"

[[6]]
[1] 3.14

[[7]]
[1] 20



#### Convert a List to a Vector

In [22]:
# Define a List
list3 <- list(1,2,3,4,5)

v <- unlist(list3)

print(v)

[1] 1 2 3 4 5


### Matrix

#### Create a Matrix

In [23]:
# Create a matrix.
M1 = matrix( c(1,2,3,4,5,6), nrow = 2, ncol = 3, byrow = TRUE)
print(M1)

     [,1] [,2] [,3]
[1,]    1    2    3
[2,]    4    5    6


In [24]:
# Create a matrix with dimnames.
# `dimnames` takes a list with one character vector per dimension:
# the row names first, then the column names.
dimlabel1 <- c("A", "B")       # row names
dimlabel2 <- c("C", "D", "E")  # column names
M2 <- matrix(
    c(1, 2, 3, 4, 5, 6),
    nrow = 2, ncol = 3, byrow = TRUE,
    dimnames = list(dimlabel1, dimlabel2)
)
print(M2)

  C D E
A 1 2 3
B 4 5 6


#### Accessing Elements

In [25]:
# Getting particular element
print(M1[2,3])

[1] 6


In [26]:
# Getting particular column
print(M1[,3])

[1] 3 6


In [27]:
# Getting particular row
print(M1[1,])

[1] 1 2 3


#### Matrix Addition & Subtraction
* The rule for element-wise multiplication `*` and division `/` is the same.

In [28]:
# Rebuild M1 and M2 as the same 2x3 integer matrix, filled row by row
M1 <- matrix(1:6, nrow = 2, ncol = 3, byrow = TRUE)
M2 <- M1

# A scalar operand is applied to every element of the matrix
M2 <- M2 + 1
print(M2)

     [,1] [,2] [,3]
[1,]    2    3    4
[2,]    5    6    7


In [29]:
M3 = M2 - M1
print(M3)

     [,1] [,2] [,3]
[1,]    1    1    1
[2,]    1    1    1


In [30]:
M4 = M2 - 4*M1
print(M4)

     [,1] [,2] [,3]
[1,]   -2   -5   -8
[2,]  -11  -14  -17


### Array

In [31]:
# Create an array.
a <- array(c(1,2,3,4),dim = c(3,3,2))
print(a)

, , 1

     [,1] [,2] [,3]
[1,]    1    4    3
[2,]    2    1    4
[3,]    3    2    1

, , 2

     [,1] [,2] [,3]
[1,]    2    1    4
[2,]    3    2    1
[3,]    4    3    2



In [32]:
# Create an array from two vectors.
a <- array(c(c(1,2,3,4),c(5,6)),dim = c(3,3,2))
print(a)

, , 1

     [,1] [,2] [,3]
[1,]    1    4    1
[2,]    2    5    2
[3,]    3    6    3

, , 2

     [,1] [,2] [,3]
[1,]    4    1    4
[2,]    5    2    5
[3,]    6    3    6



#### Name Dimensions

In [33]:
# Define dimnames
dimname1 <- c("A", "B", "C")
dimname2 <- c("D", "E", "F")
dimname3 <- c("G", "H")

# Create an array from two vectors with dimnames.
a <- array(c(c(1,2,3,4),c(5,6)),dim = c(3,3,2), dimnames = list(dimname1, dimname2, dimname3))
print(a)

, , G

  D E F
A 1 4 1
B 2 5 2
C 3 6 3

, , H

  D E F
A 4 1 4
B 5 2 5
C 6 3 6



#### Accessing Elements

In [34]:
# Getting a particular element
print(a[1,2,1])

[1] 4


In [35]:
# Getting a particular column
print(a[,2,1])

A B C 
4 5 6 


In [36]:
# Getting a particular matrix
print(a[,,2])

  D E F
A 4 1 4
B 5 2 5
C 6 3 6


#### Array Addition
* Same as other element-wise operations, e.g. `-`, `*`, `/`

In [37]:
# Define two arrays
a1 <- array(1:3, dim = c(2,2,2))
a2 <- array(4:6, dim = c(2,2,2))

cat("Array a1:\n")
print(a1)

cat("Array a2:\n")
print(a2)

# Calculate the element-wise sums
a_result <- a1 + a2

cat("Array a_result:\n")
print(a_result)


Array a1:
, , 1

     [,1] [,2]
[1,]    1    3
[2,]    2    1

, , 2

     [,1] [,2]
[1,]    2    1
[2,]    3    2

Array a2:
, , 1

     [,1] [,2]
[1,]    4    6
[2,]    5    4

, , 2

     [,1] [,2]
[1,]    5    4
[2,]    6    5

Array a_result:
, , 1

     [,1] [,2]
[1,]    5    9
[2,]    7    5

, , 2

     [,1] [,2]
[1,]    7    5
[2,]    9    7



#### Calculations Across Array Elements

* We use the function `apply()` to calculate across array elements

In [38]:
# Define an array 
a1 <- array(1:3, dim = c(2,2,2))

cat("array a1:\n")
print(a1)

# Calculate the mean of all elements for each index of the 1st dimension (i.e. per row)
result1 <- apply(a1, MARGIN = c(1), mean)
cat("\narray result1:\n")
print(result1)

# Calculate the sum over the 3rd dimension for each (row, column) position
result2 <- apply(a1, MARGIN = c(1,2), sum)
cat("\narray result2:\n")
print(result2)



array a1:
, , 1

     [,1] [,2]
[1,]    1    3
[2,]    2    1

, , 2

     [,1] [,2]
[1,]    2    1
[2,]    3    2


array result1:
[1] 1.75 2.00

array result2:
     [,1] [,2]
[1,]    3    4
[2,]    5    3


### Factor 

#### Create a Factor

In [39]:
# Create a character vector of gender codes.
staff_genders <- c("M", "M", "F", "M", "F", "F", "M")

# Convert it to a factor; the levels are the sorted unique values ("F", "M").
staff_gender_factor <- factor(staff_genders)

# Print the factor and its number of levels
print(staff_gender_factor)
print(nlevels(staff_gender_factor))

[1] M M F M F F M
Levels: F M
[1] 2


#### Generating Labels
* Function `gl` is used to generate labels
    - Syntax: `gl(n, k, labels)`
    - `n` : number of levels.
    - `k` : the number of replications.
    - `labels` : a vector of labels.

In [40]:
# Create labels
v <- gl(3, 4, labels = c("Apple", "Orange", "Banana"))
print(v)

 [1] Apple  Apple  Apple  Apple  Orange Orange Orange Orange Banana Banana
[11] Banana Banana
Levels: Apple Orange Banana


### Data Frames

#### Create a Data Frame

In [41]:
# Create a data frame with one row per staff member.
# `stringsAsFactors = TRUE` stores the Name and Gender columns as factors.
# That was the default before R 4.0.0; setting it explicitly keeps the
# structure and summary of this table the same on newer versions of R.
staff_table <- data.frame(
    ID = c(1L, 2L, 3L),
    Name = c("Tom", "Ann", "Peter"),
    Gender = c("M", "F", "M"),
    Age = c(32L, 36L, 29L),
    stringsAsFactors = TRUE
)

# Print the data frame.
print(staff_table)

  ID  Name Gender Age
1  1   Tom      M  32
2  2   Ann      F  36
3  3 Peter      M  29


#### Structure of the Dataframe

In [42]:
# Get the structure of the data frame.
str(staff_table)

'data.frame':	3 obs. of  4 variables:
 $ ID    : int  1 2 3
 $ Name  : Factor w/ 3 levels "Ann","Peter",..: 3 1 2
 $ Gender: Factor w/ 2 levels "F","M": 2 1 2
 $ Age   : int  32 36 29


#### Summary of the Dataframe

In [43]:
# Get the summary of the data frame
print(summary(staff_table))

       ID         Name   Gender      Age       
 Min.   :1.0   Ann  :1   F:1    Min.   :29.00  
 1st Qu.:1.5   Peter:1   M:2    1st Qu.:30.50  
 Median :2.0   Tom  :1          Median :32.00  
 Mean   :2.0                    Mean   :32.33  
 3rd Qu.:2.5                    3rd Qu.:34.00  
 Max.   :3.0                    Max.   :36.00  


#### Making a Sub-dataframe

In [44]:
# Making a sub-dataframe
staff_table_gender = data.frame(staff_table$Name, staff_table$Gender)

print(staff_table_gender)

  staff_table.Name staff_table.Gender
1              Tom                  M
2              Ann                  F
3            Peter                  M


In [45]:
# Getting first two rows
staff_table_12 <- staff_table[1:2,]

print(staff_table_12)

  ID Name Gender Age
1  1  Tom      M  32
2  2  Ann      F  36


In [46]:
# Getting particular cols and rows
staff_table_parti <- staff_table[c(1,3),c(2,4)]

print(staff_table_parti)

   Name Age
1   Tom  32
3 Peter  29


#### Update the Dataframe

In [47]:
# Add a new column
staff_table$Salay = c(30000,32000,29000)

print(staff_table)

  ID  Name Gender Age Salay
1  1   Tom      M  32 30000
2  2   Ann      F  36 32000
3  3 Peter      M  29 29000


In [48]:
# Add a new row
new_staff = data.frame(
    ID = 4L,
    Name = "Ken",
    Gender = "M",
    Age = 30L,
    Salay = 31000
)

staff_table <- rbind(staff_table, new_staff)

print(staff_table)

  ID  Name Gender Age Salay
1  1   Tom      M  32 30000
2  2   Ann      F  36 32000
3  3 Peter      M  29 29000
4  4   Ken      M  30 31000


### String

#### Define a String

In [49]:
# Assign a string
str1 <- "I got Tom's lunchbox."
print(str1)

[1] "I got Tom's lunchbox."


#### Combine Strings

In [50]:
str1 <- "I "
str2 <- "Like"
str3 <- "Hamburger."

# Combine strings with default settings
result <- paste(str1, str2, str3)
print(result)

[1] "I  Like Hamburger."


In [51]:
# Combine strings and connect them using "#"
result <- paste(str1, str2, str3, sep="#")
print(result)

[1] "I #Like#Hamburger."


In [52]:
# Cross combining strings using "#" and "%%"
result <- paste(c(str1, str2, str3), c("A", "B"), sep="#", collapse = "%%")
print(result)

[1] "I #A%%Like#B%%Hamburger.#A"


#### Format String 

The `format` function has a general syntax:

`format(x, digits, nsmall, scientific, width, justify = c("left", "right", "centre", "none")) `



In [53]:
str1 <- format(3.14159265358, digits = 4) # Use 4 significant digits (not 4 decimal places)
print(str1)

[1] "3.142"


In [54]:
str2 <- format(3.1, nsmall = 2) # Show at least 2 digits after the decimal point
print(str2)

[1] "3.10"


In [55]:
str3 <- format(3.1, scientific = TRUE)
print(str3)

[1] "3.1e+00"


In [56]:
str4 <- format("Hello", width = 10)
print(str4)

[1] "Hello     "


In [57]:
str5 <- format("Hello", width = 10, justify = "right")
print(str5)

[1] "     Hello"


#### Number of Chars

In [58]:
# Define the string
str1 <- "Hello"

print(nchar(str1))

[1] 5


#### Change the Case

In [59]:
# Define the string
str1 <- "Hello"

print(toupper(str1))

[1] "HELLO"


In [60]:
print(tolower(str1))

[1] "hello"


#### Extracting Parts of a String - `substr`

In [61]:
# Define a string
str1 <- "The air quality is ideal for most individuals."

# Get a substring
str2 <- substr(str1, start = 5, stop = 15)
print(str2)

[1] "air quality"


#### Split a String

In [62]:
# Define a string
str1 <- "The air quality is ideal for most individuals."

words <- strsplit(str1, split = " ")
print(words)

[[1]]
[1] "The"          "air"          "quality"      "is"           "ideal"       
[6] "for"          "most"         "individuals."



# Operations

## Arithmetic Operators

Arithmetic operators include `+`, `-`, `*`, `/`, `%%` (remainder), `%/%` (quotient) and `^` (exponent).

In [63]:
# Define vectors
v1 <- c(2,7,9)
v2 <- c(1,2,3)

print(v1+v2)
print(v1-v2)
print(v1*v2)
print(v1/v2)
print(v1%%v2)
print(v1%/%v2)
print(v1 ^ v2)

[1]  3  9 12
[1] 1 5 6
[1]  2 14 27
[1] 2.0 3.5 3.0
[1] 0 1 0
[1] 2 3 3
[1]   2  49 729


## Relational Operators
Relational operators include `<`, `>`, `==`, `<=`, `>=` and `!=`.

In [64]:
# Define vectors
v1 <- c(2,3,9)
v2 <- c(1,3,12)

print(v1<v2)
print(v1>v2)
print(v1==v2)
print(v1<=v2)
print(v1>=v2)
print(v1!=v2)

[1] FALSE FALSE  TRUE
[1]  TRUE FALSE FALSE
[1] FALSE  TRUE FALSE
[1] FALSE  TRUE  TRUE
[1]  TRUE  TRUE FALSE
[1]  TRUE FALSE  TRUE


## Logical Operators

In [65]:
print(TRUE || TRUE)
print(FALSE || TRUE)
print(TRUE || FALSE)
print(FALSE || FALSE)

[1] TRUE
[1] TRUE
[1] TRUE
[1] FALSE


In [66]:
print(TRUE && TRUE)
print(FALSE && TRUE)
print(TRUE && FALSE)
print(FALSE && FALSE)

[1] TRUE
[1] FALSE
[1] FALSE
[1] FALSE


In [67]:
v1 <- c(FALSE, FALSE, FALSE, TRUE)
v2 <- c(FALSE, TRUE, TRUE, TRUE)

print(v1&v2) 
print(v1|v2)
print(!v1)

[1] FALSE FALSE FALSE  TRUE
[1] FALSE  TRUE  TRUE  TRUE
[1]  TRUE  TRUE  TRUE FALSE


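The logical operators `&&` and `||` work on single values only. In older versions of R they silently used just the first element of each vector; since R 4.3.0, passing a vector with more than one element is an error.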

In [68]:
v1 <- c(FALSE, FALSE, FALSE, TRUE)
v2 <- c(TRUE, TRUE, TRUE, FALSE)
# `&&` and `||` need length-one operands (longer ones are an error since
# R 4.3.0), so the first element of each vector is selected explicitly.
print(v1[1] && v2[1])
print(v1[1] || v2[1])

v3 <- c(TRUE, TRUE, TRUE, TRUE)
v4 <- c(FALSE, FALSE, FALSE, TRUE)
print(v2[1] && v3[1])
print(v1[1] && v4[1])

[1] FALSE
[1] TRUE
[1] TRUE
[1] FALSE


## Miscellaneous Operators

### `:` Create Vector Containing a Sequence of Numbers 

In [69]:
v1 <- 1:10
print(v1)

 [1]  1  2  3  4  5  6  7  8  9 10


### `%in%` Check if a Variable Belongs to a Vector

In [70]:
print(3 %in% v1)
print(11 %in% v1)

[1] TRUE
[1] FALSE


### `%*%` Matrix Multiplication

In [71]:
M1 <- matrix(1:6, nrow = 3, ncol = 2)
M2 <- matrix(3:8, nrow = 2, ncol = 3)

cat("\n")
print(M1)
cat("\n")
print(M2)

M_result <- M1 %*% M2

cat("\n")
print(M_result)


     [,1] [,2]
[1,]    1    4
[2,]    2    5
[3,]    3    6

     [,1] [,2] [,3]
[1,]    3    5    7
[2,]    4    6    8

     [,1] [,2] [,3]
[1,]   19   29   39
[2,]   26   40   54
[3,]   33   51   69


# Conditional Statements

## `if ... else` statement

In [72]:
x <- 2 # Assign 2 to x

# An if-only statement
if (x < 4){
    cat("x is less than 4.\n\n")
}

x <- 5 # Assign 5 to x

# An if-else statement
if (x < 4){
    cat("x is less than 4.\n\n")
}else{
    cat("x is not less than 4.\n")
}

x is less than 4.

x is not less than 4.


## `if ... else if ... else` statement

In [73]:
x <- 5 # Assign 5 to x

# An if-else if-else statement
if (x < 4){
    cat("x is less than 4.\n\n")
}else if (x < 8){
    cat("x is not less than 4, but less than 8.\n")
}else{
    cat("x is not less 8.\n")
}

x is not less than 4, but less than 8.


In [74]:
x <- 10 # Assign 10 to x

# An if-else if-else statement
if (x < 4){
    cat("x is less than 4.\n\n")
}else if (x < 8){
    cat("x is not less than 4, but less than 8.\n")
}else{
    cat("x is not less 8.\n")
}

x is not less 8.


## `switch` statement

In [75]:
# Running a switch statement
x1 <- switch(2, "One", "Two", "None Matched")
print(x1)

[1] "Two"


In [76]:
# Define a list of indices
idx <- list(1,2,3)
# Give names to list elements
names(idx) <- c("Apple", "Orange", "Banana")

# Determine the output by a switch
x2 <- switch(idx$Apple, "Apple", "Orange", "Banana")

cat("I got an", x2, ".\n")

I got an Apple .


# Loops

## `repeat` Loop

In [77]:
# Initialise the iteration counter
count <- 0L

# `repeat` has no condition of its own; it runs until `break` is reached
repeat {
    count <- count + 1L
    cat("This is iteration #", count, ".\n", sep = "")
    # Leave the loop once the counter has reached 5
    if (count >= 5L) break
}

This is iteration #1.
This is iteration #2.
This is iteration #3.
This is iteration #4.
This is iteration #5.


## `while` Loop

In [78]:
# Initialise the iteration counter
count <- 0L

# The condition is tested before each pass, so the body runs while count is below 5
while (count < 5) {
    count <- count + 1L
    cat("This is iteration #", count, ".\n", sep = "")
}

This is iteration #1.
This is iteration #2.
This is iteration #3.
This is iteration #4.
This is iteration #5.


## `for` Loop

In [79]:
# Generate integer sequence using seq.int
# seq.int(1,10,2) returns a vector starting from 1, stopping at/before 10, and increasing by 2
all_x <- seq.int(1,10,2)
print(all_x)

cat("\n")

# Start the for loop
for (x in all_x){
    print(x)
}

[1] 1 3 5 7 9

[1] 1
[1] 3
[1] 5
[1] 7
[1] 9


## `break` and `next`

In [80]:
# Start the for loop
for (x in all_x){
    if (x > median(all_x)){ # Break the loop if x is larger than the median
        break
    }
    print(x)
}

[1] 1
[1] 3
[1] 5


In [81]:
# Start the for loop
for (x in all_x){
    if (x == all_x[2]){ # Skip the iteration if x is equal to all_x[2]
        next
    }
    print(x)
}

[1] 1
[1] 5
[1] 7
[1] 9
