In [79]:
# Draw 100 standard-normal samples and return their mean.
# Takes no arguments; the result changes between calls unless a seed is set.
myfunction <- function() {
    mean(rnorm(100))
}

myfunction()

In [80]:
# Add standard-normal noise to x: one independent draw per element,
# so the result has the same length as x.
second <- function(x) {
    noise <- rnorm(length(x))
    x + noise
}

second(6)

In [81]:
# A vector
second(2:5)

In [82]:
1  # Numeric (double) -- the type of a plain number literal
1L  # Integer -- the L suffix requests an integer
1 / 0 # Inf (infinity)
0 / 0 # NaN (not a number)

### Creating vectors. c stands for concatenate

In [83]:
# Each assignment overwrites x, so only the last (character) vector is printed
x <- c(0.5, 0.6) # numeric
x <- c(TRUE, FALSE) # logical
x <- c(T, F) # logical; T and F are shorthand for TRUE/FALSE but can be reassigned, so prefer the long form
x <- c("a", "b", "c") # character
print(x)

[1] "a" "b" "c"


### Using the vector function (creates an empty vector)

In [84]:
# Pre-allocate a numeric vector of length 10; every element starts at 0
x <- vector("numeric", length = 10)
print(x)

# Insert a value. Indexing starts at 1
x[1] <- 1
print(x)

 [1] 0 0 0 0 0 0 0 0 0 0
 [1] 1 0 0 0 0 0 0 0 0 0


### Mixing objects. A coercion is done.

In [85]:
# c() coerces mixed inputs to a single common type
y <- c(1.7, "a") # character: the number can become a string, but the string "a" cannot become a number
print(y)
y <- c(FALSE, 2, TRUE) # numeric: FALSE becomes 0 and TRUE becomes 1
print(y)
y <- c(TRUE, "a") # character: TRUE becomes "TRUE"
print(y)

[1] "1.7" "a"  
[1] 0 2 1
[1] "TRUE" "a"   


### Explicit coercion

In [86]:
x <- 0:6
class(x)
as.numeric(x)
as.logical(x)
as.character(x)

### Lists

Lists are a special type of vector that can contain elements of different classes. Lists are a very
important data type in R and you should get to know them well.

In [87]:
x <- list(1, "a", TRUE, 1+4i)
x

In [88]:
x[2]

### Matrices
Vectors with a dimension attribute (nrow, ncol).

In [89]:
m <- matrix(data = 0, nrow = 3, ncol = 5)
print(m)

     [,1] [,2] [,3] [,4] [,5]
[1,]    0    0    0    0    0
[2,]    0    0    0    0    0
[3,]    0    0    0    0    0


In [90]:
# Insert a value into the matrix at row 2, column 2
m[2, 2] <- 3.0
print(m)

     [,1] [,2] [,3] [,4] [,5]
[1,]    0    0    0    0    0
[2,]    0    3    0    0    0
[3,]    0    0    0    0    0


In [91]:
attributes(m)

### Creating matrices from 1D-vectors

In [92]:
m <- 1:10 # Create a vector
dim(m) <- c(2, 5) # Change dimension using another vector

print(m)

     [,1] [,2] [,3] [,4] [,5]
[1,]    1    3    5    7    9
[2,]    2    4    6    8   10


### Using column and row bind (cbind and rbind)

In [93]:
x <- 1:3
y <- 10:12

cbind(x, y)

x,y
1,10
2,11
3,12


In [94]:
rbind(x, y)

0,1,2,3
x,1,2,3
y,10,11,12


### Factors
Factors are used to represent categorical data. Factors can be unordered or ordered. One can think
of a factor as an integer vector where each integer has a label.

In [95]:
x <- factor(c("yes", "yes", "no", "no", "yes"))
print(x)
print(table(x))

[1] yes yes no  no  yes
Levels: no yes
x
 no yes 
  2   3 


In [96]:
unclass(x)
attr(x, "levels") # The numeric classing is by default done alphabetically

In [97]:
x <- factor(c("yes", "yes", "no", "no", "yes"),
           levels = c("yes", "no"))
unclass(x) # Now yes=1 and no=2

### Missing values

In [98]:
x <- c(1, 2, NA, 10, 3)
print(is.na(x))
print(is.nan(x)) # All are false

[1] FALSE FALSE  TRUE FALSE FALSE
[1] FALSE FALSE FALSE FALSE FALSE


### Data frames
Used to store tabular data

In [99]:
# Columns may have different types: foo is integer, bar is logical.
# TRUE/FALSE are spelled out because T and F are ordinary variables that can be reassigned.
x <- data.frame(foo = 1:4, bar = c(TRUE, TRUE, FALSE, FALSE))
print(x)

  foo   bar
1   1  TRUE
2   2  TRUE
3   3 FALSE
4   4 FALSE


In [100]:
nrow(x)

In [101]:
ncol(x)

### Names

In [102]:
x <- c(1, 2, 3)
names(x) # No names here

names(x) <- c("Foo", "Bar", "Baz")

print(x)

NULL

Foo Bar Baz 
  1   2   3 


In [103]:
x <- list(a=1, b=2, c=3) # Named list: elements can be looked up by name, much like a Python dict
x

In [104]:
m <- matrix(1:4, nrow=2, ncol=2)
dimnames(m) <- list(c("a", "b"), c("c", "d"))
m

Unnamed: 0,c,d
a,1,3
b,2,4


### dput-ing R objects

In [105]:
y <- data.frame(a = 1, b ="a")
dput(y)

structure(list(a = 1, b = structure(1L, .Label = "a", class = "factor")), .Names = c("a", 
"b"), row.names = c(NA, -1L), class = "data.frame")


In [106]:
# Saving the y-variable to an .R file
dput(y, file = "y.R")

In [107]:
# Read the object back from y.R into a new variable
new.y <- dget("y.R")
new.y

Unnamed: 0,a,b
1,1,a


### Dumping R objects
Here you can save multiple objects to a file

In [108]:
x <- "foo"
y <- data.frame(a=1, b="a")
dump(c("x", "y"), file = "data.R") # Save to file
rm(x, y) # Delete the objects

In [109]:
source("data.R") # Read the data into the name space
y
x

Unnamed: 0,a,b
1,1,a


### File connections

In [110]:
str(file)

function (description = "", open = "", blocking = TRUE, encoding = getOption("encoding"), 
    raw = FALSE, method = getOption("url.method", "default"))  


In [111]:
# Read a simple txt file through an explicit connection
con <- file("read_files/test1.txt", "r") # "r" opens the connection for reading
data <- readLines(con, 2) # Read the first two lines
close(con)

print(data)
data[1]
print(data[[1]])

In file("read_files/test1.txt", "r"): cannot open file 'read_files/test1.txt': No such file or directory

ERROR: Error in file("read_files/test1.txt", "r"): cannot open the connection


### Subsetting

In [112]:
x <- c("a", "b", "c", "d", "a")
x # Display the vector; wrapping the assignment in eval() is not needed for this
x[1] # First element
x[1:3] # First three elements

In [113]:
x[x > "a"] # Strings compare in sort order, so this keeps every element that sorts after "a"

In [114]:
u <- x > "a"
u

### Subsetting lists

In [115]:
x <- list(foo = 1:4, bar = 0.6) # A named list; behaves much like a Python dict
x["foo"] # or x[1]; single brackets return a list containing the element
x[2]

In [116]:
x[[1]]

In [117]:
x$bar # Using dollar; gives the same value here as x[["bar"]]
x["bar"] # Single brackets return a one-element list rather than the value itself
x[["bar"]]

In [118]:
# Replace the value stored under bar
x$bar <- 1.5
x

In [119]:
# Extract multiple elements from a list
x <- list(foo = 1:4, bar=0.6, baz="hello")
x[c(1, 3)] # Extracting element 1 and 3 from list

In [120]:
# Using a string held in a variable to access an element of the list
name <- "foo"
x[[name]] # Works: [[ evaluates name and looks up "foo"
x$name # Does not work: $ looks for an element literally called "name" in the list, giving NULL
x$foo # Works

NULL

In [121]:
# Subsetting nested elements
x <- list(a = list(10, 12, 14), b = c(3.14, 2.81))

x[[c(1, 3)]]
x[[1]][[3]]
y <- x[1]
y[["a"]][[1]]

x[[c(2, 1)]]

### Subsetting matrices

In [122]:
# Can be accessed using typical (i, j) indices
x <- matrix(1:6, 2, 3)
x

0,1,2
1,3,5
2,4,6


In [123]:
x[1, 2]
x[2, 2]
x[1, ]
x[, 2]

By default, a single element is returned as a vector of length 1 rather than a 1x1 matrix. This
can be changed by turning off the drop parameter (drop = FALSE).

In [124]:
x[1, 2] # Returned as a plain vector of length 1
x[1, 2, drop = FALSE] # Result is a 1x1 matrix now

0
3


### Partial matching

You can look up list elements by the beginning of their name (partial matching). However, this does not work if several elements start with the same characters.

In [125]:
x <- list(aardvark = 1:6, bs = 1:3)
x$a # $ matches partially: finds the element whose name starts with "a"
x[["a"]] # Does not work: [[ looks for an exact name by default, giving NULL
x[["a", exact = FALSE]] # This will work: exact = FALSE allows partial matching

NULL

In [126]:
x$b

### Removing NA values

In [127]:
x <- c(1, 2, NA, 4, NA, 56, NaN, 5)
bad <- is.na(x) # Logical mask of missing entries; is.na() is TRUE for NaN as well as NA
x[!bad] # Keep only the non-missing values

In [128]:
y <- c("a", "b", NA, NA, "FIVE", "ch", "h", NA)

good <- complete.cases(x, y) # TRUE only at positions where neither x nor y is missing
good

In [129]:
# Now we have only the elements which have non NA/NaN 
x[good]
y[good]

### Vectorized operations

In [130]:
x <- 1:4; y <- 6:9

In [131]:
x + y

In [132]:
x * y

In [133]:
y == 8

In [134]:
x / y

### Vectorized matrix operations

In [135]:
x <- matrix(1:4, 2, 2); y <- matrix(rep(10, 4), 2)

In [136]:
x * y # element-wise multiplication

0,1
10,30
20,40


In [137]:
x %*% y # True matrix multiplication

0,1
40,40
60,60


# Week 1 Quiz

In [138]:
x <- c(1, 3, 5); y <- c(3, 2, 10)
rbind(x, y)

0,1,2,3
x,1,3,5
y,3,2,10


In [139]:
x <- list(2, "a", "b", TRUE)

class(x[[1]])

In [140]:
x <- 1:4; y <- 2
x+y

In [141]:
x <- c(17, 14, 4, 5, 13, 12, 10)
x
x[x > 10] <- 4 # Change all elements greater than 10
x

In [142]:
df <- read.csv("hw1_data.csv", header=TRUE, sep=",")
head(df, n=10)

Unnamed: 0,Ozone,Solar.R,Wind,Temp,Month,Day
1,41.0,190.0,7.4,67.0,5.0,1.0
2,36.0,118.0,8.0,72.0,5.0,2.0
3,12.0,149.0,12.6,74.0,5.0,3.0
4,18.0,313.0,11.5,62.0,5.0,4.0
5,,,14.3,56.0,5.0,5.0
6,28.0,,14.9,66.0,5.0,6.0
7,23.0,299.0,8.6,65.0,5.0,7.0
8,19.0,99.0,13.8,59.0,5.0,8.0
9,8.0,19.0,20.1,61.0,5.0,9.0
10,,194.0,8.6,69.0,5.0,10.0


In [143]:
str(df)

'data.frame':	153 obs. of  6 variables:
 $ Ozone  : int  41 36 12 18 NA 28 23 19 8 NA ...
 $ Solar.R: int  190 118 149 313 NA NA 299 99 19 194 ...
 $ Wind   : num  7.4 8 12.6 11.5 14.3 14.9 8.6 13.8 20.1 8.6 ...
 $ Temp   : int  67 72 74 62 56 66 65 59 61 69 ...
 $ Month  : int  5 5 5 5 5 5 5 5 5 5 ...
 $ Day    : int  1 2 3 4 5 6 7 8 9 10 ...


In [144]:
names(df)
dim(df)

In [145]:
df[1:2, ]

Unnamed: 0,Ozone,Solar.R,Wind,Temp,Month,Day
1,41.0,190.0,7.4,67.0,5.0,1.0
2,36.0,118.0,8.0,72.0,5.0,2.0


In [146]:
tail(df, 2)

Unnamed: 0,Ozone,Solar.R,Wind,Temp,Month,Day
152,18.0,131.0,8.0,76.0,9.0,29.0
153,20.0,223.0,11.5,68.0,9.0,30.0


In [147]:
df$Ozone[[47]]

In [148]:
# Number of NA-values in the Ozone column
sum(is.na(df$Ozone))

In [149]:
# Mean of the Ozone column, ignoring missing values
mean(df$Ozone[!is.na(df$Ozone)]) # My first solution
mean(df$Ozone, na.rm = TRUE) # Much better

In [150]:
summary(df$Ozone)

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
   1.00   18.00   31.50   42.13   63.25  168.00      37 

In [151]:
# Rows with Ozone > 31 and Temp > 90.
# A comparison against NA yields NA, and an NA row index produces an all-NA row,
# so both results contain placeholder rows where the condition evaluates to NA.
# Wrapping the condition in which() would drop them; here na.rm = TRUE keeps
# them out of the means instead.
new_df <- df[(df$Ozone > 31 & df$Temp > 90), ]
test_df <- df[with(df, Ozone > 31 & Temp > 90), ] # May also be done using with
head(new_df)
mean(new_df$Solar.R, na.rm = TRUE)
mean(test_df$Solar.R, na.rm = TRUE)

Unnamed: 0,Ozone,Solar.R,Wind,Temp,Month,Day
,,,,,,
NA.1,,,,,,
69,97.0,267.0,6.3,92.0,7.0,8.0
70,97.0,272.0,5.7,92.0,7.0,9.0
NA.2,,,,,,
NA.3,,,,,,


In [152]:
# Mean temperature over the rows where Month is 6
df_msix <- df[df$Month == 6, ]
mean(df_msix$Temp, na.rm = TRUE)

In [153]:
head(df)
# Largest Ozone value among the rows where Month is 5, ignoring missing values
max(df[(df$Month == 5), ]$Ozone, na.rm = TRUE)


Unnamed: 0,Ozone,Solar.R,Wind,Temp,Month,Day
1,41.0,190.0,7.4,67.0,5.0,1.0
2,36.0,118.0,8.0,72.0,5.0,2.0
3,12.0,149.0,12.6,74.0,5.0,3.0
4,18.0,313.0,11.5,62.0,5.0,4.0
5,,,14.3,56.0,5.0,5.0
6,28.0,,14.9,66.0,5.0,6.0
