# INTRODUCTION TO R
#### Material based on: 
* [Applied Statistics with R by David Dalpiaz](https://daviddalpiaz.github.io/appliedstats/)

## RStudio

[RStudio](https://www.rstudio.com/)  
[Rstudio Cheat Sheets](https://www.rstudio.com/resources/cheatsheets/)

## Data Types:
    * Numeric (1, 42.0, 55.3)
    * Integer (1L, 5L, 10L)
    * Complex (1+3i)
    * Logical (TRUE, FALSE)
    * Character ('a', "multiple letters")


## Vectors:

### Vector Creation:

In [3]:
c(1,3,5,7,9,11)

In [5]:
-10:10

In [6]:
seq(from = 1.5, to = 5.3, by = 0.3)

In [7]:
rep(1, times = 5)

In [8]:
rep(1:3, times = 2, each = 2)

In [11]:
rep(1:3, c(3,1,2))

### Vector Indexing:

In [12]:
a = -5:4
length(a)

In [13]:
a[3]

In [14]:
a[-5]

In [15]:
a[2:5]

In [16]:
a[c(1,7,8)]

In [17]:
ind = c(TRUE, TRUE, TRUE, FALSE)
a[ind]

### Vector Operations:

In [18]:
a + 1

In [19]:
a * 2

In [20]:
2^a

In [22]:
sqrt(a+5)

In [23]:
log(a)

“NaNs produced”

In [24]:
a+5 > 4

In [25]:
a+5 == 5

In [26]:
a+5 != 7

In [28]:
a+5 > 5 & a+5 < 7

In [29]:
a[a>3]

In [30]:
which(a+5>5)

In [32]:
a[which(a+5>5)]

In [34]:
which.max(a)
a[10]

In [35]:
y = c(1,2,3)

In [36]:
a + y

“longer object length is not a multiple of shorter object length”

In [38]:
all(a+5 + rep(y, 4, 10), a+5 + y)

“coercing argument of type 'double' to logical”

In [39]:
a %*% a

0
85


In [40]:
a %o% a

0,1,2,3,4,5,6,7,8,9
25,20,15,10,5,0,-5,-10,-15,-20
20,16,12,8,4,0,-4,-8,-12,-16
15,12,9,6,3,0,-3,-6,-9,-12
10,8,6,4,2,0,-2,-4,-6,-8
5,4,3,2,1,0,-1,-2,-3,-4
0,0,0,0,0,0,0,0,0,0
-5,-4,-3,-2,-1,0,1,2,3,4
-10,-8,-6,-4,-2,0,2,4,6,8
-15,-12,-9,-6,-3,0,3,6,9,12
-20,-16,-12,-8,-4,0,4,8,12,16


## Matrices
### Matrix Creation:

In [41]:
A = matrix(a, 2, 5)
A

0,1,2,3,4
-5,-3,-1,1,3
-4,-2,0,2,4


In [42]:
A = matrix(a, 2, 5, byrow = TRUE)
A

0,1,2,3,4
-5,-4,-3,-2,-1
0,1,2,3,4


In [43]:
B = matrix(1:9, 3, 3)
B
B[2,3]

0,1,2
1,4,7
2,5,8
3,6,9


In [44]:
B[1,]

In [45]:
B[,2]

In [46]:
B[2, c(1,3)]

In [47]:
B[1:2, ]

0,1,2
1,4,7
2,5,8


In [48]:
C = rbind(1:3, 3:1, 2)
C

0,1,2
1,2,3
3,2,1
2,2,2


In [49]:
(B + C) *  C / C - C

0,1,2
1,4,7
2,5,8
3,6,9


In [50]:
B %*% C

0,1,2
27,24,21
33,30,27
39,36,33


In [51]:
t(C)

0,1,2
1,3,2
2,2,2
3,1,2


In [52]:
Z = cbind(3:5, c(15.3, 18.2, 7), 77:79)
solve(Z)

0,1,2
-0.85470577,0.64184397,0.199348284
-0.07092199,0.14184397,-0.070921986
0.06037953,-0.05319149,0.006325474


In [53]:
dim(A)

In [54]:
rowSums(A)

In [55]:
colSums(A)

In [56]:
rowMeans(A)

## Lists:

In [57]:
list1 = list(33.3, "abc", TRUE)
list1

In [58]:
list1[1]

In [59]:
list1[[1]]

In [60]:
list2 = list(a = c(1,3,7) , b = TRUE, c = diag(5))
list2

0,1,2,3,4
1,0,0,0,0
0,1,0,0,0
0,0,1,0,0
0,0,0,1,0
0,0,0,0,1


In [61]:
list2$c

0,1,2,3,4
1,0,0,0,0
0,1,0,0,0
0,0,1,0,0
0,0,0,1,0
0,0,0,0,1


## Data Frames:

In [62]:
ExampleDF = data.frame(ints = seq(2, 20, 2), Chars = LETTERS[1:10], Logical = rep(c(TRUE, FALSE, FALSE, TRUE), 3, 10))

In [63]:
ExampleDF

ints,Chars,Logical
2,A,True
4,B,False
6,C,False
8,D,True
10,E,True
12,F,False
14,G,False
16,H,True
18,I,True
20,J,False


In [64]:
ExampleDF$ints

In [65]:
ExampleDF[1]

ints
2
4
6
8
10
12
14
16
18
20


In [66]:
ExampleDF[[1]]

In [67]:
ExampleDF[7,]

Unnamed: 0,ints,Chars,Logical
7,14,G,False


In [68]:
ExampleDF[,3]

In [69]:
ExampleDF[8,2]

In [70]:
dim(ExampleDF)

## Programming/Control Flow

In [71]:
if (3 > 5){
    print(3)
} else {
    print(5)
}

[1] 5


In [72]:
x = 1:10
ifelse(x > 3, "YES", "NO")

In [74]:
for (i in 20:25){
    print(i)
}
print(20:25)

[1] 20
[1] 21
[1] 22
[1] 23
[1] 24
[1] 25
[1] 20 21 22 23 24 25


In [75]:
temp = 3
while (temp < 14){
    temp = temp + 2
    print(temp)
}

[1] 5
[1] 7
[1] 9
[1] 11
[1] 13
[1] 15


In [76]:
set.seed(10)
func = function(arg1 = 33, arg2 = 55){
    a = 1:arg1
    b = sample(1:arg2, arg1)
    return(a %*% b)
}

In [77]:
func()

0
14964


In [78]:
func(21)

0
6271


In [79]:
func(3, 5)

0
13


### Apply and Aggregate:

In [80]:
exampledf2 = data.frame(state.x77)
exampledf2$chr = rep(as.factor(LETTERS[1:5]), 10)
exampledf2

Unnamed: 0,Population,Income,Illiteracy,Life.Exp,Murder,HS.Grad,Frost,Area,chr
Alabama,3615,3624,2.1,69.05,15.1,41.3,20,50708,A
Alaska,365,6315,1.5,69.31,11.3,66.7,152,566432,B
Arizona,2212,4530,1.8,70.55,7.8,58.1,15,113417,C
Arkansas,2110,3378,1.9,70.66,10.1,39.9,65,51945,D
California,21198,5114,1.1,71.71,10.3,62.6,20,156361,E
Colorado,2541,4884,0.7,72.06,6.8,63.9,166,103766,A
Connecticut,3100,5348,1.1,72.48,3.1,56.0,139,4862,B
Delaware,579,4809,0.9,70.06,6.2,54.6,103,1982,C
Florida,8277,4815,1.3,70.66,10.7,52.6,11,54090,D
Georgia,4931,4091,2.0,68.54,13.9,40.6,60,58073,E


In [81]:
lapply(exampledf2$Population, sqrt)

In [82]:
aggregate(exampledf2$Population ~ exampledf2$chr, FUN = mean)


exampledf2$chr,exampledf2$Population
A,2538.5
B,4641.2
C,5364.2
D,2727.1
E,5961.1
