# OBJETOS EM R

# VECTORS
* Formato mais básico de objeto em R.
* São `logical`, `integer`, `double`, `complex`, `character` e `raw`.
* Índices variam de [1, n]. Índices negativos excluem os elementos do vector.
* Reciclagem de elementos: ao aplicar uma operação em vectors de tamanhos distintos, o vector menor é repetido até atingir o tamanho do maior:
```R
# Two numeric vectors of different lengths (6 and 2 elements).
v1 <- c(3, 8, 4, 5, 0, 11)
v2 <- c(4, 11)
# In an element-wise operation with v1, v2 is recycled to c(4, 11, 4, 11, 4, 11).
```

## Tipos de vectors

In [1]:
# Print one literal of each atomic vector type.
print("abc")               # character
print(12.5)                # double
print(63L)                 # integer: the L suffix makes it an integer, not a double
print(TRUE)                # logical
print(2+3i)                # complex
print(charToRaw("hello"))  # raw: the bytes of the string

[1] "abc"
[1] 12.5
[1] 63
[1] TRUE
[1] 2+3i
[1] 68 65 6c 6c 6f


## Criando vectors

In [6]:
# Colon operator with non-integer endpoints: the sequence starts at 3.8 and
# advances by 1, so it stops at 10.8 and 11.4 itself is not included.
v <- 3.8:11.4
print(v)

# seq() builds a sequence with an explicit step.
print(seq(from = 1, to = 9, by = 0.5))

# c() coerces to a common type: as soon as one element is character,
# every element becomes character.
s <- c("apple", "red", 5, TRUE)
print(s)

[1]  3.8  4.8  5.8  6.8  7.8  8.8  9.8 10.8
 [1] 1.0 1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0 5.5 6.0 6.5 7.0 7.5 8.0 8.5 9.0
[1] "apple" "red"   "5"     "TRUE" 


## Acessando vectors

In [10]:
# Select elements by position with a vector of positive indices.
t <- c("seg", "ter", "qua", "qui", "sex", "sab", "dom")
u <- t[c(2, 3, 7)]
print(u)

# A logical vector also works as an index: positions marked TRUE are kept,
# e.g. t[c(TRUE, FALSE, TRUE, FALSE, FALSE, FALSE, FALSE)] gives "seg" "qua".

# Negative indices drop the elements at those positions.
x <- t[-c(1, 2)]
print(x)

[1] "ter" "qua" "dom"
[1] "qua" "qui" "sex" "sab" "dom"


## Manipulando vectors

In [12]:
# Arithmetic operators work element by element on vectors of equal length.
v1 <- c(3, 8, 4, 5, 0, 11)
v2 <- c(4, 11, 0, 8, 1, 2)

add.result <- v1 + v2
sub.result <- v1 - v2
multi.result <- v1 * v2
divi.result <- v1 / v2  # the third element is 4 / 0, which yields Inf

# Results are printed in the order: sum, difference, product, quotient.
print(add.result)
print(sub.result)
print(multi.result)
print(divi.result)

[1]  7 19  4 13  1 13
[1] -1 -3  4 -3 -1  9
[1] 12 88  0 40  0 22
[1] 0.7500000 0.7272727       Inf 0.6250000 0.0000000 5.5000000


## Reciclagem de elementos

In [13]:
# v1 has six elements and v2 only two.
v1 <- c(3, 8, 4, 5, 0, 11)
v2 <- c(4, 11)

# In the operations below v2 is recycled, i.e. used as c(4, 11, 4, 11, 4, 11).
add.result <- v1 + v2
sub.result <- v1 - v2

print(add.result)
print(sub.result)

[1]  7 19  8 16  4 22
[1] -1 -3  0 -6 -4  0


## Funções úteis

In [18]:
# sort() returns the values in increasing order by default.
v <- c(1, 9, 2, 4, 16)
sortit <- sort(v)
print(sortit)

# decreasing = TRUE sorts from largest to smallest.
revsortit <- sort(v, decreasing = TRUE)
print(revsortit)

# Character vectors are sorted alphabetically.
str1 <- c("Red", "Blue", "Orange")
ssort <- sort(str1)
print(ssort)

[1]  1  2  4  9 16
[1] 16  9  4  2  1
[1] "Blue"   "Orange" "Red"   


# LISTS
* Contém elementos de diferentes tipos.
    * Vetores, listas, strings e outras listas dentro.
* Criada com `list()`.
* É possível dar um nome aos elementos da lista.
* Para acessar os índices pode-se utilizar `[]` ou `[[]]`.
    * `[]`: retorna uma sublista (tipo `list`).
    * `[[]]`: retorna o próprio elemento, não o tipo `list`.
* Se acessar um elemento inexistente pelo nome (`$` ou `[[]]`), retorna `NULL`.

## Criando e nomeando listas

In [27]:
# An unnamed list mixing character, numeric and logical values.
list_data <- list("Red", 12, TRUE, 53.12)
print(list_data)

# A list holding a character vector, a 2-row matrix and a nested list.
list_data <- list(
  c("Jan", "Feb", "Mar"),
  matrix(c(3, 9, 5, 1, -2, 8), nrow = 2),
  list("green", 12.3)
)
# names<- labels the three elements, which then print as $Meses, $Matrix, $Cores.
names(list_data) <- c("Meses", "Matrix", "Cores")
print(list_data)


[[1]]
[1] "Red"

[[2]]
[1] 12

[[3]]
[1] TRUE

[[4]]
[1] 53.12

$Meses
[1] "Jan" "Feb" "Mar"

$Matrix
     [,1] [,2] [,3]
[1,]    3    5   -2
[2,]    9    1    8

$Cores
$Cores[[1]]
[1] "green"

$Cores[[2]]
[1] 12.3


     [,1] [,2] [,3]
[1,]    3    5   -2
[2,]    9    1    8


## Acessando elementos da lista

In [36]:
# Named list with a character vector, a matrix and a nested list;
# the names are given inline instead of through names<-.
list_data <- list(
  Meses = c("Jan", "Feb", "Mar"),
  Matrix = matrix(c(3, 9, 5, 1, -2, 8), nrow = 2),
  Cores = list("green", 12.3)
)
# Single brackets keep the container: the result is a one-element list.
print(list_data[2])
# $ extracts the element itself, here the matrix.
print(list_data$Matrix)

$Matrix
     [,1] [,2] [,3]
[1,]    3    5   -2
[2,]    9    1    8

     [,1] [,2] [,3]
[1,]    3    5   -2
[2,]    9    1    8


## Juntando listas

In [37]:
# NOTE(review): despite their names, list1 and list2 are atomic vectors created
# with c(), not lists; build them with list() to merge real lists.
list1 <- c(1, 2, 3)
list2 <- c("Brasil", "Vasco", "Canada")

# c() concatenates its arguments. Because one input is character, the numbers
# are coerced and the result is a character vector.
merge_list <- c(list1, list2)
print(merge_list)

[1] "1"      "2"      "3"      "Brasil" "Vasco"  "Canada"


## Convertendo list para vector

In [40]:
# unlist() flattens a list into an atomic vector, the form that arithmetic
# and other vectorised operations expect.
list1 <- list(1, 2, 3)  # a real list, so the conversion below actually converts
vect1 <- unlist(list1)
print(vect1)

[1] 1 2 3


# MATRIX
* 2D.
* Contém elementos do mesmo tipo atômico.
* Assinatura: `matrix(data, nrow, ncol, byrow, dimnames)`.
    * byrow = TRUE, os elementos são dispostos pelas linhas. O default é pelas colunas.
    * dimnames: é possível dar nomes às linhas e colunas.

## Definição de matrizes

In [41]:
# byrow = TRUE fills the matrix row by row.
M <- matrix(3:14, nrow = 4, byrow = TRUE)
print(M)

# byrow = FALSE (the default) fills it column by column.
N <- matrix(3:14, nrow = 4, byrow = FALSE)
print(N)

# Row and column labels are passed through dimnames as list(rows, columns).
# The label vectors are called row_names/col_names so they do not mask the
# base functions rownames() and colnames().
row_names <- c("row1", "row2", "row3", "row4")
col_names <- c("col1", "col2", "col3")

P <- matrix(3:14, nrow = 4, byrow = TRUE, dimnames = list(row_names, col_names))
print(P)

     [,1] [,2] [,3]
[1,]    3    4    5
[2,]    6    7    8
[3,]    9   10   11
[4,]   12   13   14
     [,1] [,2] [,3]
[1,]    3    7   11
[2,]    4    8   12
[3,]    5    9   13
[4,]    6   10   14
     col1 col2 col3
row1    3    4    5
row2    6    7    8
row3    9   10   11
row4   12   13   14


## Acessando elementos da matriz

In [44]:
# Label vectors for the matrix; named row_names/col_names so they do not mask
# the base functions rownames() and colnames().
row_names <- c("row1", "row2", "row3", "row4")
col_names <- c("col1", "col2", "col3")

P <- matrix(3:14, nrow = 4, byrow = TRUE, dimnames = list(row_names, col_names))

print(P)
print(P[1, 3])  # single element: row 1, column 3
print(P[2, ])   # whole second row, returned as a named vector
print(P[, 3])   # whole third column, returned as a named vector

     col1 col2 col3
row1    3    4    5
row2    6    7    8
row3    9   10   11
row4   12   13   14
[1] 5
col1 col2 col3 
   6    7    8 
row1 row2 row3 row4 
   5    8   11   14 


## Fazendo operações sobre a matriz

In [53]:
# Two 2 x 2 matrices, filled column by column.
mat1 <- matrix(c(3, 9, -1, 4), nrow = 2)
mat2 <- matrix(c(5, 2, 0, 9), nrow = 2)

print(mat1)
print(mat2)

# +, -, * and / operate element by element on matrices of equal dimensions.
soma <- mat1 + mat2
sub <- mat1 - mat2
mult <- mat1 * mat2
div <- mat1 / mat2  # element [1, 2] is -1 / 0, which gives -Inf
# %*% is the matrix product.
multm <- mat1 %*% mat2

# Print every result in the order it was computed.
invisible(lapply(list(soma, sub, mult, div, multm), print))

     [,1] [,2]
[1,]    3   -1
[2,]    9    4
     [,1] [,2]
[1,]    5    0
[2,]    2    9
     [,1] [,2]
[1,]    8   -1
[2,]   11   13
     [,1] [,2]
[1,]   -2   -1
[2,]    7   -5
     [,1] [,2]
[1,]   15    0
[2,]   18   36
     [,1]      [,2]
[1,]  0.6      -Inf
[2,]  4.5 0.4444444
     [,1] [,2]
[1,]   13   -9
[2,]   53   36


# ARRAYS
* Guarda dados em n dimensões.


## Definição de arrays

In [56]:
# Source data: nine values in total.
vector1 <- c(5, 9, 3)
vector2 <- c(10, 11, 12, 13, 14, 15)

# Labels for the rows, the columns and the two matrices (third dimension).
column.names <- c("COL1", "COL2", "COL3")
row.names <- c("ROW1", "ROW2", "ROW3")
matrix.names <- c("Matrix1", "Matrix2")

# dim = c(3, 3, 2) asks for 18 cells, so the nine values are recycled and both
# 3 x 3 slices end up with the same contents.
result <- array(
  c(vector1, vector2),
  dim = c(3, 3, 2),
  dimnames = list(row.names, column.names, matrix.names)
)
print(result)

, , Matrix1

     COL1 COL2 COL3
ROW1    5   10   13
ROW2    9   11   14
ROW3    3   12   15

, , Matrix2

     COL1 COL2 COL3
ROW1    5   10   13
ROW2    9   11   14
ROW3    3   12   15



## Acessando os elementos

In [57]:
# Source data and dimension labels for a 3 x 3 x 2 array.
vector1 <- c(5, 9, 3)
vector2 <- c(10, 11, 12, 13, 14, 15)
column.names <- c("COL1", "COL2", "COL3")
row.names <- c("ROW1", "ROW2", "ROW3")
matrix.names <- c("Matrix1", "Matrix2")

result <- array(
  c(vector1, vector2),
  dim = c(3, 3, 2),
  dimnames = list(row.names, column.names, matrix.names)
)

print(result)
# Indexing is [row, column, matrix]: third row, all columns, second matrix.
print(result[3, , 2])

, , Matrix1

     COL1 COL2 COL3
ROW1    5   10   13
ROW2    9   11   14
ROW3    3   12   15

, , Matrix2

     COL1 COL2 COL3
ROW1    5   10   13
ROW2    9   11   14
ROW3    3   12   15

COL1 COL2 COL3 
   3   12   15 


## Funções úteis

In [59]:
# apply(X, MARGIN, FUN)
# X: array or matrix.
# MARGIN: the dimension(s) FUN is applied over (1 = rows, 2 = columns);
#         dimension names can be used instead when X has named dimnames.
# FUN: function to apply.

vector1 <- c(5, 9, 3)
vector2 <- c(10, 11, 12, 13, 14, 15)

new.array <- array(c(vector1, vector2), dim = c(3, 3, 2))
print(new.array)

# Sum each row across all columns and both matrices: one total per row.
result <- apply(new.array, MARGIN = 1, FUN = sum)
print(result)

, , 1

     [,1] [,2] [,3]
[1,]    5   10   13
[2,]    9   11   14
[3,]    3   12   15

, , 2

     [,1] [,2] [,3]
[1,]    5   10   13
[2,]    9   11   14
[3,]    3   12   15

[1] 56 68 60


# FACTORS
* Categoriza e armazena em níveis (levels).
* Útil para modelagem estatística.

## Definição de factors

In [60]:
# A plain character vector of region labels.
data <- c(
  "East", "West", "East", "North", "North", "East",
  "West", "West", "West", "East", "North"
)

print(data)
print(is.factor(data))  # FALSE: still a character vector

# factor() converts it into a factor whose levels are the distinct values.
factor_data <- factor(data)

print(factor_data)
print(is.factor(factor_data))

 [1] "East"  "West"  "East"  "North" "North" "East"  "West"  "West"  "West" 
[10] "East"  "North"
[1] FALSE
 [1] East  West  East  North North East  West  West  West  East  North
Levels: East North West
[1] TRUE


## Factors e data frame

In [66]:
height <- c(132, 151, 162, 139, 166, 147, 122)
weight <- c(48, 49, 66, 53, 67, 52, 40)
gender <- c("male", "male", "female", "female", "male", "female", "male")

# stringsAsFactors = TRUE is requested explicitly: since R 4.0.0 the default is
# FALSE, which would leave gender as a character column and make the
# is.factor() check below return FALSE.
input_data <- data.frame(height, weight, gender, stringsAsFactors = TRUE)
print(input_data)

# The character column was converted to a factor with levels female and male.
print(is.factor(input_data$gender))
print(input_data$gender)

  height weight gender
1    132     48   male
2    151     49   male
3    162     66 female
4    139     53 female
5    166     67   male
6    147     52 female
7    122     40   male
[1] TRUE
[1] male   male   female female male   female male  
Levels: female male


## Alterando a ordem dos levels dos factors

In [69]:
data <- c(
  "East", "West", "East", "North", "North", "East",
  "West", "West", "West", "East", "North"
)
# By default the levels are sorted alphabetically: East, North, West.
factor_data <- factor(data)
print(factor_data)

# Passing levels explicitly sets the order in which the levels are stored and
# displayed; the values themselves do not change.
new_order_data <- factor(factor_data, levels = c("East", "West", "North"))
print(new_order_data)

 [1] East  West  East  North North East  West  West  West  East  North
Levels: East North West
 [1] East  West  East  North North East  West  West  West  East  North
Levels: East West North


# DATA FRAMES
* Uma coluna tem valores de uma variável.
* Nomes das colunas não devem ser vazios.
* Nomes nas linhas devem ser únicos.
* Armazena tipos `numeric`, `factor` ou `character`.
* Adicionar colunas: `cbind(df, list)`. Adicionar linhas: `rbind(df, list)`.

## Definição do data frame

In [83]:
# Employee table with one column per variable.
emp.data <- data.frame(
  emp_id = 1:5,
  emp_name = c("Rick", "Dan", "Michelle", "Ryan", "Gary"),
  salary = c(623.3, 515.2, 611.0, 729.0, 843.25),
  start_date = as.Date(
    c("2012-01-01", "2012-12-19", "2001-03-23", "1997-04-10", "2010-01-27")
  ),
  stringsAsFactors = FALSE  # keep emp_name as character, without levels
)

# rbind() returns a new data frame with the extra row. The result is not
# assigned here, so emp.data itself keeps its five rows.
rbind(emp.data, list(6, "Paul", 160.32, "2019-08-05"))

print(emp.data)

emp_id,emp_name,salary,start_date
<dbl>,<chr>,<dbl>,<date>
1,Rick,623.3,2012-01-01
2,Dan,515.2,2012-12-19
3,Michelle,611.0,2001-03-23
4,Ryan,729.0,1997-04-10
5,Gary,843.25,2010-01-27
6,Paul,160.32,2019-08-05


  emp_id emp_name salary start_date
1      1     Rick 623.30 2012-01-01
2      2      Dan 515.20 2012-12-19
3      3 Michelle 611.00 2001-03-23
4      4     Ryan 729.00 1997-04-10
5      5     Gary 843.25 2010-01-27


## Estrutura e Resumo do Data Frame

In [84]:
# Employee table with one column per variable.
emp.data <- data.frame(
  emp_id = 1:5,
  emp_name = c("Rick", "Dan", "Michelle", "Ryan", "Gary"),
  salary = c(623.3, 515.2, 611.0, 729.0, 843.25),
  start_date = as.Date(
    c("2012-01-01", "2012-12-19", "2001-03-23", "1997-04-10", "2010-01-27")
  ),
  stringsAsFactors = FALSE  # keep emp_name as character, without levels
)

# str() writes the structure to the console itself and returns NULL, so
# wrapping it in print() also echoes that NULL after the structure.
# NOTE(review): calling str(emp.data) directly would avoid the stray NULL.
print(str(emp.data))
# summary() gives per-column statistics (quartiles, mean, or length/class).
print(summary(emp.data))
# $ extracts a single column as a vector.
print(emp.data$salary)

'data.frame':	5 obs. of  4 variables:
 $ emp_id    : int  1 2 3 4 5
 $ emp_name  : chr  "Rick" "Dan" "Michelle" "Ryan" ...
 $ salary    : num  623 515 611 729 843
 $ start_date: Date, format: "2012-01-01" "2012-12-19" ...
NULL
     emp_id    emp_name             salary        start_date        
 Min.   :1   Length:5           Min.   :515.2   Min.   :1997-04-10  
 1st Qu.:2   Class :character   1st Qu.:611.0   1st Qu.:2001-03-23  
 Median :3   Mode  :character   Median :623.3   Median :2010-01-27  
 Mean   :3                      Mean   :664.4   Mean   :2006-09-15  
 3rd Qu.:4                      3rd Qu.:729.0   3rd Qu.:2012-01-01  
 Max.   :5                      Max.   :843.2   Max.   :2012-12-19  
[1] 623.30 515.20 611.00 729.00 843.25


## Acessando elementos no Data Frame

In [86]:
# Employee table with one column per variable.
emp.data <- data.frame(
  emp_id = 1:5,
  emp_name = c("Rick", "Dan", "Michelle", "Ryan", "Gary"),
  salary = c(623.3, 515.2, 611.0, 729.0, 843.25),
  start_date = as.Date(
    c("2012-01-01", "2013-09-23", "2014-11-15", "2014-05-11", "2015-03-27")
  ),
  stringsAsFactors = FALSE
)

print(emp.data)
# Indexing is [rows, columns]: rows 3 and 5, columns 2 (emp_name) and
# 4 (start_date).
result <- emp.data[c(3, 5), c(2, 4)]
print(result)

  emp_id emp_name salary start_date
1      1     Rick 623.30 2012-01-01
2      2      Dan 515.20 2013-09-23
3      3 Michelle 611.00 2014-11-15
4      4     Ryan 729.00 2014-05-11
5      5     Gary 843.25 2015-03-27
  emp_name start_date
3 Michelle 2014-11-15
5     Gary 2015-03-27


## Expandindo e Juntando Data Frame

In [94]:
# Original employee table.
emp.data <- data.frame(
  emp_id = 1:5,
  emp_name = c("Rick", "Dan", "Michelle", "Ryan", "Gary"),
  salary = c(623.3, 515.2, 611.0, 729.0, 843.25),
  start_date = as.Date(
    c("2012-01-01", "2013-09-23", "2014-11-15", "2014-05-11", "2015-03-27")
  ),
  stringsAsFactors = FALSE
)
# Assigning to a new name with $ appends the "dept" (department) column.
emp.data$dept <- c("IT", "Operations", "IT", "HR", "Finance")

# Second table with the same five columns, holding the new employees.
# NOTE(review): "Fianance" looks like a typo for "Finance"; it is kept as is
# because the printed result shows the same value.
emp.newdata <- data.frame(
  emp_id = 7:9,
  emp_name = c("Rasmi", "Pranab", "Tusar"),
  salary = c(578.0, 722.5, 632.8),
  start_date = as.Date(c("2013-05-21", "2013-07-30", "2014-06-17")),
  dept = c("IT", "Operations", "Fianance"),
  stringsAsFactors = FALSE
)

# rbind() stacks the rows of both tables; the columns are matched by name.
final.emp <- rbind(emp.data, emp.newdata)
print(final.emp)

  emp_id emp_name salary start_date       dept
1      1     Rick 623.30 2012-01-01         IT
2      2      Dan 515.20 2013-09-23 Operations
3      3 Michelle 611.00 2014-11-15         IT
4      4     Ryan 729.00 2014-05-11         HR
5      5     Gary 843.25 2015-03-27    Finance
6      7    Rasmi 578.00 2013-05-21         IT
7      8   Pranab 722.50 2013-07-30 Operations
8      9    Tusar 632.80 2014-06-17   Fianance
