# Data reshaping (redimensionamento de dados)
* Pode-se utilizar `cbind()` para juntar diversos `vectors` e formar um data frame.
* É possível juntar dois data frames com `rbind()`.
    * Deve-se ter os mesmos nomes de coluna.

## cbind() e rbind()

In [3]:
city <- c("Tampa","Seattle","Hartford","Denver")
state <- c("FL","WA","CT","CO")
zipcode <- c(33602,98104,06161,80294)

# CRIA UM DATA FRAME COM cbind
addresses <- cbind(city,state,zipcode)

cat("# # # # The First data frame\n") 
print(addresses)

new.address <- data.frame(
   city = c("Lowry","Charlotte"),
   state = c("CO","FL"),
   zipcode = c("80230","33949"),
   stringsAsFactors = FALSE
)

cat("# # # The Second data frame\n") 
print(new.address)

# JUNTA AMBOS OS DATA FRAMES
all.addresses <- rbind(addresses,new.address)

cat("# # # The combined data frame\n") 
print(all.addresses)

# # # # The First data frame
     city       state zipcode
[1,] "Tampa"    "FL"  "33602"
[2,] "Seattle"  "WA"  "98104"
[3,] "Hartford" "CT"  "6161" 
[4,] "Denver"   "CO"  "80294"
# # # The Second data frame
       city state zipcode
1     Lowry    CO   80230
2 Charlotte    FL   33949
# # # The combined data frame
       city state zipcode
1     Tampa    FL   33602
2   Seattle    WA   98104
3  Hartford    CT    6161
4    Denver    CO   80294
5     Lowry    CO   80230
6 Charlotte    FL   33949


## Juntando data frames

In [4]:
# Dados sobre diabetes em Pina, disponível na biblioteca MASS
library(MASS)

# Se houver match em ambas as colunas dos data sets, merge data frames
# Pima.te e Pima.tr são dados de MASS
merged.Pima <- merge(x = Pima.te, y = Pima.tr,
   by.x = c("bp", "bmi"),
   by.y = c("bp", "bmi")
)
print(merged.Pima)
nrow(merged.Pima)

   bp  bmi npreg.x glu.x skin.x ped.x age.x type.x npreg.y glu.y skin.y ped.y
1  60 33.8       1   117     23 0.466    27     No       2   125     20 0.088
2  64 29.7       2    75     24 0.370    33     No       2   100     23 0.368
3  64 31.2       5   189     33 0.583    29    Yes       3   158     13 0.295
4  64 33.2       4   117     27 0.230    24     No       1    96     27 0.289
5  66 38.1       3   115     39 0.150    28     No       1   114     36 0.289
6  68 38.5       2   100     25 0.324    26     No       7   129     49 0.439
7  70 27.4       1   116     28 0.204    21     No       0   124     20 0.254
8  70 33.1       4    91     32 0.446    22     No       9   123     44 0.374
9  70 35.4       9   124     33 0.282    34     No       6   134     23 0.542
10 72 25.6       1   157     21 0.123    24     No       4    99     17 0.294
11 72 37.7       5    95     33 0.370    27     No       6   103     32 0.324
12 74 25.9       9   134     33 0.460    81     No       8   126

## melt() e cast()
* melt(): transforma as colunas. Mantém unicidade.
* cast(): ...

In [24]:
mydata <- data.frame(
    id = c(1, 1, 2, 2),
    time = c(1, 2, 1, 2),
    x1 = c(5, 3, 6, 2),
    x2 = c(6, 5, 1, 4)
)
print(mydata)

library(reshape)
mdata <- melt(mydata, id=c("id", "time"))
print(mdata)

  id time x1 x2
1  1    1  5  6
2  1    2  3  5
3  2    1  6  1
4  2    2  2  4
  id time variable value
1  1    1       x1     5
2  1    2       x1     3
3  2    1       x1     6
4  2    2       x1     2
5  1    1       x2     6
6  1    2       x2     5
7  2    1       x2     1
8  2    2       x2     4


In [28]:
library(MASS)
print(ships)

molten.ships <- melt(ships, id = c("type","year"))
# type+year (columns) ~ variable (rows), sum = function
recasted.ship <- cast(molten.ships, type+year~variable, sum)
print(recasted.ship)

   type year period service incidents
1     A   60     60     127         0
2     A   60     75      63         0
3     A   65     60    1095         3
4     A   65     75    1095         4
5     A   70     60    1512         6
6     A   70     75    3353        18
7     A   75     60       0         0
8     A   75     75    2244        11
9     B   60     60   44882        39
10    B   60     75   17176        29
11    B   65     60   28609        58
12    B   65     75   20370        53
13    B   70     60    7064        12
14    B   70     75   13099        44
15    B   75     60       0         0
16    B   75     75    7117        18
17    C   60     60    1179         1
18    C   60     75     552         1
19    C   65     60     781         0
20    C   65     75     676         1
21    C   70     60     783         6
22    C   70     75    1948         2
23    C   75     60       0         0
24    C   75     75     274         1
25    D   60     60     251         0
26    D   60