# Data Frame Manipulation -- Base R data.frame
Basic data frame manipulation using base R data frames.

In [1]:
library(magrittr)

## Load CSV

In [2]:
# Read the Titanic passenger data and preview the first three rows
df <- read.csv(file = here::here(".data", "titanic.csv"))
head(df, n = 3)

Unnamed: 0_level_0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest
Unnamed: 0_level_1,<int>,<int>,<chr>,<chr>,<dbl>,<int>,<int>,<chr>,<dbl>,<chr>,<chr>,<chr>,<int>,<chr>
1,1,1,"Allen, Miss. Elisabeth Walton",female,29.0,0,0,24160,211.3375,B5,S,2.0,,"St Louis, MO"
2,1,1,"Allison, Master. Hudson Trevor",male,0.9167,1,2,113781,151.55,C22 C26,S,11.0,,"Montreal, PQ / Chesterville, ON"
3,1,0,"Allison, Miss. Helen Loraine",female,2.0,1,2,113781,151.55,C22 C26,S,,,"Montreal, PQ / Chesterville, ON"


## Summary

In [3]:
# Number of rows (observations) in the data frame
nrow(df)

In [4]:
# Number of columns (variables) in the data frame.
# `length(df)` returns the same number only because a data frame is a list
# of columns; `ncol()` states the intent and mirrors `nrow()`.
ncol(df)

In [5]:
# Inspect the structure: each column's type plus a preview of its first values
str(df)

'data.frame':	1309 obs. of  14 variables:
 $ pclass   : int  1 1 1 1 1 1 1 1 1 1 ...
 $ survived : int  1 1 0 0 0 1 1 0 1 0 ...
 $ name     : chr  "Allen, Miss. Elisabeth Walton" "Allison, Master. Hudson Trevor" "Allison, Miss. Helen Loraine" "Allison, Mr. Hudson Joshua Creighton" ...
 $ sex      : chr  "female" "male" "female" "male" ...
 $ age      : num  29 0.917 2 30 25 ...
 $ sibsp    : int  0 1 1 1 1 0 1 0 2 0 ...
 $ parch    : int  0 2 2 2 2 0 0 0 0 0 ...
 $ ticket   : chr  "24160" "113781" "113781" "113781" ...
 $ fare     : num  211 152 152 152 152 ...
 $ cabin    : chr  "B5" "C22 C26" "C22 C26" "C22 C26" ...
 $ embarked : chr  "S" "S" "S" "S" ...
 $ boat     : chr  "2" "11" "" "" ...
 $ body     : int  NA NA NA 135 NA NA NA NA NA 22 ...
 $ home.dest: chr  "St Louis, MO" "Montreal, PQ / Chesterville, ON" "Montreal, PQ / Chesterville, ON" "Montreal, PQ / Chesterville, ON" ...


In [6]:
# Descriptive statistics per column: min / quartiles / mean / max for numeric
# columns, length / class / mode for character columns
summary(df)

     pclass         survived         name               sex           
 Min.   :1.000   Min.   :0.000   Length:1309        Length:1309       
 1st Qu.:2.000   1st Qu.:0.000   Class :character   Class :character  
 Median :3.000   Median :0.000   Mode  :character   Mode  :character  
 Mean   :2.295   Mean   :0.382                                        
 3rd Qu.:3.000   3rd Qu.:1.000                                        
 Max.   :3.000   Max.   :1.000                                        
                                                                      
      age              sibsp            parch          ticket         
 Min.   : 0.1667   Min.   :0.0000   Min.   :0.000   Length:1309       
 1st Qu.:21.0000   1st Qu.:0.0000   1st Qu.:0.000   Class :character  
 Median :28.0000   Median :0.0000   Median :0.000   Mode  :character  
 Mean   :29.8811   Mean   :0.4989   Mean   :0.385                     
 3rd Qu.:39.0000   3rd Qu.:1.0000   3rd Qu.:0.000                     
 Max. 

## Slice data

In [7]:
# Select rows by position: rows 2 through 4, keeping every column
df[2:4, ]

Unnamed: 0_level_0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest
Unnamed: 0_level_1,<int>,<int>,<chr>,<chr>,<dbl>,<int>,<int>,<chr>,<dbl>,<chr>,<chr>,<chr>,<int>,<chr>
2,1,1,"Allison, Master. Hudson Trevor",male,0.9167,1,2,113781,151.55,C22 C26,S,11.0,,"Montreal, PQ / Chesterville, ON"
3,1,0,"Allison, Miss. Helen Loraine",female,2.0,1,2,113781,151.55,C22 C26,S,,,"Montreal, PQ / Chesterville, ON"
4,1,0,"Allison, Mr. Hudson Joshua Creighton",male,30.0,1,2,113781,151.55,C22 C26,S,,135.0,"Montreal, PQ / Chesterville, ON"


In [8]:
# Select rows by condition: keep rows where `survived` equals 1,
# then show the first three matches
df[df[["survived"]] == 1, ] %>%
  head(n = 3)

Unnamed: 0_level_0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest
Unnamed: 0_level_1,<int>,<int>,<chr>,<chr>,<dbl>,<int>,<int>,<chr>,<dbl>,<chr>,<chr>,<chr>,<int>,<chr>
1,1,1,"Allen, Miss. Elisabeth Walton",female,29.0,0,0,24160,211.3375,B5,S,2,,"St Louis, MO"
2,1,1,"Allison, Master. Hudson Trevor",male,0.9167,1,2,113781,151.55,C22 C26,S,11,,"Montreal, PQ / Chesterville, ON"
6,1,1,"Anderson, Mr. Harry",male,48.0,0,0,19952,26.55,E12,S,3,,"New York, NY"


In [9]:
# Select rows by multiple conditions: combine element-wise tests with `&`
# (survived AND female), then show the first three matches
df[df[["survived"]] == 1 & df[["sex"]] == "female", ] %>%
  head(n = 3)

Unnamed: 0_level_0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest
Unnamed: 0_level_1,<int>,<int>,<chr>,<chr>,<dbl>,<int>,<int>,<chr>,<dbl>,<chr>,<chr>,<chr>,<int>,<chr>
1,1,1,"Allen, Miss. Elisabeth Walton",female,29,0,0,24160,211.3375,B5,S,2,,"St Louis, MO"
7,1,1,"Andrews, Miss. Kornelia Theodosia",female,63,1,0,13502,77.9583,D7,S,10,,"Hudson, NY"
9,1,1,"Appleton, Mrs. Edward Dale (Charlotte Lamson)",female,53,2,0,11769,51.4792,C101,S,D,,"Bayside, Queens, NY"


In [10]:
# Select columns by name: list-style indexing with a character vector
# always returns a data frame, in the order the names are given
df[c("survived", "pclass")] %>%
  head(n = 3)

Unnamed: 0_level_0,survived,pclass
Unnamed: 0_level_1,<int>,<int>
1,1,1
2,1,1
3,0,1


## Assign values

In [11]:
# Assign a (new) column: the single value is recycled to every row
df[["sparkles"]] <- 8
head(df, n = 3)

Unnamed: 0_level_0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest,sparkles
Unnamed: 0_level_1,<int>,<int>,<chr>,<chr>,<dbl>,<int>,<int>,<chr>,<dbl>,<chr>,<chr>,<chr>,<int>,<chr>,<dbl>
1,1,1,"Allen, Miss. Elisabeth Walton",female,29.0,0,0,24160,211.3375,B5,S,2.0,,"St Louis, MO",8
2,1,1,"Allison, Master. Hudson Trevor",male,0.9167,1,2,113781,151.55,C22 C26,S,11.0,,"Montreal, PQ / Chesterville, ON",8
3,1,0,"Allison, Miss. Helen Loraine",female,2.0,1,2,113781,151.55,C22 C26,S,,,"Montreal, PQ / Chesterville, ON",8


In [12]:
# Assign a value to some rows: overwrite `sparkles` only where `survived` is 1
df[df[["survived"]] == 1, "sparkles"] <- 3
head(df, n = 3)

Unnamed: 0_level_0,pclass,survived,name,sex,age,sibsp,parch,ticket,fare,cabin,embarked,boat,body,home.dest,sparkles
Unnamed: 0_level_1,<int>,<int>,<chr>,<chr>,<dbl>,<int>,<int>,<chr>,<dbl>,<chr>,<chr>,<chr>,<int>,<chr>,<dbl>
1,1,1,"Allen, Miss. Elisabeth Walton",female,29.0,0,0,24160,211.3375,B5,S,2.0,,"St Louis, MO",3
2,1,1,"Allison, Master. Hudson Trevor",male,0.9167,1,2,113781,151.55,C22 C26,S,11.0,,"Montreal, PQ / Chesterville, ON",3
3,1,0,"Allison, Miss. Helen Loraine",female,2.0,1,2,113781,151.55,C22 C26,S,,,"Montreal, PQ / Chesterville, ON",8


## Group by

## Reshape

In [13]:
# Gather / melt

In [14]:
# Spread / cast / pivot

## Join

## Creation

In [15]:
# Build a small data frame from scratch: one numeric, one character and
# one logical column, each of length three
df2 <- data.frame(
  x = c(1, 2, 3),
  y = c("a", "b", "c"),
  z = c(TRUE, FALSE, TRUE)
)
df2

x,y,z
<dbl>,<chr>,<lgl>
1,a,True
2,b,False
3,c,True
