## Working with DataFrames

### Loading a CSV into a DataFrame

In [1]:
# Load the UCI "adult-stretch" balloons data set. The file has no header row,
# so the columns receive the default names V1..V5. stringsAsFactors = TRUE
# keeps the text columns as factors on R >= 4.0 as well, which is what the
# per-level counts in the summary() output further down rely on.
balloons <- read.csv(
  "http://archive.ics.uci.edu/ml/machine-learning-databases/balloons/adult-stretch.data",
  header = FALSE,
  stringsAsFactors = TRUE
)

### `head`

In [3]:
# Preview only the first four rows.
head(balloons, n = 4)

V1,V2,V3,V4,V5
YELLOW,SMALL,STRETCH,ADULT,True
YELLOW,SMALL,STRETCH,CHILD,True
YELLOW,SMALL,DIP,ADULT,True
YELLOW,SMALL,DIP,CHILD,False


### `colnames`

In [4]:
# Replace the default V1..V5 names with descriptive column names.
colnames(balloons) <- c("color", "size", "act", "age", "inflated")

In [5]:
# Read the column names back to confirm the assignment.
colnames(balloons)

In [6]:
# With no row count given, head() shows the first six rows.
head(balloons)

color,size,act,age,inflated
YELLOW,SMALL,STRETCH,ADULT,True
YELLOW,SMALL,STRETCH,CHILD,True
YELLOW,SMALL,DIP,ADULT,True
YELLOW,SMALL,DIP,CHILD,False
YELLOW,SMALL,DIP,CHILD,False
YELLOW,LARGE,STRETCH,ADULT,True


### Data Dimensions

In [7]:
# dim() returns the number of rows followed by the number of columns.
dim(balloons)

### `rownames`


In [9]:
# Build one label per row: "balloon_1", "balloon_2", ...
# seq_len() is used instead of 1:n so the sequence never counts down.
n <- nrow(balloons)
paste0("balloon_", seq_len(n))

In [10]:
# Use generated labels ("balloon_1", "balloon_2", ...) as the row names.
# seq_len() is used instead of 1:n so the sequence never counts down.
n <- nrow(balloons)
rownames(balloons) <- paste0("balloon_", seq_len(n))

In [11]:
# The row names now appear as the leading column of the printed output.
head(balloons)

Unnamed: 0,color,size,act,age,inflated
balloon_1,YELLOW,SMALL,STRETCH,ADULT,True
balloon_2,YELLOW,SMALL,STRETCH,CHILD,True
balloon_3,YELLOW,SMALL,DIP,ADULT,True
balloon_4,YELLOW,SMALL,DIP,CHILD,False
balloon_5,YELLOW,SMALL,DIP,CHILD,False
balloon_6,YELLOW,LARGE,STRETCH,ADULT,True


### Columns are Vectors

In [12]:
# `$` extracts a single column as a vector.
balloons$color

In [23]:
# Select rows by name and columns by position (columns 2 and 3: size, act).
balloons[c("balloon_7", "balloon_12"), 2:3]

Unnamed: 0,size,act
balloon_7,LARGE,STRETCH
balloon_12,SMALL,STRETCH


#### Columns Accessed by name are filtered DataFrames

In [18]:
# Rows by name, one column by name. Because only a single column is selected,
# `[` drops the result to a plain vector (drop = TRUE is the default).
balloons[c("balloon_7", "balloon_12"), "color"]

In [16]:
# A character vector inside single brackets selects columns by name and
# returns a data frame containing every row.
balloons[c("size", "act")]

Unnamed: 0,size,act
balloon_1,SMALL,STRETCH
balloon_2,SMALL,STRETCH
balloon_3,SMALL,DIP
balloon_4,SMALL,DIP
balloon_5,SMALL,DIP
balloon_6,LARGE,STRETCH
balloon_7,LARGE,STRETCH
balloon_8,LARGE,DIP
balloon_9,LARGE,DIP
balloon_10,LARGE,DIP


### Rows are filtered DataFrames

In [24]:
# Leaving the column index empty keeps every column of the named row.
balloons["balloon_1", ]

Unnamed: 0,color,size,act,age,inflated
balloon_1,YELLOW,SMALL,STRETCH,ADULT,True


In [25]:
# Generate the row names "balloon_1" .. "balloon_3" and select those rows.
balloons[paste0("balloon_", 1:3), ]

Unnamed: 0,color,size,act,age,inflated
balloon_1,YELLOW,SMALL,STRETCH,ADULT,True
balloon_2,YELLOW,SMALL,STRETCH,CHILD,True
balloon_3,YELLOW,SMALL,DIP,ADULT,True


In [26]:
# Logical mask over the row names: TRUE where the name contains "_1" followed
# by at least one more character ("." matches any single character).
grepl(pattern = "_1.", x = rownames(balloons))

In [37]:
# Keep the rows whose name contains "_1" followed by a digit
# (balloon_10 .. balloon_19).
balloons[grepl(pattern = "_1\\d", x = rownames(balloons)), ]

Unnamed: 0,color,size,act,age,inflated
balloon_10,YELLOW,LARGE,DIP,CHILD,False
balloon_11,PURPLE,SMALL,STRETCH,ADULT,True
balloon_12,PURPLE,SMALL,STRETCH,CHILD,True
balloon_13,PURPLE,SMALL,DIP,ADULT,True
balloon_14,PURPLE,SMALL,DIP,CHILD,False
balloon_15,PURPLE,SMALL,DIP,CHILD,False
balloon_16,PURPLE,LARGE,STRETCH,ADULT,True
balloon_17,PURPLE,LARGE,STRETCH,CHILD,True
balloon_18,PURPLE,LARGE,DIP,ADULT,True
balloon_19,PURPLE,LARGE,DIP,CHILD,False


In [33]:
# Filter rows with a logical condition on a column: purple balloons only.
balloons[balloons$color == "PURPLE", ]

Unnamed: 0,color,size,act,age,inflated
balloon_11,PURPLE,SMALL,STRETCH,ADULT,True
balloon_12,PURPLE,SMALL,STRETCH,CHILD,True
balloon_13,PURPLE,SMALL,DIP,ADULT,True
balloon_14,PURPLE,SMALL,DIP,CHILD,False
balloon_15,PURPLE,SMALL,DIP,CHILD,False
balloon_16,PURPLE,LARGE,STRETCH,ADULT,True
balloon_17,PURPLE,LARGE,STRETCH,CHILD,True
balloon_18,PURPLE,LARGE,DIP,ADULT,True
balloon_19,PURPLE,LARGE,DIP,CHILD,False
balloon_20,PURPLE,LARGE,DIP,CHILD,False


In [34]:
# Same filtering pattern on a different column: balloons whose act is "DIP".
balloons[balloons$act == "DIP", ]

Unnamed: 0,color,size,act,age,inflated
balloon_3,YELLOW,SMALL,DIP,ADULT,True
balloon_4,YELLOW,SMALL,DIP,CHILD,False
balloon_5,YELLOW,SMALL,DIP,CHILD,False
balloon_8,YELLOW,LARGE,DIP,ADULT,True
balloon_9,YELLOW,LARGE,DIP,CHILD,False
balloon_10,YELLOW,LARGE,DIP,CHILD,False
balloon_13,PURPLE,SMALL,DIP,ADULT,True
balloon_14,PURPLE,SMALL,DIP,CHILD,False
balloon_15,PURPLE,SMALL,DIP,CHILD,False
balloon_18,PURPLE,LARGE,DIP,ADULT,True


In [35]:
# Per-column overview: level counts for factor columns and TRUE/FALSE counts
# for the logical column.
summary(balloons)

    color       size         act        age      inflated      
 PURPLE:10   LARGE:10   DIP    :12   ADULT: 8   Mode :logical  
 YELLOW:10   SMALL:10   STRETCH: 8   CHILD:12   FALSE:8        
                                                TRUE :12       
                                                NA's :0        

## Practice

#### Display the yellow balloons

#### Display the inflated balloons

#### Display the stretching balloons

#### Display the large balloons

#### Display the purple balloons

#### Display just the color and size columns

#### Display just the color and size columns for large, yellow balloons