# Day1_hacky_hour_1.R
# Hacky Hour Afternoon Day 1
# Heili Lowman
# April 1, 2021
# This code will walk through the hacky hour on Day 1 of the R Workshop.
# Load packages.
library(tidyverse)
library(palmerpenguins)
# Pull the penguins dataset out of the palmerpenguins package.
penguin_data <- palmerpenguins::penguins
# Open the data in the viewer to inspect it.
View(penguin_data)
# Compute the mean/average bill length (mm), skipping missing values (NAs).
mean_bill <- penguin_data %>%
  pull(bill_length_mm) %>%
  mean(na.rm = TRUE)
# Print the new variable to check it.
mean_bill
# Other common summary functions work the same way: max(), min(), sd()
# Fill in the NAs in the bill length column only, using mean_bill as the
# replacement value. The list names the column and the value to put in it.
# Note: this overwrites penguin_data, so every later step sees the filled-in
# bill_length_mm column.
penguin_data <- penguin_data %>%
  replace_na(replace = list(bill_length_mm = mean_bill))
# Count the NAs in the bill_depth_mm column only.
# is.na() - gives a TRUE or FALSE answer for every value in the column
# sum() - adds everything together (each TRUE counts as 1, each FALSE as 0)
sum(is.na(penguin_data$bill_depth_mm))
# Find the rows with NAs in the bill_depth_mm column only.
# When you change a dataset in some way, it's good practice to make a new
# dataset entirely rather than overwrite the old one.
# is.na() already returns TRUE/FALSE, so filter() can use it directly;
# there is no need to compare the result against TRUE.
penguin_na <- filter(penguin_data, is.na(bill_depth_mm))
# Look at new dataset.
View(penguin_na)
# Take rows out of the dataset (the whole row, all other columns too).
# "!" means "not"
# "!=" means "does not equal", "==" means "equals"
# "&" means "and", "|" means "or"
# Keep only the rows that are NOT (flipper length 231 OR body mass 6300).
# filter() also drops any row where this test comes out as NA, i.e. rows
# with a missing flipper length or body mass.
# NOTE(review): this removes every row matching either value, not only a row
# matching both - confirm that is the intent.
penguin_rm <- filter(
  penguin_data,
  !(flipper_length_mm == 231 | body_mass_g == 6300)
)
# Look at new dataset.
View(penguin_rm)
# Order the rows by bill length, smallest to largest (ascending), and store
# the result as a new dataset.
penguin_order <- penguin_data %>%
  arrange(bill_length_mm)
# For largest to smallest (descending), wrap the column in desc():
#   arrange(penguin_data, desc(bill_length_mm))
# Look at new dataset.
View(penguin_order)
# End of script.