Adding and updating columns by reference

In [None]:
# Exercise 1
# Create duration_hour by reference as duration / 3600
# (presumably seconds converted to hours -- confirm the unit of duration)
batrips[, `:=`(duration_hour = duration / 3600)]

In [None]:
# Exercise 2
# Show the first two rows of untidy
head(untidy, n = 2)

In [None]:
# Overwrite start_station in row 2 with the corrected spelling,
# using the LHS := RHS form
untidy[2L, start_station := "San Francisco City Hall"]

In [None]:
# Exercise 3
# Set duration to NA in every row where it is negative
untidy[duration < 0, `:=`(duration = NA)]

Grouped aggregations

In [None]:
# Exercise 1
# Store the number of rows (.N) of each start_station group in trips_N
batrips[, trips_N := .N, by = "start_station"]

In [None]:
# Store the mean duration of each start_station/end_station pair
# in duration_mean
batrips[, duration_mean := mean(duration),
        by = c("start_station", "end_station")]

In [None]:
# Exercise 2
# For each month of start_date, store the mean of duration (NAs ignored)
# in mean_dur
batrips_new[
  , `:=`(mean_dur = mean(duration, na.rm = TRUE)),
  by = month(start_date)
]

In [None]:
# Chain of two by-reference updates:
#   1. compute the per-month mean of duration (NAs ignored) into mean_dur
#   2. where duration is NA, overwrite it with that row's mean_dur
batrips_new[
  , mean_dur := mean(duration, na.rm = TRUE), by = month(start_date)
][
  is.na(duration), duration := mean_dur
]

In [None]:
# Chain of three by-reference updates:
#   1. compute the per-month mean of duration (NAs ignored) into mean_dur
#   2. where duration is NA, overwrite it with that row's mean_dur
#   3. remove the helper column mean_dur by assigning NULL
batrips_new[
  , mean_dur := mean(duration, na.rm = TRUE), by = month(start_date)
][
  is.na(duration), duration := mean_dur
][
  , mean_dur := NULL
]

Advanced aggregations

In [None]:
# Exercise 1
# LHS := RHS form: add the per-start_station mean and median of duration
# in a single assignment (character vector of names := list of values)
batrips[, c("mean_duration", "median_duration") :=
          list(mean(duration), median(duration)),
        by = "start_station"]

In [None]:
# Exercise 2
# Functional `:=`() form: add the per-start_station mean and median of
# duration as name = value pairs
batrips[
  ,
  `:=`(
    mean_duration = mean(duration),
    median_duration = median(duration)
  ),
  by = "start_station"
]

In [None]:
# Exercise 3
# For rows with duration > 600 only, assign mean_duration as the mean of
# duration within each start_station/end_station pair of those rows
batrips[duration > 600,
        mean_duration := mean(duration),
        by = c("start_station", "end_station")]

Fast data reading with fread()

In [None]:
# Exercise 1
# Time a base R read.csv() import of batrips.csv
system.time(read.csv(file = "batrips.csv"))

In [None]:
# Time a data.table fread() import of batrips.csv
system.time(fread(input = "batrips.csv"))

In [None]:
# Exercise 2
# Import sample.csv with read.csv(), spelling out the options explicitly.
# fill is a TRUE/FALSE flag (rows with fewer fields get blank fields added),
# so it is passed as TRUE rather than NA. quote = "" disables quote handling,
# strip.white = TRUE trims whitespace around unquoted fields, and
# stringsAsFactors = FALSE keeps strings as character.
csv_file <- read.csv("sample.csv", fill = TRUE, quote = "",
                     stringsAsFactors = FALSE, strip.white = TRUE,
                     header = TRUE)
csv_file

In [None]:
# Import sample.csv with fread(), leaving every option at its default
csv_file <- fread(input = "sample.csv")
csv_file

In [None]:
# Exercise 3
# Read only the "id" and "val" columns
select_columns <- fread(input = "sample.csv", select = c("id", "val"))
select_columns

In [None]:
# Read every column except "val".
# The path is the literal "sample.csv", matching the other fread() calls in
# this notebook. A bare `url` resolves to base R's url() function unless a
# variable of that name was defined beforehand.
drop_column <- fread("sample.csv", drop = "val")
drop_column

In [None]:
# Exercise 4
# Read the whole file with default settings
entire_file <- fread(input = "sample.csv")
entire_file

In [None]:
# Read at most the first 3 rows (the exercise uses this to avoid the warning)
only_data <- fread(input = "sample.csv", nrows = 3)
only_data

In [None]:
# Ignore the first 7 lines and read what follows
# (presumably the metadata section of the file)
only_metadata <- fread(input = "sample.csv", skip = 7)
only_metadata

Advanced file reading

In [None]:
# Exercise 1
# Read the same file once with fread() and once with read.csv()
fread_import <- fread(input = "sample.csv")
base_import <- read.csv(file = "sample.csv")

# Compare the class each reader gave to the id column
class(fread_import$id)
class(base_import$id)

In [None]:
# Exercise 2
# Read with read.csv() defaults and inspect the resulting structure
base_r_defaults <- read.csv(file = "sample.csv")
str(base_r_defaults)

In [None]:
# Read with read.csv(), giving one class per column (10 in total):
# 4 factor, 1 character, 1 integer, 4 numeric
base_r <- read.csv(
  file = "sample.csv",
  colClasses = rep(
    c("factor", "character", "integer", "numeric"),
    times = c(4, 1, 1, 4)
  )
)
str(base_r)

In [None]:
# Read with fread(), requesting factor for columns 1-4 and numeric for
# columns 7-10; no class is specified for the other columns
import_fread <- fread(
  input = "sample.csv",
  colClasses = list(factor = 1:4, numeric = 7:10)
)
str(import_fread)

In [None]:
# Exercise 3
# Default import; the exercise expects fread() to emit a warning for this file
incorrect <- fread(input = "sample.csv")
incorrect

In [None]:
# Re-import with fill = TRUE so that rows with fewer fields are padded
correct <- fread(input = "sample.csv", fill = TRUE)
correct

In [None]:
# Exercise 4
# Treat the string "##" as a missing value (NA) while importing
missing_values <- fread(input = "sample.csv", na.strings = "##")
missing_values

Fast data writing with fwrite()

In [None]:
# Exercise 1
# Save dt to fwrite.txt
fwrite(x = dt, file = "fwrite.txt")

# Show the raw text lines of the written file
readLines(con = "fwrite.txt")

# Read the written file back with fread()
fread(input = "fwrite.txt")

In [None]:
# Exercise 2
# Write batrips_dates with date-times in the "ISO" format
fwrite(x = batrips_dates, file = "iso.txt", dateTimeAs = "ISO")

# Read the file back and display it
iso <- fread(input = "iso.txt")
iso

In [None]:
# Write batrips_dates with date-times in the "squash" format
fwrite(x = batrips_dates, file = "squash.txt", dateTimeAs = "squash")

# Read the file back and display it
squash <- fread(input = "squash.txt")
squash

In [None]:
# Write batrips_dates with date-times in the "epoch" format
fwrite(x = batrips_dates, file = "epoch.txt", dateTimeAs = "epoch")

# Read the file back and display it
epoch <- fread(input = "epoch.txt")
epoch

In [None]:
# Exercise 3
# Time writing batrips with base R write.table()
system.time(write.table(x = batrips, file = "base-r.txt"))

In [None]:
# Time writing batrips with data.table fwrite()
system.time(fwrite(x = batrips, file = "data-table.txt"))