In [1]:
# Environment setup: make sure dplyr is available, then fetch and unpack the
# UCI HAR dataset under ./data.

# Install dplyr only when it is missing, so re-running the script does not
# reinstall the package (and hit the network) every time.
if (!requireNamespace("dplyr", quietly = TRUE)) {
    install.packages("dplyr")
}
library(dplyr)

if (!dir.exists("./data")) {
    dir.create("./data")
}

dataset_archive <- "./data/UCI_HAR_Dataset.zip"
if (!file.exists(dataset_archive)) {
    # mode = "wb": the archive is binary and must not be written in text mode.
    download.file(
        url = "https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip",
        destfile = dataset_archive,
        mode = "wb"
    )
}

# Extract whenever the unpacked directory is absent, not only right after a
# fresh download; otherwise an archive that is present but was never
# extracted would leave the later read.table() calls without their files.
if (!dir.exists("./data/UCI HAR Dataset")) {
    unzip(zipfile = dataset_archive, exdir = "./data/")
}

Installing package into ‘/usr/local/lib/R/3.3/site-library’
(as ‘lib’ is unspecified)
: S3 methods ‘[.fun_list’, ‘[.grouped_df’, ‘all.equal.tbl_df’, ‘anti_join.data.frame’, ‘anti_join.tbl_df’, ‘anti_join.tbl_lazy’, ‘arrange_.data.frame’, ‘arrange_.tbl_df’, ‘arrange_.tbl_lazy’, ‘as.data.frame.grouped_df’, ‘as.data.frame.rowwise_df’, ‘as.data.frame.tbl_cube’, ‘as.data.frame.tbl_df’, ‘as.data.frame.tbl_sql’, ‘as.fun_list.function’, ‘as.fun_list.character’, ‘as.fun_list.fun_list’, ‘as.table.tbl_cube’, ‘as.tbl.data.frame’, ‘as.tbl.tbl’, ‘as.tbl_cube.array’, ‘as.tbl_cube.data.frame’, ‘as.tbl_cube.matrix’, ‘as.tbl_cube.table’, ‘as_data_frame.grouped_df’, ‘as_data_frame.tbl_cube’, ‘auto_copy.tbl_cube’, ‘auto_copy.tbl_df’, ‘auto_copy.tbl_sql’, ‘c.sql’, ‘cbind.grouped_df’, ‘collapse.data.frame’, ‘collapse.tbl_sql’, ‘collect.data.frame’, ‘collect.tbl_sql’, ‘compute.data.frame’, ‘compute.tbl_sql’, ‘copy_to.src_local’, ‘copy_to.src_sql’, ‘db_analyze.DBIConnection’, ‘db_analyze.MySQLConnection’, ‘db

In [49]:
# Load the test split: subject ids, activity codes and the measurement
# matrix, each stored in its own whitespace-separated file.
test_ds_subject <- read.table("./data/UCI HAR Dataset/test/subject_test.txt")
test_ds_y <- read.table("./data/UCI HAR Dataset/test/y_test.txt")
test_ds_X <- read.table("./data/UCI HAR Dataset/test/X_test.txt")

# Column order is measurements, then activity code, then subject id.
test_ds <- do.call(cbind, list(test_ds_X, test_ds_y, test_ds_subject))

In [50]:
# Load the training split: subject ids, activity codes and the measurement
# matrix, each stored in its own whitespace-separated file.
train_ds_subject <- read.table("./data/UCI HAR Dataset/train/subject_train.txt")
train_ds_y <- read.table("./data/UCI HAR Dataset/train/y_train.txt")
train_ds_X <- read.table("./data/UCI HAR Dataset/train/X_train.txt")

# Column order is measurements, then activity code, then subject id.
train_ds <- do.call(cbind, list(train_ds_X, train_ds_y, train_ds_subject))

In [51]:
# Read the feature names and the activity code -> activity name lookup table.
# Both files are space-separated; strings are kept as character vectors
# (FALSE is spelled out because `F` is an ordinary, reassignable variable).
features <- read.table(
    file = "./data/UCI HAR Dataset/features.txt",
    sep = " ",
    stringsAsFactors = FALSE
)

# Column labels for the combined data set: every feature name (second column
# of features.txt) followed by the two extra columns for the activity code
# and the subject id.
labels <- c(features$V2, "Activity", "Subject")

activities <- read.table(
    file = "./data/UCI HAR Dataset/activity_labels.txt",
    sep = " ",
    stringsAsFactors = FALSE
)

In [52]:
# Step 1: Merges the training and the test sets to create one data set.
# Training rows come first, then test rows; the columns are then named with
# the prepared label vector.
full_ds <- setNames(rbind(train_ds, test_ds), labels)

In [53]:
# Step 2: Extracts only the measurements on the mean and standard deviation
# for each measurement.
# Keep labels containing "mean(" or "std(", plus the Activity and Subject
# columns, then reduce the data set to exactly those columns.
labels <- grep("(mean\\(|std\\()|Activity|Subject", labels, value = TRUE)
full_ds <- full_ds[, labels, drop = FALSE]

In [54]:
# Step 3,4: Uses descriptive activity names to name the activities in the
# data set.
# Join the lookup table on the activity code (V1 in `activities`), overwrite
# the code with the joined name (V2), then drop the helper column.
full_ds <- merge(x = full_ds, y = activities, by.x = "Activity", by.y = "V1")
full_ds[["Activity"]] <- full_ds[["V2"]]
full_ds <- full_ds[, setdiff(names(full_ds), "V2"), drop = FALSE]

# Persist the labelled data set in the working directory.
write.table(full_ds, file = "step4_data.table")

In [56]:
# Step 5: Creates independent tidy data set with the average of each variable
# for each activity and each subject.
# summarise_all(mean) replaces the deprecated summarise_each(funs(mean)): it
# averages every non-grouping column within each (Activity, Subject) group
# and keeps the original column names.
grouped_means <- full_ds %>%
    group_by(Activity, Subject) %>%
    summarise_all(mean) %>%
    ungroup()  # summarise() leaves the result grouped; drop the leftover grouping

# Row names are omitted so the file contains only the data columns.
write.table(grouped_means, file = "step5_data.table", row.names = FALSE)