Skip to content
This repository has been archived by the owner on Oct 29, 2023. It is now read-only.

Commit

Permalink
Additional snippets
Browse files Browse the repository at this point in the history
  • Loading branch information
Chris R. Albon authored and Chris R. Albon committed Nov 2, 2013
1 parent 905eeaf commit 020e47b
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 0 deletions.
19 changes: 19 additions & 0 deletions cleaning-gender.r
@@ -0,0 +1,19 @@
# Cleaning Up Gender
# Original source: Learning R

# Load stringr, which provides str_replace() and regex(). Without this the
# script cannot find str_replace() when run on its own.
library(stringr)

# Create some messy fake gender data: mixed case, one-letter abbreviations,
# and a missing value
gender <- c(
  "MALE", "Male", "male", "M",
  "FEMALE", "Female", "female", "f",
  NA
)

# Replace strings that are exactly "m" or "male", in any letter case, with
# "Male". regex(ignore_case = TRUE) is the replacement for the deprecated
# ignore.case() helper.
clean_gender <- str_replace(
  gender,
  regex("^m(ale)?$", ignore_case = TRUE),
  "Male"
)

# Replace strings that are exactly "f" or "female", in any letter case, with
# "Female". Elements that match neither pattern (including NA) are unchanged.
clean_gender <- str_replace(
  clean_gender,
  regex("^f(emale)?$", ignore_case = TRUE),
  "Female"
)
31 changes: 31 additions & 0 deletions cleaning-strings.r
@@ -0,0 +1,31 @@
# Cleaning Strings
# Original source: Learning R

# R includes grep, grepl, regexpr, sub, and gsub to handle strings. However,
# they can be clunky, so the stringr package provides a more consistent
# interface to these functions that makes working with them easier.

# Load the learningr package
library(learningr)

# Load the stringr package
library(stringr)

# Load the english_monarchs data from the learningr package
data(english_monarchs, package = "learningr")

# Detect literal commas in the domain variable, meaning that during that time
# a monarch had multiple territories (domains). fixed() matches the comma as
# plain text rather than as a regular expression.
multiple_kingdoms <- str_detect(english_monarchs$domain, fixed(","))
multiple_kingdoms

# Index the rows where multiple_kingdoms is TRUE and show the name and domain
# columns for those rows.
english_monarchs[multiple_kingdoms, c("name", "domain")]

# Detect either a comma or the whole word "and" in the name variable, meaning
# that a domain had multiple rulers. The \\b word boundaries stop "and" from
# matching inside a longer word (e.g. a name that merely contains "and").
multiple_rulers <- str_detect(english_monarchs$name, ",|\\band\\b")

# Show the names where multiple rulers were detected. which() keeps only the
# TRUE positions, so rows where the name is missing (NA) are dropped.
english_monarchs$name[which(multiple_rulers)]

# Since individual rulers are separated by ", " or " and ", we can split the
# names on those separators. The output is a list with one character vector
# per row.
individual_rulers <- str_split(english_monarchs$name, ", | and ")

# Take a look at the first few entries that were split into more than one
# ruler. lengths() always returns an integer vector, unlike sapply().
head(individual_rulers[lengths(individual_rulers) > 1])
2 changes: 2 additions & 0 deletions managing-data-frames.r
@@ -0,0 +1,2 @@
# Managing Data Frames

26 changes: 26 additions & 0 deletions scraping-webpages.r
@@ -1,3 +1,29 @@
# Scraping Web Pages
# Original source: Learning R

# Attach RCurl, which supplies getURL()
library(RCurl)

# Address of the page to scrape
time_url <- "http://tycho.usno.navy.mil/cgi-bin/timer.pl"

# Fetch the page; the HTML comes back as a single string
time_page <- getURL(time_url)

# Print the raw HTML with cat() so the newlines are rendered rather than
# escaped
cat(time_page)

# Attach XML, which supplies htmlParse(), xpathSApply() and xmlValue()
library(XML)

# Parse the downloaded HTML into a document object, then print it
time_doc <- htmlParse(time_page)
time_doc

# Find every "pre" element. The leading // searches the whole document.
pre_nodes <- xpathSApply(time_doc, "//pre")

# Keep the first match itself ([[1]]) rather than a one-element list
pre <- pre_nodes[[1]]

# Split the text of the "pre" element on newlines (\n) and discard the first
# piece, leaving one entry per time
pre_lines <- strsplit(xmlValue(pre), "\n")[[1]]
values <- pre_lines[-1]

# Split each entry on runs of tabs (\t+), separating the time from the
# timezone
times <- strsplit(values, "\t+")
15 changes: 15 additions & 0 deletions strings-to-logical.r
@@ -0,0 +1,15 @@
# Converting Strings To Logical

# Create a character vector with "Y" and "N" elements
answers <- c("Y", "Y", "N", "Y", "N")

# Convert yes/no strings to logical values.
#
# Arguments:
#   x   Vector of answers to convert.
#   yes Value(s) of x that map to TRUE. Defaults to "Y".
#   no  Value(s) of x that map to FALSE. Defaults to "N".
#
# Returns a logical vector the same length as x. Elements of x that match
# neither `yes` nor `no` (including NA) are returned as NA. If a value appears
# in both `yes` and `no`, it maps to FALSE because `no` is applied last.
yn_to_logical <- function(x, yes = "Y", no = "N") {
  # Start from all-NA so that unrecognised values stay missing
  result <- rep(NA, length(x))
  # %in% never returns NA, so missing inputs are simply left untouched
  result[x %in% yes] <- TRUE
  result[x %in% no] <- FALSE
  result
}

# Run the function on the data
yn_to_logical(answers)

0 comments on commit 020e47b

Please sign in to comment.