# Removing and Listing Variables

In [1]:
# You can remove variables in R using the remove() or the rm() function.
variable1 <- 1
variable1  # prints 1: the variable exists at this point
rm(variable1)
variable1  # fails on purpose: the variable no longer exists

# variable1 printed 1 the first time we called it,
# but raised an "object not found" error the second time,
# because the variable had been removed by then.

ERROR: Error in eval(expr, envir, enclos): object 'variable1' not found


In [2]:
# You can delete multiple variables at once by passing each one to the rm() function.
# The following will delete variable1 and variable2, but not variable3.
variable1 <- 1
variable2 <- c(1,2)
variable3 <- c(1,2,3)
rm(variable1, variable2)
variable3
variable2
variable1
# Similar to before, the script returned an error when we tried calling a variable that no longer exists.
# This time, though, execution stopped at variable2, so we never reached the line that calls variable1.

ERROR: Error in eval(expr, envir, enclos): object 'variable2' not found


In [3]:
# Lastly, you can also delete all variables at once with the command: rm(list = ls())
# ls() supplies the names of the variables that currently exist, and rm() removes each of them.
variable1 <- 1
variable2 <- c(1,2)
variable3 <- c(1,2,3)
rm(list = ls() )
variable1
variable2

# WARNING: YOU SHOULD ONLY RUN THIS COMMAND IN THE CONSOLE.  IF THIS IS INCLUDED IN SCRIPTS OR MARKDOWN FILES,
# IT CAN POTENTIALLY CAUSE UNINTENDED EFFECTS.

ERROR: Error in eval(expr, envir, enclos): object 'variable1' not found


In [4]:
# ls() returns the names of the variables that currently exist.
a <- 1
b <- 2
c <- 3
d <- 4
e <- 5

# The listing includes a, b, c, d and e.
ls()

# Once d has been removed it no longer appears in the listing.
rm(d)
ls()

# The class() function, and the is.____() functions

In [5]:
# You can check the class of a variable with the class() function.

# By default, R does not differentiate between integers and decimals; to R they are both just numeric.
integer <- 4
class(integer)  # "numeric", even though 4 is a whole number

decimal <- 4.5
class(decimal)  # "numeric"

# By typing "L" after an integer, you can tell R to store the variable specifically as an integer.
numeric_by_default <- 10
specifically_an_integer <- 10L
class(numeric_by_default)       # "numeric"
class(specifically_an_integer)  # "integer"

In [6]:
# R reports the class of a vector as the class of its elements.
# Instead of "a vector of numerics", R just sees "numeric".
vector_of_numbers <- c(1, 2, 3)
class(vector_of_numbers)     # "numeric"
vector_of_characters <- c("1", "2", "3")
class(vector_of_characters)  # "character"

In [7]:
# A list, by contrast, is a distinct class of its own: R reports "list"
# no matter which classes the elements stored inside it have.
list_of_numbers <- list(1, 2, 3, 4, 5)
class(list_of_numbers)
list_of_characters <- list("1", "2", "3", "4", "5")
class(list_of_characters)
# Even a list whose elements have different classes is still just a "list".
list_of_mixed_classes <- list(1, c("1", "2", "3"), "4", FALSE)
class(list_of_mixed_classes)

In [8]:
# Instead of checking what class a variable is, you may wish to test whether it is a specified class.
# You can use the is.___() functions to do so.
# To check whether the variable x is a number, use the function is.numeric(x).
# Likewise, to check whether the variable x is a dataframe, use the function is.data.frame(x).

In [9]:
# x is stored as a double, so it is numeric but not an integer.
x <- 5
is.numeric(x)
is.integer(x)

# y carries the L suffix, so it is both numeric and an integer.
y <- 5L
is.numeric(y)
is.integer(y)

# A dataframe is not numeric, even when every one of its columns is.
z <- data.frame(1:3, 4:6, 7:9)
is.numeric(z)
is.data.frame(z)

# Vectors

In [10]:
# The elements of a vector can be given names after the vector has been
# created, by assigning a character vector to names().
profits <- c(
  98, 94, 96, 97, 100, 103,
  102, 102, 104, 105, 100, 103
)
# month.name is R's built-in vector of the twelve English month names,
# "January" through "December".
names(profits) <- month.name
# A named element can be looked up either by position or by name.
profits[7]
profits["July"]

In [11]:
# Names can also be supplied inline, as name = value pairs, while the vector
# is being built.
rm(profits)  # discard the earlier definition so we start from scratch
profits <- c(
  January = 98, February = 94, March = 96,
  April = 97, May = 100, June = 103,
  July = 102, August = 102, September = 104,
  October = 105, November = 100, December = 103
)
profits[7]
profits["July"]

In [12]:
# Consider your daily revenue in a given week, and your daily costs in the same week.
daily_revenue <- c(98, 95, 92, 94, 93)
daily_costs <- c(24, 20, 22, 22, 23)
# Now define a vector of day names.
days_vector <- c("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")

# Then you do something completely unnecessary, but nonetheless awesome:
# name both vectors in a single chained assignment.
# Chained assignments are evaluated right to left, so daily_costs is named first
# and the same names are then given to daily_revenue.
names(daily_revenue) <- names(daily_costs) <- days_vector
daily_revenue["Tuesday"] - daily_costs["Tuesday"]

In [13]:
# Logical operations are performed element-wise with vectors, returning a logical vector
# with one TRUE/FALSE value per element.
daily_costs > 22

# It therefore follows that a logical operation can be used to select which elements we want.
# Under the hood, you are forming a vector of logical values,
# then using that vector to index your vector of interest.
daily_costs[c(TRUE, FALSE, FALSE, FALSE, TRUE)]
# The same selection, with the logical vector computed from the condition.
daily_costs[daily_costs > 22]

In [14]:
# Several conditions can be combined with & (element-wise AND).
daily_costs[daily_costs > 22 & daily_costs < 24]
# Work out the daily profit once, then keep only the days where it exceeds 70.
daily_profit <- daily_revenue - daily_costs
daily_profit[daily_profit > 70]

# Factors

In [15]:
# Factors are a data type used to store repetitive categorical data.
# You can create a factor by passing a vector into the function factor().
speeds <- c("S", "A", "A", "F", "A", "S", "F", "A", "F")
speeds_factor <- factor(speeds)
class(speeds_factor)

# You can then use the summary() function to count how many elements fall into each level of the factor.
summary(speeds_factor)

In [16]:
# You can use the levels() function to rename each distinct category, or "level," within a factor.
# WARNING: THE ORDER IN WHICH YOU ASSIGN LEVELS IS IMPORTANT.

# R automatically puts categories in alphabetical order when defining a factor,
# but R assigns the new names to the levels by position, in the order you list them.

# In our example, we have "S" (Slow), "A" (Average), and "F" (Fast).
# R automatically orders the categories: A, F, S.
# Therefore, we must define the levels in the same order: "Average," then "Fast," then "Slow."
# Compare the following assignments:

# Wrong order: "A" is relabelled "Slow", "F" becomes "Average", and "S" becomes "Fast".
levels(speeds_factor) <- c("Slow", "Average", "Fast")
summary(speeds_factor)

# Correct order. Renaming is positional, so this also repairs the mislabelled levels from above.
levels(speeds_factor) <- c("Average", "Fast", "Slow")
summary(speeds_factor)

In [17]:
# Sometimes the categories have a natural ranking, as with these grades.
# Passing ordered = TRUE together with an explicit levels vector records that
# ranking and replaces R's default alphabetical ordering of the levels.
grades <- c(
  "Good", "Needs Improvement", "Needs Improvement",
  "Bad", "Needs Improvement", "Bad",
  "Good", "Needs Improvement", "Good"
)
grades_factor <- factor(grades)
summary(grades_factor)

grades_factor <- factor(
  grades,
  levels = c("Bad", "Needs Improvement", "Good"),
  ordered = TRUE
)
summary(grades_factor)

grades_factor
levels(grades_factor)
# Although the first "Good" comes before the first "Bad" in the data,
# levels() reports the levels in the ranking we specified.

In [18]:
# If you give the categories in a factor a defined order, you can compare its elements.
# The third element of grades_factor is "Needs Improvement," and the seventh element is "Good."
# Because we gave each level an order, "Needs Improvement" is seen as less than "Good,"
# so this comparison returns FALSE.
grades_factor[3] > grades_factor[7]
# Likewise, the third element "Needs Improvement" is seen as more than the sixth element "Bad,"
# so this comparison returns TRUE.
grades_factor[3] > grades_factor[6]

# Dataframes

In [19]:
# Build one vector per column; the vectors hold different classes of data.
days <- c(
  "Sunday", "Monday", "Tuesday", "Wednesday",
  "Thursday", "Friday", "Saturday"
)
revenues <- c(0, 98, 0, 92, 94, 93, 0)
costs <- c(0, 24, 0, 22, 22, 23, 0)
type_of_day <- c(
  "Weekend", "Workday", "Holiday", "Workday",
  "Workday", "Workday", "Weekend"
)

# Unlike a matrix, a dataframe may hold a different data type in each column.
company <- data.frame(
  days = days,
  revenues = revenues,
  costs = costs,
  type_of_day = type_of_day
)
class(company)
company

days,revenues,costs,type_of_day
Sunday,0,0,Weekend
Monday,98,24,Workday
Tuesday,0,0,Holiday
Wednesday,92,22,Workday
Thursday,94,22,Workday
Friday,93,23,Workday
Saturday,0,0,Weekend


In [20]:
# You can use existing columns to create new columns.
company$profits <- company$revenues - company$costs
# A comparison already yields TRUE/FALSE for every row, so it can be stored
# directly; wrapping it in ifelse(..., TRUE, FALSE) would add nothing.
company$workday <- company$type_of_day == "Workday"
company

days,revenues,costs,type_of_day,profits,workday
Sunday,0,0,Weekend,0,False
Monday,98,24,Workday,74,True
Tuesday,0,0,Holiday,0,False
Wednesday,92,22,Workday,70,True
Thursday,94,22,Workday,72,True
Friday,93,23,Workday,70,True
Saturday,0,0,Weekend,0,False


In [21]:
# The default R function for inspecting the structure of a dataframe is str()
str(company)
# Within the dplyr library is an alternative function - glimpse
library("dplyr")
cat("\n")  # print a blank line to separate the two outputs

glimpse(company)

'data.frame':	7 obs. of  6 variables:
 $ days       : Factor w/ 7 levels "Friday","Monday",..: 4 2 6 7 5 1 3
 $ revenues   : num  0 98 0 92 94 93 0
 $ costs      : num  0 24 0 22 22 23 0
 $ type_of_day: Factor w/ 3 levels "Holiday","Weekend",..: 2 3 1 3 3 3 2
 $ profits    : num  0 74 0 70 72 70 0
 $ workday    : logi  FALSE TRUE FALSE TRUE TRUE TRUE ...


"package 'dplyr' was built under R version 3.6.3"
Attaching package: 'dplyr'

The following objects are masked from 'package:stats':

    filter, lag

The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union




Rows: 7
Columns: 6
$ days        <fct> Sunday, Monday, Tuesday, Wednesday, Thursday, Friday, S...
$ revenues    <dbl> 0, 98, 0, 92, 94, 93, 0
$ costs       <dbl> 0, 24, 0, 22, 22, 23, 0
$ type_of_day <fct> Weekend, Workday, Holiday, Workday, Workday, Workday, W...
$ profits     <dbl> 0, 74, 0, 70, 72, 70, 0
$ workday     <lgl> FALSE, TRUE, FALSE, TRUE, TRUE, TRUE, FALSE


In [22]:
# We can use conditional statements to select a subset of rows from a dataframe.
# The workday column is already logical, so it can be used directly as a
# condition; comparing it with TRUE is unnecessary.
company$workday & company$profits > 70
cat("\n")
company[company$workday & company$profits > 70, ]

# The same result can be achieved using the subset() function.
subset(company, workday & profits > 70)




Unnamed: 0,days,revenues,costs,type_of_day,profits,workday
2,Monday,98,24,Workday,74,True
5,Thursday,94,22,Workday,72,True


Unnamed: 0,days,revenues,costs,type_of_day,profits,workday
2,Monday,98,24,Workday,74,True
5,Thursday,94,22,Workday,72,True


# Other base R functions

## Sequences

In [23]:
# You can create sequences of numbers with the seq() function.
# You can specify the starting and ending number with the parameters from= and to=.
numbers <- seq(from = 1, to = 5)
numbers

# For shorthand, you can simply type the starting and ending numbers.
numbers <- seq(1, 5)
numbers

# Alternatively, you can use the colon syntax a:b,
# where a is the starting number, and b is the ending number.
# With the colon syntax the seq() function is not needed at all.
numbers <- 1:7
numbers

# CAUTION: do not wrap the colon syntax in seq().
# Given a single vector, seq() counts along that vector and returns 1, 2, ..., length(vector).
# seq(1:7) therefore looks right only because that sequence happens to start at 1;
# seq(3:7) returns 1 2 3 4 5, not 3 4 5 6 7.
seq(3:7)

In [24]:
# by sets the step between consecutive elements of the sequence.
numbers <- seq(from = 1, to = 10, by = 2)
numbers
# The step does not have to be a whole number.
numbers <- seq(from = 1, to = 10, by = 0.2)
numbers

In [25]:
# length.out fixes how many elements the sequence has; R works out the step.
numbers <- seq(from = 1, to = 10, length.out = 3)
numbers

numbers <- seq(from = 1, to = 2, length.out = 10)
numbers

In [26]:
# along.with takes another vector and makes the sequence the same length as it.
# NOTE: only the length of that vector is used, so it may be of any data type.
character_vector_of_length_7 <- c(
  "this", "is", "not", "numeric",
  "this", "is", "character"
)
numeric_vector_of_length_7 <- seq(
  from = 1, to = 3,
  along.with = character_vector_of_length_7
)
numeric_vector_of_length_7

## Repetitions

In [27]:
# rep() repeats whatever it is given: a single value, a vector, a list, ...
# times says how many repetitions to make.
vector_of_fives <- rep(5, times = 3)
vector_of_fives
# The number of repetitions can also be passed by position.
vector_of_fives <- rep(5, 4)
vector_of_fives

In [28]:
# Repeating a list repeats its elements in order: five elements repeated three
# times gives a list of fifteen elements.
list_of_things <- list(1:3, TRUE, FALSE, 4.5, 7)
list_of_things_repeated <- rep(list_of_things, times = 3)
list_of_things_repeated

In [29]:
# rep() handles a dataframe as a list of its columns, so repeating a
# three-column dataframe twice returns a list of six column vectors
# rather than a dataframe.
dataframe_of_data <- data.frame(matrix(1:9, nrow = 3, byrow = TRUE))
dataframe_of_data_repeated <- rep(dataframe_of_data, times = 2)
dataframe_of_data_repeated

In [30]:
# The length.out parameter can be used to specify the exact length of the result.
# Here 20 is a multiple of the vector's length (5),
# so the vector is repeated exactly four times
# and nothing is left off.
numbers <- c(1, 2, 3, 4, 5)
numbers_repeated <- rep(numbers, length.out = 20)
numbers_repeated

# If the requested length is not a multiple of the vector's length,
# the final repetition is cut short (leaving off elements)
# in order to meet the specified length.
numbers_repeated <- rep(numbers, length.out = 13)
numbers_repeated

In [31]:
# each repeats every element that many times in a row before moving on to the
# next element.
numbers_repeated <- rep(numbers, each = 2)
numbers_repeated
# When each and times are given together, each is applied first and the
# resulting vector is then repeated times over.
numbers_repeated <- rep(numbers, times = 3, each = 4)
numbers_repeated
##################################################