diff --git a/01 Intro to R.Rmd b/01 Intro to R.Rmd index 198e48c..1db046d 100644 --- a/01 Intro to R.Rmd +++ b/01 Intro to R.Rmd @@ -31,6 +31,9 @@ output: + + + ```{r echo=FALSE} .counterExercise <- 0 @@ -157,9 +160,9 @@ paste0(my.states) What does the `nchar()` function do? The `paste()` function? Does it make a difference to use `sep=""` or `collapse=","`? What about `paste0()`? ## Exercises -`r exNum("Print all even numbers less than 100")` -`r exNum("What is the mean of even numbers less than 100")` -`r exNum('Have R put in alphabetical order \x60c("WA","DC","CA","PA","MD","VA","OH")\x60')` +`r .exNum("Print all even numbers less than 100")` +`r .exNum("What is the mean of even numbers less than 100")` +`r .exNum('Have R put in alphabetical order \x60c("WA","DC","CA","PA","MD","VA","OH")\x60')` # Assignment of values to variables @@ -215,11 +218,11 @@ state.names[i[1:3]] # show me those three states Note that in the last example we used square brackets within square brackets. First, we asked R to give us the indices of the first three states in alphabetical order and that was `r i[1:3]`. Then R took those three values and plugged them into the second set of square brackets to show you the state names in those positions in the collection. 
## Exercises -`r exNum("What's the last state in the \x60state.names\x60?")` -`r exNum('Pick out states that begin with "M" using their indices')` -`r exNum("Pick out states where you have lived")` -`r exNum("What's the last state in alphabetical order?")` -`r exNum("What are the last three states in alphabetical order?")` +`r .exNum("What's the last state in the \x60state.names\x60?")` +`r .exNum('Pick out states that begin with "M" using their indices')` +`r .exNum("Pick out states where you have lived")` +`r .exNum("What's the last state in alphabetical order?")` +`r .exNum("What are the last three states in alphabetical order?")` # Logical values and operations @@ -305,9 +308,9 @@ sum(my.states %in% c("CA","OR","WA","AK","HI")) Note in the last line we used `sum()` to count for how many of the elements in `my.states` did `%in%` evaluate to be `TRUE`. ## Exercises -`r exNum("Report \x60TRUE\x60 or \x60FALSE\x60 for each state depending on if you have lived there")` -`r exNum("With \x60a <- 1:100\x60, pick out odd numbers between 50 and 75")` -`r exNum("Use greater than less than signs to get all state names that begin with M")` +`r .exNum("Report \x60TRUE\x60 or \x60FALSE\x60 for each state depending on if you have lived there")` +`r .exNum("With \x60a <- 1:100\x60, pick out odd numbers between 50 and 75")` +`r .exNum("Use greater than less than signs to get all state names that begin with M")` # Sampling The function `sample()` randomly shuffles a collection of values. 
@@ -328,10 +331,10 @@ table(a) max(table(a)) # find out which value appears most frequently ``` ## Exercises -`r exNum("Use \x60sample()\x60 to estimate the probability of rolling a 6")` -`r exNum("Use \x60sample()\x60 to estimate the probability that the sum of two die equal 7")` -`r exNum("Use \x60sample()\x60 to select randomly five states without replacement")` -`r exNum("Use \x60sample()\x60 to select randomly 1000 states with replacement")` +`r .exNum("Use \x60sample()\x60 to estimate the probability of rolling a 6")` +`r .exNum("Use \x60sample()\x60 to estimate the probability that the sum of two die equal 7")` +`r .exNum("Use \x60sample()\x60 to select randomly five states without replacement")` +`r .exNum("Use \x60sample()\x60 to select randomly 1000 states with replacement")` + Tabulate how often each state was selected + Which state was selected the least? Make R do this for you @@ -439,12 +442,12 @@ ls() Assuming you are using R Studio, you can also see the objects stored in memory by clicking on the Environment tab. ## Exercises -`r exNum('Fix \x60state.list\x60 so that "DC" is in "other" rather than "east"')`. Here are a few hints +`r .exNum('Fix \x60state.list\x60 so that "DC" is in "other" rather than "east"')`. Here are a few hints + access "other" using `$` + combine things using `c()` + assign values using `<-` + remove values using `[]` with a negative index or using a logical statement -`r exNum("Print out east and central states together sorted")` +`r .exNum("Print out east and central states together sorted")` # Functions @@ -480,9 +483,9 @@ IQR You can see that it computes the 0.25 quantile and the 0.75 quantile and uses `diff()` to compute their difference. ## Exercises -`r exNum('Make a function \x60is.island(x)\x60 returns \x60TRUE\x60 if \x60x\x60 is an island')`. Islands are "HI", "FM", "MH", "PW", "AS", "GU", "MP", "PR", "VI", "UM". Borrow the template I used for `give.first.and.last()`. 
Then try using the `%in%` operator -`r exNum("Count how many islands are within each region. Use an \x60sapply()\x60 (or two) and your new \x60is.island()\x60 function")` -`r exNum("Which components of \x60b\x60 having missing values? Use \x60is.na()\x60")`. `b` was defined earlier +`r .exNum('Make a function \x60is.island(x)\x60 that returns \x60TRUE\x60 if \x60x\x60 is an island')`. Islands are "HI", "FM", "MH", "PW", "AS", "GU", "MP", "PR", "VI", "UM". Borrow the template I used for `give.first.and.last()`. Then try using the `%in%` operator +`r .exNum("Count how many islands are within each region. Use an \x60sapply()\x60 (or two) and your new \x60is.island()\x60 function")` +`r .exNum("Which components of \x60b\x60 have missing values? Use \x60is.na()\x60")`. `b` was defined earlier # Matrices and apply() @@ -600,9 +603,9 @@ with(chicagoCrime, sort(table(Primary.Type[District==10]))) Much easier to read and understand! ## Exercises -`r exNum("Display three randomly selected rows")` -`r exNum("Count \x60NA\x60s in each column")` -`r exNum("Look up \x60Location.Description\x60, \x60Block\x60, \x60Beat\x60, and \x60Ward\x60 for those missing \x60Latitude\x60")` +`r .exNum("Display three randomly selected rows")` +`r .exNum("Count \x60NA\x60s in each column")` +`r .exNum("Look up \x60Location.Description\x60, \x60Block\x60, \x60Beat\x60, and \x60Ward\x60 for those missing \x60Latitude\x60")` # For loops Sometimes we need to have R repeat certain tasks multiple times, such as marching through each row of a dataset and modifying values. For loops accomplish this. Later in this course we will be using Google Maps to extract information about addresses. So we might need to iterate through every row in the dataset, check whether the latitude and longitude are missing, and if missing try to retrieve the latitude and longitude from Google Maps. The last crime in the dataset missing coordinates is in row 9954. 
@@ -697,11 +700,11 @@ chicagoCrime$google.maps.url <- paste("https://www.google.com/maps/place/", This took `r timeWithoutForLoop[3]` seconds. That's `r round(time4ForLoop[3]/timeWithoutForLoop[3],1)` times faster than the for loop. ## Exercises -`r exNum('Use a for loop to create a variable \x60Coordinates\x60 that looks like "(X.Coordinate,Y.Coordinate)"')` +`r .exNum('Use a for loop to create a variable \x60Coordinates\x60 that looks like "(X.Coordinate,Y.Coordinate)"')` + Use `paste()` with the `X.Coordinate` and `Y.Coordinate` variables + Remember the `sep=` option in `paste()` + You might find using the `with()` function to simplify your code and avoid having a lot of `chicagoCrime$`s -`r exNum("Redo the previous exercise without using a for loop and compare computation time")` +`r .exNum("Redo the previous exercise without using a for loop and compare computation time")` # More tabulating, aggregating, and breaking statistics down by group The variable `Arrest` indicates whether someone was arrested for the crime. Here are the first 10 values. @@ -750,8 +753,8 @@ barplot(a$`(Arrest == "true")`, ``` ## Exercises -`r exNum('How many assaults occurred in the street? (\x60Location.Description=="STREET"\x60)')`. Try using `subset()` even though there are other ways -`r exNum("What percentage of assaults occurred in the street by Ward?")` +`r .exNum('How many assaults occurred in the street? (\x60Location.Description=="STREET"\x60)')`. Try using `subset()` even though there are other ways +`r .exNum("What percentage of assaults occurred in the street by Ward?")` # Plotting Data @@ -809,14 +812,14 @@ text(ifelse(tab<80, 180, tab-5), # x-coord of text, adj=1) # right justify text ``` -# Exercises -`r exNum("Make a barplot indicating how many states are in each region. 
Use \x60state.list\x60")` -`r exNum("Identify the beat with the most crimes")` -`r exNum("Identify the beat with the most domestic violence incidents")` -`r exNum("Part 1 crimes are homicide, robbery, assault, arson, burglary, theft, sex offense, motor vehicle theft. Calculate the number of Part 1 crimes in Chicago")` +## Exercises +`r .exNum("Make a barplot indicating how many states are in each region. Use \x60state.list\x60")` +`r .exNum("Identify the beat with the most crimes")` +`r .exNum("Identify the beat with the most domestic violence incidents")` +`r .exNum("Part 1 crimes are homicide, robbery, assault, arson, burglary, theft, sex offense, motor vehicle theft. Calculate the number of Part 1 crimes in Chicago")` # Solutions to the exercises -1. `r exerciseQuestions[1]` +1. `r .exerciseQuestions[1]` ```{r comment=""} (1:49)*2 ``` @@ -825,22 +828,22 @@ or seq(2,98,by=2) ``` -2. `r exerciseQuestions[2]` +2. `r .exerciseQuestions[2]` ```{r comment=""} mean((1:49)*2) ``` -3. `r exerciseQuestions[3]` +3. `r .exerciseQuestions[3]` ```{r comment=""} sort(c("WA","DC","CA","PA","MD","VA","OH")) ``` -4. `r exerciseQuestions[4]` +4. `r .exerciseQuestions[4]` ```{r comment=""} state.names[51] ``` -5. `r exerciseQuestions[5]` +5. `r .exerciseQuestions[5]` ```{r comment=""} state.names[c(7,8,21,24,28,32,35,46)] ``` @@ -853,13 +856,13 @@ Here's another possible answer that uses `substring` (which we haven't covered y state.names[substring(state.names, 1, 1)=="M"] ``` -6. `r exerciseQuestions[6]` +6. `r .exerciseQuestions[6]` Of course, these may vary depending on where you have lived. ```{r comment=""} state.names[c(1, 4, 10, 26)] ``` -7. `r exerciseQuestions[7]` +7. `r .exerciseQuestions[7]` ```{r comment=""} sort(state.names)[51] ``` @@ -868,29 +871,29 @@ or rev(sort(state.names))[1] ``` -8. `r exerciseQuestions[8]` +8. `r .exerciseQuestions[8]` ```{r comment=""} rev(sort(state.names))[1:3] ``` -9. `r exerciseQuestions[9]` +9. 
`r .exerciseQuestions[9]` ```{r comment=""} my.states <- c("PA", "NJ", "NY", "MD", "DE", "MA", "RI", "CT", "ME", "LA", "IN") state.names %in% my.states ``` -10. `r exerciseQuestions[10]` +10. `r .exerciseQuestions[10]` ```{r comment=""} a <- 1:100 a[a %% 2==1 & a>50 & a<75] ``` -11. `r exerciseQuestions[11]` +11. `r .exerciseQuestions[11]` ```{r comment=""} state.names[state.names>"LZ" & state.names<"N"] ``` -12. `r exerciseQuestions[12]` +12. `r .exerciseQuestions[12]` ```{r comment=""} a <- sample(1:6, size=100000, replace=TRUE) table(a)[6]/length(a) @@ -904,7 +907,7 @@ Or mean(a==6) ``` -13. `r exerciseQuestions[13]` +13. `r .exerciseQuestions[13]` ```{r comment=""} dice1 <- sample(1:6, size=1000, replace=TRUE) dice2 <- sample(1:6, size=1000, replace=TRUE) @@ -912,12 +915,12 @@ doubleroll <- dice1 + dice2 mean(doubleroll==7) # should be close to 1/6 or 0.1666... ``` -14. `r exerciseQuestions[14]` (Answers will vary) +14. `r .exerciseQuestions[14]` (Answers will vary) ```{r comment=""} sample(state.names, size=5, replace=FALSE) ``` -15. `r exerciseQuestions[15]` +15. `r .exerciseQuestions[15]` + Tabulate how often each state was selected (Answers will vary) ```{r comment=""} a <- sample(state.names, size=1000, replace=TRUE) @@ -929,7 +932,7 @@ table(a) sort(table(a))[1] ``` -16. `r exerciseQuestions[16]` +16. `r .exerciseQuestions[16]` ```{r comment=""} state.list$east <- state.list$east[state.list$east!="DC"] state.list$other <- c(state.list$other, "DC") @@ -942,7 +945,7 @@ state.list$other <- c(state.list$other, "DC") state.list ``` -17. `r exerciseQuestions[17]` +17. `r .exerciseQuestions[17]` ```{r comment=""} sort(c(state.list$east, state.list$central)) ``` @@ -951,7 +954,7 @@ Or with(state.list, sort(c(east, central))) ``` -18. `r exerciseQuestions[18]` +18. `r .exerciseQuestions[18]` ```{r comment=""} is.island <- function(x) { @@ -959,7 +962,7 @@ is.island <- function(x) } ``` -19. `r exerciseQuestions[19]` +19. 
`r .exerciseQuestions[19]` First, this `lapply()` asks each state if they are an island. ```{r comment=""} @@ -970,7 +973,7 @@ Now we want to count up how many `TRUE`s there are in each component, so wrap th sapply(lapply(state.list, is.island), sum) ``` -20. `r exerciseQuestions[20]` +20. `r .exerciseQuestions[20]` ```{r comment=""} sapply(lapply(b, is.na), any) ``` @@ -980,12 +983,12 @@ b <- list(0:9, c("A","B","C"), c(TRUE,FALSE,NA)) sapply(b, function(x) any(is.na(x))) ``` -21. `r exerciseQuestions[21]` +21. `r .exerciseQuestions[21]` ```{r comment=""} chicagoCrime[sample(1:nrow(chicagoCrime), size=3),] ``` -22. `r exerciseQuestions[22]` +22. `r .exerciseQuestions[22]` ```{r comment=""} sapply(lapply(chicagoCrime, is.na), sum) ``` @@ -994,7 +997,7 @@ Or sapply(chicagoCrime, function(x) sum(is.na(x))) ``` -23. `r exerciseQuestions[23]` +23. `r .exerciseQuestions[23]` ```{r comment=""} i <- is.na(chicagoCrime$Latitude) # Let's just show the first 5 rows @@ -1007,7 +1010,7 @@ subset(chicagoCrime, is.na(chicagoCrime$Latitude), select=c("Location.Description","Block","Beat","Ward"))[1:5,] ``` -24. `r exerciseQuestions[24]` +24. `r .exerciseQuestions[24]` ```{r comment=""} system.time( for (i in 1:nrow(chicagoCrime)) @@ -1028,7 +1031,7 @@ for (i in 1:nrow(chicagoCrime)) } ) ``` -25. `r exerciseQuestions[25]` +25. `r .exerciseQuestions[25]` ```{r comment=""} system.time( chicagoCrime$coords3 <- with(chicagoCrime, @@ -1036,25 +1039,25 @@ chicagoCrime$coords3 <- with(chicagoCrime, ) ``` -26. `r exerciseQuestions[26]` +26. `r .exerciseQuestions[26]` ```{r comment=""} with(subset(chicagoCrime, Primary.Type=="ASSAULT"), sum(chicagoCrime$Location.Description=="STREET")) ``` -27. `r exerciseQuestions[27]` +27. `r .exerciseQuestions[27]` ```{r comment=""} aggregate((Location.Description=="STREET")~Ward, data=subset(chicagoCrime, Primary.Type=="ASSAULT"), mean) ``` -28. `r exerciseQuestions[28]` +28. 
`r .exerciseQuestions[28]` ```{r comment=""} barplot(sapply(state.list, length)) ``` -29. `r exerciseQuestions[29]` +29. `r .exerciseQuestions[29]` ```{r comment=""} names(rev(sort(table(chicagoCrime$Beat)))[1]) ``` @@ -1063,13 +1066,13 @@ Or names(which.max(table(chicagoCrime$Beat))) ``` -30. `r exerciseQuestions[30]` +30. `r .exerciseQuestions[30]` ```{r comment=""} with(subset(chicagoCrime, Description=="DOMESTIC BATTERY SIMPLE"), names(which.max(table(Beat)))) ``` -31. `r exerciseQuestions[31]` +31. `r .exerciseQuestions[31]` ```{r comment=""} sum(chicagoCrime$Primary.Type %in% c("HOMICIDE", "ROBBERY", "ASSAULT", "ARSON", "BURGLARY", "THEFT", "SEX OFFENSE", diff --git a/01_Intro_to_R.html b/01_Intro_to_R.html index cfa7daa..5056d1b 100644 --- a/01_Intro_to_R.html +++ b/01_Intro_to_R.html @@ -10,7 +10,7 @@ - +
This is the first set of notes for an introduction to R programming from criminology and criminal justice. These notes assume that you have the latest version of R and R Studio installed. We are also assuming that you know how to start a new script file and submit code to the R console. From that basic knowledge about using R, we are going to start with 2+2
and by the end of this set of notes you will load in a small Chicago crime dataset, create a few plots, count some crimes, and be able to subset the data. Our aim is to build a firm foundation on which we will build throughout this set of notes.
[1] 6 9 5 4 2 8 3 1 10 7
- [1] 5 2 7 1 4 10 6 3 9 8
- [1] 4 1 9 10 7 5 3 2 6 8
+ [1] 6 4 2 9 7 1 5 10 3 8
+ [1] 3 7 9 10 1 6 4 5 8 2
+ [1] 4 6 1 2 10 9 3 5 8 7
Notice that sample()
has several options including size=
to indicate how many to select and replace=
to indicate whether to sample with or without replacement. You can access the help on the sample()
function by typing ?sample
at the R prompt.
a
1 2 3 4 5 6
-182 165 168 157 178 150
-[1] 182
+155 185 168 147 181 164
+[1] 185
lapply(state.list,sample,size=3,replace=FALSE)
$west
-[1] "UT" "ID" "AZ"
+[1] "ID" "AZ" "CA"
$east
-[1] "MS" "DC" "OH"
+[1] "MD" "NH" "OH"
$central
-[1] "NE" "WI" "MI"
+[1] "OK" "WI" "SD"
sapply(state.list,length)
west east central
11 24 16
@@ -600,9 +603,8 @@ This creates a separate object called other
, unconnected to our state.list
. By using the $
we add our new collection of states (other) to state.list
.
We have now created a lot of objects. At any time you can run ls()
to list all the objects that R has in memory.
ls()
-[1] "a" "b" "counterExercise"
-[4] "exerciseQuestions" "exNum" "i"
-[7] "my.states" "state.list" "state.names"
+[1] "a" "b" "i" "my.states" "state.list"
+[6] "state.names"
Assuming you are using R Studio, you can also see the objects stored in memory by clicking on the Environment tab.
function (x, na.rm = FALSE, type = 7)
diff(quantile(as.numeric(x), c(0.25, 0.75), na.rm = na.rm, names = FALSE,
type = type))
-<bytecode: 0x000000001d36aad8>
+<bytecode: 0x0000000016585f08>
<environment: namespace:stats>
You can see that it computes the 0.25 quantile and the 0.75 quantile and uses diff()
to compute their difference.
a <- matrix(sample(1:5,size=12,replace=TRUE),nrow=4)
a
[,1] [,2] [,3]
-[1,] 1 4 1
-[2,] 2 4 2
-[3,] 4 2 3
-[4,] 2 1 3
+[1,] 3 4 1
+[2,] 4 3 2
+[3,] 2 3 5
+[4,] 3 1 1
This matrix has two dimensions, 4 rows and 3 columns. You can use square brackets to select elements from the matrix.
a[1,2] # element in first row, second column
a[1,] # the entire first row
@@ -686,36 +688,36 @@ Matrices and apply()
a[-1,-1] # dropping the first row and first column
a[3:4,2:3] # rows 3 & 4, columns 2 & 3
[1] 4
-[1] 1 4 1
-[1] 4 4 2 1
+[1] 3 4 1
+[1] 4 3 3 1
[,1] [,2]
-[1,] 4 2
-[2,] 2 3
-[3,] 1 3
+[1,] 3 2
+[2,] 3 5
+[3,] 1 1
[,1] [,2]
-[1,] 2 3
-[2,] 1 3
+[1,] 3 5
+[2,] 1 1
The numbers to the left of the comma index rows and the numbers to the right of the comma index columns. The apply()
function, like the lapply()
and sapply()
functions, allows you to apply a function to all the rows or all the columns of a matrix. apply()
needs the name of the matrix, whether you want to apply the function to the first dimension (rows) or the second dimension (columns), and the name of the function to apply.
apply(a, 1, sum) # compute sum of each row
apply(a, 2, sum) # compute sum of each column
apply(a, 1, mean) # compute mean of each row
apply(a, 1, summary) # summarize each row
-[1] 6 8 9 6
-[1] 9 11 9
-[1] 2.000000 2.666667 3.000000 2.000000
- [,1] [,2] [,3] [,4]
-Min. 1.0 2.000000 2.0 1.0
-1st Qu. 1.0 2.000000 2.5 1.5
-Median 1.0 2.000000 3.0 2.0
-Mean 2.0 2.666667 3.0 2.0
-3rd Qu. 2.5 3.000000 3.5 2.5
-Max. 4.0 4.000000 4.0 3.0
+[1] 8 9 10 5
+[1] 12 11 9
+[1] 2.666667 3.000000 3.333333 1.666667
+ [,1] [,2] [,3] [,4]
+Min. 1.000000 2.0 2.000000 1.000000
+1st Qu. 2.000000 2.5 2.500000 1.000000
+Median 3.000000 3.0 3.000000 1.000000
+Mean 2.666667 3.0 3.333333 1.666667
+3rd Qu. 3.500000 3.5 4.000000 2.000000
+Max. 4.000000 4.0 5.000000 3.000000
We can also create a new function right on the spot to compute something on each row or column. Let’s find the minimum and maximum values in each row and find out if all the values are greater than 1.
apply(a, 1, function(x) {c(min(x),max(x))}) # there is also a function range()
apply(a, 1, function(x) {all(x>1)})
[,1] [,2] [,3] [,4]
[1,] 1 2 2 1
-[2,] 4 4 4 3
+[2,] 4 4 5 3
[1] FALSE TRUE TRUE FALSE
load("chicago crime 20141124-20141209.RData")
List the objects R now has in memory and you will see that there is a new object, chicagoCrime
.
ls()
- [1] "a" "b" "chicagoCrime"
- [4] "counterExercise" "exerciseQuestions" "exNum"
- [7] "give.first.and.last" "i" "my.states"
-[10] "state.list" "state.names"
+[1] "a" "b" "chicagoCrime"
+[4] "give.first.and.last" "i" "my.states"
+[7] "state.list" "state.names"
If you did not spell the name of the .RData file exactly correctly, then R will give you an error. A common occurrence when downloading the same file from the web multiple times is for your web browser to add numbers to the multiple versions you’ve downloaded. So check the file name carefully. Here’s what happens when I request a file that doesn’t exist.
load("chicago crime.RData")
Warning in readChar(con, 5L, useBytes = TRUE): cannot open compressed file
@@ -1629,7 +1630,7 @@ For loops
",+Chicago,+IL",sep="")
}
)
-Note that we’ve wrapped the for loop with a call to system.time()
. This will keep the time on how long this for loop takes. When creating these notes on a laptop it took 0.56 seconds. Not bad. Much faster than having to type out these 10,000 URLs. However, if we had one million addresses, then this code is going to take much more time.
+Note that we’ve wrapped the for loop with a call to system.time()
. This will keep the time on how long this for loop takes. When creating these notes on a laptop it took 0.58 seconds. Not bad. Much faster than having to type out these 10,000 URLs. However, if we had one million addresses, then this code is going to take much more time.
In fact, in R for loops are very slow. They are so slow that R programmers attempt to avoid them whenever possible. We can actually accomplish the same task without using a for loop. gsub()
will accept a whole collection of addresses and modify them all at once. paste()
also will accept a collection of text values and paste them together with the other parts.
timeWithoutForLoop <- system.time(
{
@@ -1639,7 +1640,7 @@ For loops
",+Chicago,+IL",sep="")
}
)
-This took 0.02 seconds. That’s 28 times faster than the for loop.
+This took 0.02 seconds. That’s 29 times faster than the for loop.
state.list
a <- sample(1:6, size=100000, replace=TRUE)
table(a)[6]/length(a)
- 6
-0.16624
+ 6
+0.1672
Or
sum(a==6)/length(a)
-[1] 0.16624
+[1] 0.1672
Or
mean(a==6)
-[1] 0.16624
+[1] 0.1672
sample()
to estimate the probability that the sum of two die equal 7
-[1] 0.159
+[1] 0.156
sample()
to select randomly five states without replacement (Answers will vary)
sample(state.names, size=5, replace=FALSE)
-[1] "MS" "NJ" "MD" "DE" "IL"
+[1] "OR" "TX" "KY" "HI" "AR"
sample()
to select randomly 1000 states with replacement
a
AK AL AR AZ CA CO CT DC DE FL GA HI IA ID IL IN KS KY LA MA MD ME MI MN MO
-19 23 14 15 20 28 20 22 15 20 12 18 22 17 20 16 23 28 21 17 18 27 21 19 21
+14 10 18 18 28 13 21 21 26 19 19 17 26 13 21 19 17 23 13 25 26 19 26 18 17
MS MT NC ND NE NH NJ NM NV NY OH OK OR PA RI SC SD TN TX UT VA VT WA WI WV
-18 26 21 17 20 17 18 18 28 21 16 11 23 19 22 15 17 15 29 20 20 22 12 20 15
+25 19 20 16 25 19 23 23 25 23 18 18 19 10 16 28 13 12 21 19 18 25 18 17 23
WY
-24
+20
sort(table(a))[1]
-OK
-11
+AL
+10
state.list
so that “DC” is in “other” rather than “east”
chicagoCrime[sample(1:nrow(chicagoCrime), size=3),]
- ID Case.Number Date Block IUCR
-4323 9881744 HX532834 12/03/2014 11:30:00 AM 033XX W CERMAK RD 1310
-3326 9879694 HX530445 12/04/2014 07:30:00 PM 002XX S CICERO AVE 0530
-9524 9868923 HX519453 11/24/2014 09:15:00 PM 048XX W GEORGE ST 1310
- Primary.Type Description Location.Description
-4323 CRIMINAL DAMAGE TO PROPERTY RESIDENCE
-3326 ASSAULT AGGRAVATED: OTHER DANG WEAPON STREET
-9524 CRIMINAL DAMAGE TO PROPERTY APARTMENT
- Arrest Domestic Beat District Ward Community.Area FBI.Code
-4323 false false 1024 10 22 30 14
-3326 false false 1113 11 28 25 04A
-9524 false false 2521 25 31 19 14
- X.Coordinate Y.Coordinate Year Updated.On Latitude
-4323 1154560 1889141 2014 12/10/2014 12:42:02 PM 41.85162
-3326 1144474 1898281 2014 12/11/2014 12:47:57 PM 41.87690
-9524 1143684 1918886 2014 12/01/2014 12:41:33 PM 41.93346
- Longitude Location
-4323 -87.70821 (41.851621827, -87.708213779)
-3326 -87.74500 (41.876898678, -87.745002411)
-9524 -87.74739 (41.933455891, -87.747386305)
- google.maps.url
-4323 https://www.google.com/maps/place/03350 W CERMAK RD,+Chicago,+IL
-3326 https://www.google.com/maps/place/00250 S CICERO AVE,+Chicago,+IL
-9524 https://www.google.com/maps/place/04850 W GEORGE ST,+Chicago,+IL
+ ID Case.Number Date Block IUCR
+9791 9868419 HX518714 11/24/2014 12:30:00 PM 065XX S WESTERN AVE 0460
+8039 9871729 HX522206 11/27/2014 11:30:00 AM 096XX S HALSTED ST 0486
+3951 9878332 HX529282 12/03/2014 08:55:00 PM 055XX W CORTEZ ST 2024
+ Primary.Type Description Location.Description Arrest
+9791 BATTERY SIMPLE RESTAURANT false
+8039 BATTERY DOMESTIC BATTERY SIMPLE RESIDENCE false
+3951 NARCOTICS POSS: HEROIN(WHITE) RESIDENCE true
+ Domestic Beat District Ward Community.Area FBI.Code X.Coordinate
+9791 false 832 8 15 66 08B 1161532
+8039 true 2223 22 21 73 08B 1172717
+3951 false 1524 15 37 25 18 1139051
+ Y.Coordinate Year Updated.On Latitude Longitude
+9791 1861424 2014 12/01/2014 12:41:33 PM 41.77542 -87.68339
+8039 1840878 2014 12/04/2014 12:50:41 PM 41.71880 -87.64300
+3951 1906410 2014 12/10/2014 12:42:02 PM 41.89931 -87.76472
+ Location
+9791 (41.775420908, -87.683394137)
+8039 (41.718800734, -87.642995516)
+3951 (41.899305964, -87.764716528)
+ google.maps.url
+9791 https://www.google.com/maps/place/06550 S WESTERN AVE,+Chicago,+IL
+8039 https://www.google.com/maps/place/09650 S HALSTED ST,+Chicago,+IL
+3951 https://www.google.com/maps/place/05550 W CORTEZ ST,+Chicago,+IL
NA
s in each column
user system elapsed
- 0.62 0.00 0.63
+ 0.58 0.02 0.60
Or
system.time(
for (i in 1:nrow(chicagoCrime))
@@ -2138,7 +2139,7 @@ Solutions to the exercises
}
)
user system elapsed
- 0.63 0.00 0.63
+ 0.53 0.00 0.53