From 0024871cb6b8611159a169a918f463f605368061 Mon Sep 17 00:00:00 2001
From: GitHub Actions <actions@github.com>
Date: Wed, 10 Apr 2024 15:42:37 +0000
Subject: [PATCH] markdown source builds

Auto-generated via {sandpaper}
Source  : 46d1774cc80a5e8fe44b15f8a0313b98e90b04ef
Branch  : main
Author  : Jason Williams <JasonJWilliamsNY@gmail.com>
Time    : 2024-04-10 15:41:08 +0000
Message : Merge pull request #269 from naupaka/main

Address #120 by adding short description of `$` when it is first used
---
 03-basics-factors-dataframes.md | 33 +++++++++++++++++----------------
 md5sum.txt                      |  2 +-
 2 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/03-basics-factors-dataframes.md b/03-basics-factors-dataframes.md
index af783532..c6354487 100644
--- a/03-basics-factors-dataframes.md
+++ b/03-basics-factors-dataframes.md
@@ -150,7 +150,7 @@ for `read.table("file.csv", sep = ",")`. You can see in the help
 documentation that there are several additional variations of 
 `read.table`, such as `read.csv2` to read tables separated by `;` 
 and `read.delim` to read in tables separated by `\t` (tabs). If you know how your table is separated, you can use one of the provided short cuts, 
-but case you run into an unconventional separator you are now equipt with the knowledge to define it in the `sep = ` arugument of `read.table`!
+but in case you run into an unconventional separator you are now equipped with the knowledge to define it in the `sep = ` argument of `read.table`!
 
 
 ::::::::::::::::::::::::::::::::::::::::::::::::::
@@ -271,7 +271,7 @@ new data frame using the `data.frame()` function.
 ```r
 ## put the first three columns of variants into a new data frame called subset
 
-subset<-data.frame(variants[,c(1:3,6)])
+subset <- data.frame(variants[, c(1:3, 6)])
 ```
 
 Now, let's use the `str()` (structure) function to look a little more closely
@@ -305,12 +305,13 @@ Ok, thats a lot up unpack! Some things to notice.
 Factors are the final major data structure we will introduce in our R genomics
 lessons. Factors can be thought of as vectors which are specialized for
 categorical data. Given R's specialization for statistics, this make sense since
-categorial and continuous variables are usually treated differently. Sometimes
+categorical and continuous variables are usually treated differently. Sometimes
 you may want to have data treated as a factor, but in other cases, this may be
 undesirable.
 
-Let's see the value of treating some of which are categorical in nature as
-factors. Let's take a look at just the alternate alleles
+Let's explore the value of treating some vectors that are categorical in nature as
+factors. To do this we'll take a look at just the alternate alleles. We can use the `$` operator 
+to access or extract a column by its name in data frames (or to extract objects within named lists).
 
 
 ```r
@@ -331,12 +332,12 @@ head(alt_alleles)
 ```
 
 There are 801 alleles (one for each row). To simplify, lets look at just the
-single-nuleotide alleles (SNPs). We can use some of the vector indexing skills
+single-nucleotide alleles (SNPs). We can use some of the vector indexing skills
 from the last episode.
 
 
 ```r
-snps <- c(alt_alleles[alt_alleles=="A"],
+snps <- c(alt_alleles[alt_alleles == "A"],
   alt_alleles[alt_alleles=="T"],
   alt_alleles[alt_alleles=="G"],
   alt_alleles[alt_alleles=="C"])
@@ -563,7 +564,7 @@ a.
 
 
 ```r
-variants[1,1]
+variants[1, 1]
 ```
 
 ```{.output}
@@ -574,7 +575,7 @@ b.
 
 
 ```r
-variants[2,4]
+variants[2, 4]
 ```
 
 ```{.output}
@@ -585,7 +586,7 @@ c.
 
 
 ```r
-variants[801,29]
+variants[801, 29]
 ```
 
 ```{.output}
@@ -653,7 +654,7 @@ f.
 
 
 ```r
-variants[1:4,1]
+variants[1:4, 1]
 ```
 
 ```{.output}
@@ -664,7 +665,7 @@ g.
 
 
 ```r
-variants[1:10,c("REF","ALT")]
+variants[1:10, c("REF", "ALT")]
 ```
 
 ```{.output}
@@ -696,7 +697,7 @@ h.
 
 
 ```r
-variants[,c("sample_id")]
+variants[, c("sample_id")]
 ```
 
 
@@ -798,7 +799,7 @@ l.
 
 
 ```r
-variants[variants$REF == "A",]
+variants[variants$REF == "A", ]
 ```
 
 
@@ -861,7 +862,7 @@ them to a new object name:
 ```r
 # create a new data frame containing only observations from SRR2584863
 
-SRR2584863_variants <- variants[variants$sample_id == "SRR2584863",]
+SRR2584863_variants <- variants[variants$sample_id == "SRR2584863", ]
 
 # check the dimension of the data frame
 
@@ -1361,7 +1362,7 @@ table(as.factor(Ecoli_metadata$cit))
 ```
 
 ```r
-Ecoli_metadata[7,7]
+Ecoli_metadata[7, 7]
 ```
 
 ```{.output}
diff --git a/md5sum.txt b/md5sum.txt
index 1858640c..cd7b826c 100644
--- a/md5sum.txt
+++ b/md5sum.txt
@@ -6,7 +6,7 @@
 "episodes/00-introduction.Rmd" "e1354ed92fb458179c8c00b00ee1cf55" "site/built/00-introduction.md" "2024-04-04"
 "episodes/01-r-basics.Rmd" "2f4b7fd244990f97e0c2fe88bae2618b" "site/built/01-r-basics.md" "2024-04-04"
 "episodes/02-data-prelude.Rmd" "ab2b1fd3cdaae919f9e409f713a0a8ad" "site/built/02-data-prelude.md" "2024-04-04"
-"episodes/03-basics-factors-dataframes.Rmd" "cab7ab3fe53143558e6af3eee5774d35" "site/built/03-basics-factors-dataframes.md" "2024-04-04"
+"episodes/03-basics-factors-dataframes.Rmd" "d46879cbe37a7b1f21a9ed50f49ed4d5" "site/built/03-basics-factors-dataframes.md" "2024-04-10"
 "episodes/04-bioconductor-vcfr.Rmd" "10eb69b4697d7ecb9695d36c0d974208" "site/built/04-bioconductor-vcfr.md" "2024-04-04"
 "episodes/05-dplyr.Rmd" "f74055bd8677338a213e0a0c6c430119" "site/built/05-dplyr.md" "2024-04-04"
 "episodes/06-data-visualization.Rmd" "0b45534421bad05f040b24c40b6da71b" "site/built/06-data-visualization.md" "2024-04-04"