From 0024871cb6b8611159a169a918f463f605368061 Mon Sep 17 00:00:00 2001 From: GitHub Actions Date: Wed, 10 Apr 2024 15:42:37 +0000 Subject: [PATCH] markdown source builds Auto-generated via {sandpaper} Source : 46d1774cc80a5e8fe44b15f8a0313b98e90b04ef Branch : main Author : Jason Williams Time : 2024-04-10 15:41:08 +0000 Message : Merge pull request #269 from naupaka/main Address #120 by adding short description of `$` when it is first used --- 03-basics-factors-dataframes.md | 33 +++++++++++++++++---------------- md5sum.txt | 2 +- 2 files changed, 18 insertions(+), 17 deletions(-) diff --git a/03-basics-factors-dataframes.md b/03-basics-factors-dataframes.md index af783532..c6354487 100644 --- a/03-basics-factors-dataframes.md +++ b/03-basics-factors-dataframes.md @@ -150,7 +150,7 @@ for `read.table("file.csv", sep = ",")`. You can see in the help documentation that there are several additional variations of `read.table`, such as `read.csv2` to read tables separated by `;` and `read.delim` to read in tables separated by `\t` (tabs). If you know how your table is separated, you can use one of the provided short cuts, -but case you run into an unconventional separator you are now equipt with the knowledge to define it in the `sep = ` arugument of `read.table`! +but case you run into an unconventional separator you are now equipped with the knowledge to define it in the `sep = ` argument of `read.table`! :::::::::::::::::::::::::::::::::::::::::::::::::: @@ -271,7 +271,7 @@ new data frame using the `data.frame()` function. ```r ## put the first three columns of variants into a new data frame called subset -subset<-data.frame(variants[,c(1:3,6)]) +subset <- data.frame(variants[, c(1:3, 6)]) ``` Now, let's use the `str()` (structure) function to look a little more closely @@ -305,12 +305,13 @@ Ok, thats a lot up unpack! Some things to notice. Factors are the final major data structure we will introduce in our R genomics lessons. Factors can be thought of as vectors which are specialized for categorical data. Given R's specialization for statistics, this make sense since -categorial and continuous variables are usually treated differently. Sometimes +categorical and continuous variables are usually treated differently. Sometimes you may want to have data treated as a factor, but in other cases, this may be undesirable. -Let's see the value of treating some of which are categorical in nature as -factors. Let's take a look at just the alternate alleles +Let's explore the value of treating some vectors that are categorical in nature as +factors. To do this we'll take a look at just the alternate alleles. We can use the `$` operator +to access or extract a column by its name in data frames (or to extract objects within named lists). ```r @@ -331,12 +332,12 @@ head(alt_alleles) ``` There are 801 alleles (one for each row). To simplify, lets look at just the -single-nuleotide alleles (SNPs). We can use some of the vector indexing skills +single-nucleotide alleles (SNPs). We can use some of the vector indexing skills from the last episode. ```r -snps <- c(alt_alleles[alt_alleles=="A"], +snps <- c(alt_alleles[alt_alleles == "A"], alt_alleles[alt_alleles=="T"], alt_alleles[alt_alleles=="G"], alt_alleles[alt_alleles=="C"]) @@ -563,7 +564,7 @@ a. ```r -variants[1,1] +variants[1, 1] ``` ```{.output} @@ -574,7 +575,7 @@ b. ```r -variants[2,4] +variants[2, 4] ``` ```{.output} @@ -585,7 +586,7 @@ c. ```r -variants[801,29] +variants[801, 29] ``` ```{.output} @@ -653,7 +654,7 @@ f. ```r -variants[1:4,1] +variants[1:4, 1] ``` ```{.output} @@ -664,7 +665,7 @@ g. ```r -variants[1:10,c("REF","ALT")] +variants[1:10, c("REF", "ALT")] ``` ```{.output} @@ -696,7 +697,7 @@ h. ```r -variants[,c("sample_id")] +variants[, c("sample_id")] ``` @@ -798,7 +799,7 @@ l. ```r -variants[variants$REF == "A",] +variants[variants$REF == "A", ] ``` @@ -861,7 +862,7 @@ them to a new object name: ```r # create a new data frame containing only observations from SRR2584863 -SRR2584863_variants <- variants[variants$sample_id == "SRR2584863",] +SRR2584863_variants <- variants[variants$sample_id == "SRR2584863", ] # check the dimension of the data frame @@ -1361,7 +1362,7 @@ table(as.factor(Ecoli_metadata$cit)) ``` ```r -Ecoli_metadata[7,7] +Ecoli_metadata[7, 7] ``` ```{.output} diff --git a/md5sum.txt b/md5sum.txt index 1858640c..cd7b826c 100644 --- a/md5sum.txt +++ b/md5sum.txt @@ -6,7 +6,7 @@ "episodes/00-introduction.Rmd" "e1354ed92fb458179c8c00b00ee1cf55" "site/built/00-introduction.md" "2024-04-04" "episodes/01-r-basics.Rmd" "2f4b7fd244990f97e0c2fe88bae2618b" "site/built/01-r-basics.md" "2024-04-04" "episodes/02-data-prelude.Rmd" "ab2b1fd3cdaae919f9e409f713a0a8ad" "site/built/02-data-prelude.md" "2024-04-04" -"episodes/03-basics-factors-dataframes.Rmd" "cab7ab3fe53143558e6af3eee5774d35" "site/built/03-basics-factors-dataframes.md" "2024-04-04" +"episodes/03-basics-factors-dataframes.Rmd" "d46879cbe37a7b1f21a9ed50f49ed4d5" "site/built/03-basics-factors-dataframes.md" "2024-04-10" "episodes/04-bioconductor-vcfr.Rmd" "10eb69b4697d7ecb9695d36c0d974208" "site/built/04-bioconductor-vcfr.md" "2024-04-04" "episodes/05-dplyr.Rmd" "f74055bd8677338a213e0a0c6c430119" "site/built/05-dplyr.md" "2024-04-04" "episodes/06-data-visualization.Rmd" "0b45534421bad05f040b24c40b6da71b" "site/built/06-data-visualization.md" "2024-04-04"