elegant-scipy · stefanv · Mar 18, 2019 · Mar 18, 2019 · Mar 18, 2019 · Mar 18, 2019
diff --git a/Makefile b/Makefile
@@ -32,21 +32,16 @@ TITLES := preface ch1 ch2 ch3 ch4 ch5 ch6 ch7 ch8 epilogue acknowledgements
 CHS_ := $(addprefix $(BUILD_HTML)/,$(TITLES))
 chs: build_dirs $(addsuffix .html,$(CHS_))
 
-ipynb/ch1.ipynb: data/counts.txt
+ipynb/ch1.ipynb: data/counts.txt.bz2
 
-ipynb/ch2.ipynb: data/counts.txt
+ipynb/ch2.ipynb: data/counts.txt.bz2
 
 ipynb/ch4.ipynb: $(FIGURES)/radar_time_signals.png $(FIGURES)/sliding_window.png
 
 ipynb/ch7.ipynb: $(FIGURES)/optimization_comparison.png
 
 ipynb/ch8.ipynb: data/dm6.fa
 
-.SECONDARY: data/counts.txt data/dm6.fa data/dm6.fa.gz
-
-data/counts.txt: data/counts.txt.bz2
-	 bunzip2 -d -k -f data/counts.txt.bz2
-
 data/dm6.fa: data/dm6.fa.gz
 	 gunzip -f -k $<
 
@@ -110,7 +105,7 @@ $(BUILD_NB)/%.ipynb: %.markdown style/elegant.mplstyle build_dirs
 # .SECONDARY: Ensure ipynb files are not deleted after being generated.
 NBS_ := $(addprefix $(BUILD_NB)/,$(TITLES))
 nbs: $(addsuffix .ipynb,$(NBS_))
-.SECONDARY: nbs data/counts.txt data/dm6.fa data/dm6.fa.gz
+.SECONDARY: nbs data/dm6.fa data/dm6.fa.gz
 
 # .PHONY: Special Makefile variable specifying targets that don't
 #     correspond to any actual files.

diff --git a/markdown/ch1.markdown b/markdown/ch1.markdown
@@ -373,7 +373,7 @@ It allows us to express complex operations concisely and efficiently.
 ## Exploring a Gene Expression Dataset
 
 The dataset that we'll be using is an RNAseq experiment of skin cancer samples from The Cancer Genome Atlas (TCGA) project (http://cancergenome.nih.gov/).
-We've already cleaned and sorted the data for you, so you can just use `data/counts.txt`
+We've already cleaned and sorted the data for you, so you can use `data/counts.txt.bz2`
 in the book repository.
 In Chapter 2 we will be using this gene expression data to predict mortality in skin cancer patients, reproducing a simplified version of [Figures 5A and 5B](http://www.cell.com/action/showImagesData?pii=S0092-8674%2815%2900634-0) of a [paper](http://dx.doi.org/10.1016/j.cell.2015.05.044) from the TCGA consortium.
 But first we need to get our heads around the biases in our data, and think about how we could improve it.
@@ -395,12 +395,13 @@ In later chapters we will see a bit more of pandas, but for details, read *Pytho
 for Data Analysis* (O'Reilly) by the creator of pandas, Wes McKinney.
 
 ```python
+import bz2
 import numpy as np
 import pandas as pd
 
 # Import TCGA melanoma data
-filename = 'data/counts.txt'
-with open(filename, 'rt') as f:
+filename = 'data/counts.txt.bz2'
+with bz2.open(filename, 'rt') as f:
     data_table = pd.read_csv(f, index_col=0) # Parse file with pandas
 
 print(data_table.iloc[:5, :5])

diff --git a/markdown/ch2.markdown b/markdown/ch2.markdown
@@ -93,12 +93,14 @@ As in Chapter 1, first we will use pandas to make our job of reading in the data
 First we will read in our counts data as a pandas table.
 
 ```python
+import bz2
 import numpy as np
 import pandas as pd
 
 # Import TCGA melanoma data
-filename = 'data/counts.txt'
-data_table = pd.read_csv(filename, index_col=0)  # Parse file with pandas
+filename = 'data/counts.txt.bz2'
+with bz2.open(filename, 'rt') as f:
+    data_table = pd.read_csv(f, index_col=0)  # Parse file with pandas
 
 print(data_table.iloc[:5, :5])
 ```