In [14]:
# Load tidyverse (data import and manipulation) and edgeR (count normalisation)
library(tidyverse)
library(edgeR)

# Absolute paths to the gene-level count matrices, one CSV per cell line.
# The first column of each file has no header (readr reports filling in its
# name) and is used below as the gene identifier.
mcf_file <- "/data/turing_data_dump/MCF7/Gene/count_matrix.csv"
t47d_file <- "/data/turing_data_dump/T47D/Gene/count_matrix.csv"

# Read one count matrix and force the first column to be called "X1".
# readr invents a name for the header-less first column, and that name
# depends on the readr version ("X1" in readr 1.x, "...1" in readr >= 2.0).
# Setting it explicitly keeps the join key and `tmp_df$X1` stable.
read_count_matrix <- function(path) {
    counts <- read_csv(path)
    names(counts)[1] <- "X1"
    counts
}

# Combine both cell lines into one wide table keyed on the identifier column,
# then drop every row whose identifier contains "NA." (rows with a missing
# identifier are dropped as well, because filter() discards NA conditions).
# NOTE(review): presumably "NA.<n>" are placeholder names for unnamed genes;
# the pattern is unanchored, so confirm no real identifier contains "NA.".
# NOTE(review): full_join keeps identifiers present in only one file and
# fills the other file's samples with NA - confirm both files list the same
# genes before the counts are handed to edgeR.
tmp_df <-
    full_join(
        read_count_matrix(mcf_file),
        read_count_matrix(t47d_file),
        by = "X1"
    ) %>%
    dplyr::filter(!str_detect(X1, "NA\\.[0-9]{0,3}"))

# Convert the joined table into a matrix: every column except the first
# (the identifier column X1) becomes data, and the identifiers become the
# row names.
count_mat <- as.matrix(tmp_df[, -1])
rownames(count_mat) <- tmp_df$X1

# Derive one group label per sample from the column names: strip the leading
# "SLX-<5 digits> UDI<4 digits> " prefix, then keep the cell line (MCF7 or
# T47D) plus the text up to the next space. Names that do not match yield NA.
group <- str_extract(
    str_replace(colnames(count_mat), "SLX-\\d{5} UDI\\d{4} ", ""),
    "(MCF7|T47D) (.*?)(?=( ))"
)

# Build the edgeR DGEList from the count matrix and the per-sample group
# labels. remove.zeros = TRUE asks edgeR to drop rows with a zero total count.
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
all_dat <- DGEList(counts = count_mat, group = group, remove.zeros = TRUE)

# Compute TMM normalisation factors; they are stored on the DGEList and used
# by cpm(normalized.lib.sizes = TRUE).
all_dat <- calcNormFactors(all_dat, method = "TMM")

# Estimate robust GLM dispersions, printing progress for each iteration.
# NOTE(review): no design matrix is passed, so edgeR falls back to its
# default design - confirm that is the intended model.
all_dat <- estimateGLMRobustDisp(all_dat, verbose = TRUE)

# Counts per million using the normalised library sizes, on the natural
# (non-log) scale. Computed once and reused for both exported tables rather
# than calling cpm() twice with identical arguments.
norm_cpm <- cpm(all_dat, normalized.lib.sizes = TRUE, log = FALSE)

# Natural-scale CPM, with the matrix row names moved into a "gene" column.
norm_count <- as_tibble(norm_cpm, rownames = "gene")

# log2(CPM + 1); the +1 keeps zero counts finite (they map to 0).
norm_count_l <- as_tibble(log2(norm_cpm + 1), rownames = "gene")

# Export both tables as CSV.
write_csv(norm_count, "/shared/tony/norm_count.csv")
write_csv(norm_count_l, "/shared/tony/norm_count-log.csv")

“Missing column names filled in: 'X1' [1]”


Parsed with column specification:
cols(
  .default = col_double(),
  X1 = col_character()
)



See spec(...) for full column specifications.



“Missing column names filled in: 'X1' [1]”


Parsed with column specification:
cols(
  .default = col_double(),
  X1 = col_character()
)



See spec(...) for full column specifications.



Iteration 1: Re-fitting GLM. 


Re-estimating trended dispersion.



Re-estimating tagwise dispersion.



Iteration 2: Re-fitting GLM. 


Re-estimating trended dispersion.



Re-estimating tagwise dispersion.



Iteration 3: Re-fitting GLM. 


Re-estimating trended dispersion.



Re-estimating tagwise dispersion.



Iteration 4: Re-fitting GLM. 


Re-estimating trended dispersion.



Re-estimating tagwise dispersion.



Iteration 5: Re-fitting GLM. 


Re-estimating trended dispersion.



Re-estimating tagwise dispersion.



Iteration 6: Re-fitting GLM. 


Re-estimating trended dispersion.



Re-estimating tagwise dispersion.



In [10]:
# Show the log2(CPM + 1) table as a base data.frame. data.frame() applies its
# default name checking, so sample names containing spaces or hyphens are
# displayed with dots instead.
norm_count_l %>% data.frame()

Unnamed: 0_level_0,SLX.18061.UDI0001.MCF7.SUMO3.m16.B,SLX.18061.UDI0002.MCF7.NCOA3.NCOA3.2.E,SLX.18061.UDI0003.MCF7.SUMO1.21m1.D,SLX.18061.UDI0004.MCF7.TFAP2C.31a.A,SLX.18061.UDI0005.MCF7.CREBBP.28gr3.A,SLX.18061.UDI0006.MCF7.NRIP1.5c.D,SLX.18061.UDI0007.MCF7.CTRL.Renilla.D,SLX.18061.UDI0008.MCF7.GRHL2.37b.B,SLX.18061.UDI0009.MCF7.NCOA3.1dr2.D,SLX.18061.UDI0010.MCF7.SUMO3.24ar3.C,⋯,SLX.17619.UDI0068.T47D.NR2F2.17ar3.C,SLX.17619.UDI0070.T47D.DPF2.27a.D,SLX.17619.UDI0071.T47D.SUMO1.21m1.C,SLX.17619.UDI0072.T47D.CTRL.Renilla.B,SLX.17619.UDI0073.T47D.ZMIZ1.19ar3.C,SLX.17619.UDI0074.T47D.SUMO2.35m.C,SLX.17619.UDI0075.T47D.CREBBP.34a.E,SLX.17619.UDI0076.T47D.GRHL2.37b.B,SLX.17619.UDI0077.T47D.RARA.7b.E,SLX.17619.UDI0078.T47D.NRIP1.5c.C
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
DDX11L1,0.7480253,0.4928952,0.6569768,0.8330013,0.7050267,0.92794847,0.5334501,0.8359868,0.7040804,0.6532975,⋯,0.2634337,0.1129484,0.3624382,0.08655423,0.13016971,0.5801773,0.14597299,0.08047563,0.2105460,0.6064654
WASH7P,4.7038361,5.0715286,4.5183746,4.4166506,4.8954470,4.78006571,4.7034128,4.7762830,4.7413809,4.8915166,⋯,5.0769999,4.9809051,4.3884770,5.15232764,4.98583243,4.7288292,4.86870198,4.95965205,5.1332487,5.1958296
LOC729737,5.1282366,5.0792736,4.8178562,4.4550285,5.0484935,5.02857107,5.1512998,4.9958132,5.4161889,4.9868063,⋯,5.0187572,5.0067156,3.2292041,5.09083692,4.04723217,3.5497570,3.09758401,3.53764603,5.2209530,5.0840233
LOC102725121,0.3559082,0.1836521,0.2536969,0.2697852,0.4437126,0.63372850,0.2912419,0.6958140,0.2924056,0.3383962,⋯,0.2634337,0.2176925,0.1925709,0.08655423,0.06655246,0.3448489,0.00000000,0.08047563,0.2105460,0.4874265
WASH9P,5.1066419,5.3734778,4.9448604,4.7653779,5.1652960,5.09569100,5.0748454,5.1879440,5.1132099,5.2764233,⋯,5.4737445,5.3570283,4.7820362,5.58284994,5.37035928,5.0811771,5.20069852,5.24361991,5.5232611,5.6085398
LOC100132287,4.8348846,4.8404051,4.4939632,4.1080756,4.9235362,4.79577741,4.9192956,4.7543639,5.1915816,4.7067115,⋯,4.7064366,4.7405933,3.0334262,4.87374526,3.78104792,3.3667586,2.91615425,3.27237456,4.9473827,4.7115915
LOC105378947,1.3778488,0.9298182,1.2300851,1.4255409,1.2049934,0.72408713,1.1427030,1.1896134,1.3633195,0.8065240,⋯,0.1808541,0.4068026,0.2514438,0.24548731,0.24956047,0.8560327,0.21377025,0.22909533,0.7035472,0.3576751
LOC100133331,4.8267798,4.7067927,4.4502234,4.1251768,4.8238807,4.82924357,4.8403865,4.6772447,5.0886529,4.6394463,⋯,4.6803726,4.6936947,3.1070037,4.87374526,3.67838795,3.3349408,2.86435880,3.16574381,4.8719756,4.6436212
LOC100288069,2.4845421,1.7060176,2.2181328,2.0445864,2.4831769,2.07352302,2.4759515,2.1605555,2.1277026,2.3621858,⋯,2.9655616,2.8620703,2.1463818,2.85697289,2.60852812,2.1293571,2.08662663,2.39097761,2.9938408,3.1105128
FAM87B,0.3101317,0.6674592,0.4693824,0.2197139,0.3449136,0.26372048,0.5334501,0.3665199,0.3443931,0.4818418,⋯,0.1377213,0.1662709,0.3624382,0.24548731,0.13016971,0.3950858,0.21377025,0.15669840,0.3355709,0.1115500
