# `polr` Ordinal Regression

## Purpose: 

* Install the `MASS` package, which provides the `polr` function
* Load non-zero features from `ordinalNet`
* Get p-values of features using `polr`

## Packages

Install the "MASS" package, which provides `polr`

In [1]:
# install.packages("MASS", "/home/yraghav/notebook_container/R/MASS")

# install.packages("ordinal", "/home/yraghav/notebook_container/R/ordinal")

In [2]:
# Show the library search path, append the local MASS install directory,
# then show the path again to confirm the change.
.libPaths()
.libPaths(append(.libPaths(), "/home/yraghav/notebook_container/R/MASS"))
.libPaths()

In [3]:
library("MASS")
library("data.table")
# library("ordinal")

In [4]:
# Raise the limit on nested expression evaluation to 500000.
options("expressions" = 500000)

# memory.limit() only ever had an effect on Windows and became a no-op stub
# in R 4.2.0; on other platforms it just emits the
# "'memory.limit()' is Windows-specific" warning. Call it only where it can
# actually change the limit.
if (.Platform$OS.type == "windows" && getRversion() < "4.2.0") {
    memory.limit(size = 80000000)
}

“'memory.limit()' is Windows-specific”


## ATAC-Seq

In [5]:
# Read the scaled, PCA-selected feature matrix (tab-separated, with a header
# row). check.names = FALSE keeps the column names exactly as they appear in
# the file.
# NOTE(review): nothing is transposed here; the file is presumably already
# stored with samples as rows and features as columns -- confirm.
ATAC_matrix <- data.table::fread(
    "/home/yraghav/MIT-Fraenkel-Lab/Projects/CHDI_NeuroLINCS/advanced_analysis/Ordinal_Regression/2_PCA_feature_selection/output/scaled/H3K27me3.PCA_selected_features.scaled.matrix",
    sep = "\t",
    header = TRUE,
    check.names = FALSE
)

In [6]:
# ATAC_matrix

In [7]:
# rownames(ATAC_matrix) = ATAC_matrix[,1]
# ATAC_matrix = ATAC_matrix[,-1]

In [8]:
# Keep the contents of the V1 column (used below as the sample names) so the
# condition labels can be derived from them, then display them.
original_row_names <- ATAC_matrix$V1
original_row_names

In [9]:
# ATAC_matrix

## Get Significant Features

In [10]:
# Read the coefficient table written by the ordinalNet feature-selection step
# (tab-separated). skip = 3 makes fread start reading after the first three
# lines of the file.
sig_features_table <- data.table::fread(
    "/home/yraghav/MIT-Fraenkel-Lab/Projects/CHDI_NeuroLINCS/advanced_analysis/Ordinal_Regression/3_ordinalNet_feature_selection/output/H3K27me3.coefficients.matrix",
    skip = 3,
    sep = "\t",
    check.names = FALSE
)
sig_features_table

In [11]:
# Names (column V1) of the rows whose V2 value is non-zero.
sig_features <- sig_features_table[V2 != 0, V1]

sig_features

In [12]:
# Keep only the columns of ATAC_matrix whose names are listed in sig_features.
ATAC_matrix = subset(ATAC_matrix, select=c(sig_features))
# NOTE(review): ATAC_matrix comes from data.table::fread; the table printed
# further down shows no row names and still carries the sample ids in the V1
# column, so this assignment may have no visible effect -- confirm.
rownames(ATAC_matrix) = original_row_names
# Display the reduced table.
ATAC_matrix

## Assign Condition

In [13]:
############
# ATAC-Seq #
############

# condition = c("Control", "Control", "High", "Medium", "Medium", "High", "Control", "Control", "Medium", "Control", "Control", "High")
# condition = factor(condition, order=TRUE, levels = c("Control", "Medium", "High"))

# condition

# Map sample names to an ordered condition factor (Control < Medium < High).
#
# The first "_"-separated token of each sample name is matched against the
# known tags: "20-20" or "22-20" -> Control, "56-22" -> Medium,
# "72-20" -> High. When several tags match, Control wins over Medium, which
# wins over High.
#
# sample_names: character vector of sample names.
# Returns an ordered factor with one entry per sample name, in the same order.
# Stops with an error if any sample matches none of the tags, because a
# silently skipped sample would leave the labels misaligned with the rows of
# the feature matrix.
condition_from_sample_names <- function(sample_names) {
    sample_names <- as.character(sample_names)
    q_length <- vapply(
        strsplit(sample_names, split = "_"),
        function(parts) parts[1],
        character(1)
    )

    labels <- rep(NA_character_, length(q_length))
    # Assign in reverse priority so that the higher-priority label is written
    # last and therefore wins when more than one tag matches.
    labels[grepl("72-20", q_length)] <- "High"
    labels[grepl("56-22", q_length)] <- "Medium"
    labels[grepl("20-20", q_length) | grepl("22-20", q_length)] <- "Control"

    unmatched <- is.na(labels)
    if (any(unmatched)) {
        stop(
            "Cannot assign a condition to sample(s): ",
            paste(sample_names[unmatched], collapse = ", "),
            call. = FALSE
        )
    }

    factor(labels, ordered = TRUE, levels = c("Control", "Medium", "High"))
}

condition <- condition_from_sample_names(original_row_names)

In [14]:
# Prepend the condition labels as the first column of the feature table,
# then display the result.
ATAC_matrix <- cbind(condition, ATAC_matrix)
ATAC_matrix

condition,V1,chr1:865151-866637,chr1:893984-895972,chr1:896338-896626,chr1:978039-1012768,chr1:1033469-1040884,chr1:1093124-1102685,chr1:1116459-1122235,chr1:1173861-1177877,⋯,chrX:155195977-155197058,chrX:155808742-155809545,chrX:155997290-155999632,chrY:11107221-11107764,chrY:11291679-11294673,chrY:11312482-11312986,chrY:56830571-56839930,chrY:56840097-56841100,chrY:56842147-56844584,chrY:56871910-56872550
<ord>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
Medium,'56-22_H3K27me3_rep3_210115Fra_D20-4893-3_NA_sequence.fastq.merged.nodup.no_chrM_MT',-1.6415823,-1.6151683,-1.17805846,-1.77204579,-1.79919376,-1.6019669,-1.78619955,-1.65083154,⋯,-1.24573662,-0.90803886,-1.14199386,-1.0767967,-1.280114494,-1.12021974,-1.2677838,-1.2849386,-1.43228821,-1.2209447
Control,'20-20_H3K27me3_rep3-1_210115Fra_D20-4766-2_NA_sequence.fastq.merged.nodup.no_chrM_MT',-0.04662999,0.07230367,-0.53674212,-0.06528382,-0.08707753,-0.15698326,0.07069496,-0.16666137,⋯,0.3230981,-0.32075846,-0.4585132,-0.4772263,-0.334667846,-0.20106473,-0.1970321,-0.3013612,-0.05304641,-0.7781738
Control,'20-20_H3K27me3_rep2_210115Fra_D20-5077-3_NA_sequence.fastq.merged.nodup.no_chrM_MT',0.07320741,0.06073715,1.1520281,0.04457452,0.13176943,0.04068799,0.10005586,-0.12742834,⋯,-0.52344644,-0.39128403,-0.83835861,-1.0155648,-0.979618739,-0.59758074,-0.5987141,-0.5101605,-0.42341693,-0.8414904
Control,'22-20_H3K27me3_rep3_210115Fra_D20-4764-1_NA_sequence.fastq.merged.nodup.no_chrM_MT',-1.39159931,-1.42368316,-0.89273338,-1.35831062,-1.37679446,-1.24285529,-1.36771221,-1.35822078,⋯,-0.91582772,-0.56308003,-0.78063955,-0.6458549,-0.515246801,-1.00574208,-0.7490242,-0.5421367,-1.05423485,-0.8737104
Control,'22-20_H3K27me3_rep1_210115Fra_D20-4169-1_NA_sequence.fastq.merged.nodup.no_chrM_MT',0.11591543,0.26023328,-1.53231784,0.7248259,0.79573163,-0.25944238,0.58983954,0.81519832,⋯,2.56752925,2.25705717,2.66385058,2.3907511,2.032436587,1.93679696,2.2287754,1.8374929,1.86666497,2.2731651
Medium,'56-22_H3K27me3_rep2_210115Fra_D20-5079-3_NA_sequence.fastq.merged.nodup.no_chrM_MT',-0.280775,-0.20189874,-0.14354469,-0.33650393,-0.38544716,-0.35390168,-0.3775965,-0.31856457,⋯,-1.09262109,-0.53536736,-0.23991049,-0.6186544,-0.798120147,-0.64691917,-0.5983104,-0.9099538,-0.5775882,-0.6869814
Medium,'56-22_H3K27me3_rep1_210115Fra_D20-5525-4_NA_sequence.fastq.merged.nodup.no_chrM_MT',0.08965781,0.1288058,-0.04158733,-0.07820146,-0.13952452,0.38902221,-0.03150021,-0.02117041,⋯,0.07181798,-0.13406964,-0.62875018,-0.4098423,0.083376585,0.2681866,-0.1281676,-0.1706913,0.06829408,0.8113858
High,'72-20_H3K27me3_rep3_210115Fra_D20-4896-4_NA_sequence.fastq.merged.nodup.no_chrM_MT',-1.53569585,-1.29248131,-1.07526278,-1.4393086,-1.43144369,-1.34597075,-1.38412874,-1.44921966,⋯,-0.58816605,-0.89291907,-0.9789034,-1.0490367,-0.855488832,-1.47656774,-1.0312473,-1.3716949,-1.28766745,-1.019369
High,'72-20_H3K27me3_rep1_210115Fra_D20-5528-3_NA_sequence.fastq.merged.nodup.no_chrM_MT',1.12070069,1.00255463,0.99760941,0.98414955,0.97916619,1.33006417,1.10968302,1.06905042,⋯,0.6222036,-0.43773737,0.77006665,1.1920584,0.299268465,-0.03357013,0.3620827,1.1450446,0.41910178,0.9267091
Control,'22-20_H3K27me3_rep1-1_210115Fra_D20-5405-3_NA_sequence.fastq.merged.nodup.no_chrM_MT',-0.21543643,-0.79761072,0.22819363,-0.19488379,-0.08740195,-0.3893451,-0.360967,-0.34354768,⋯,-0.47444861,-0.02761864,0.04989674,-0.2222849,-0.456937028,-0.01333567,-0.2781429,-0.2025586,0.015155,0.3246458


## Fit Model

In [15]:
# Fit a proportional-odds logistic regression of condition on every column
# except V1; Hess = TRUE keeps the Hessian so summary() can report standard
# errors.
# NOTE(review): the recorded output shows a rank-deficiency warning, a
# residual deviance of about 7e-05 and standard errors in the thousands,
# which looks like perfect separation -- treat the derived p-values with
# caution.
fit <- polr(
    "condition ~ . - V1",
    data = (ATAC_matrix),
    Hess = TRUE,
    method = "logistic"
)

# Alternative fit with ordinal::clm, kept for reference:
# fit = clm("condition ~ .", data=(ATAC_matrix), Hess=TRUE, method = "logistic")

“design appears to be rank-deficient, so dropping some coefs”


In [16]:
# Coefficient table of the fitted model (Value, Std. Error, t value).
summary_table <- coef(summary(fit))

# Two-sided p-values from the t values, using a standard normal reference.
pval <- 2 * pnorm(abs(summary_table[, "t value"]), lower.tail = FALSE)

# Append the p-values, rounded to three decimals, as an extra column.
summary_table <- cbind(summary_table, "p value" = round(pval, 3))
summary_table

Unnamed: 0,Value,Std. Error,t value,p value
`chr1:865151-866637`,74.210889,3993.567,0.018582608,0.985
`chr1:893984-895972`,338.104152,4165.645,0.081164903,0.935
`chr1:896338-896626`,-98.720472,3175.405,-0.0310891,0.975
`chr1:978039-1012768`,428.733032,1357.353,0.315859542,0.752
`chr1:1033469-1040884`,335.489287,2806.946,0.119521094,0.905
`chr1:1093124-1102685`,194.349824,5985.047,0.032472565,0.974
`chr1:1116459-1122235`,-620.641034,7959.952,-0.077970449,0.938
`chr1:1173861-1177877`,-328.873149,8935.752,-0.036804195,0.971
`chr1:1187587-1193352`,-62.070009,9234.584,-0.006721473,0.995
`chr1:1193465-1196167`,-155.13666,5245.223,-0.029576753,0.976


In [17]:
# Print the full model summary for `fit`: the call, coefficient and intercept
# tables, residual deviance and AIC.
summary(fit)

Call:
polr(formula = "condition ~ . - V1", data = (ATAC_matrix), Hess = TRUE, 
    method = "logistic")

Coefficients:
                         Value Std. Error   t value
`chr1:865151-866637`     74.21       3994  0.018583
`chr1:893984-895972`    338.10       4166  0.081165
`chr1:896338-896626`    -98.72       3175 -0.031089
`chr1:978039-1012768`   428.73       1357  0.315860
`chr1:1033469-1040884`  335.49       2807  0.119521
`chr1:1093124-1102685`  194.35       5985  0.032473
`chr1:1116459-1122235` -620.64       7960 -0.077970
`chr1:1173861-1177877` -328.87       8936 -0.036804
`chr1:1187587-1193352`  -62.07       9235 -0.006721
`chr1:1193465-1196167` -155.14       5245 -0.029577
`chr1:1202231-1208107` -110.06       7293 -0.015090
`chr1:1586785-1587347`  -34.63       1844 -0.018786

Intercepts:
               Value     Std. Error t value  
Control|Medium   -2.4779 1809.1479    -0.0014
Medium|High      38.8419 3289.3779     0.0118

Residual Deviance: 6.996155e-05 
AIC: 28.00007 

In [18]:
# Display the coefficients extracted from `fit`.
coef(fit)