# OPC/Endothelial Enrichment Analysis from RNA-Seq

In [1]:
import os
import urllib
import urllib.request

import numpy
import pandas

## Download data

In [2]:
# Fetch the Barres lab RNA-Seq workbook once; later runs reuse the local copy.
path = os.path.join('download', 'barreslab_rnaseq.xlsx')
if not os.path.exists(path):
    # urlretrieve does not create missing parent directories, so make sure
    # the download directory exists before fetching (fresh checkouts lack it).
    os.makedirs(os.path.dirname(path), exist_ok=True)
    url = 'http://web.stanford.edu/group/barres_lab/barreslab_rnaseq.xlsx'
    urllib.request.urlretrieve(url, path)

## Read GO annotations

In [3]:
# GO annotations for taxid 10090, read straight from a commit-pinned URL of
# the dhimmel/gene-ontology repository so the input cannot drift between runs.
url = 'https://github.com/dhimmel/gene-ontology/raw/87bab297f55db283e65a7a984607316b409415ae/annotations/taxid_10090/GO_annotations-10090-inferred-allev.tsv'
# The file is tab-separated; one row per GO term.
go_df = pandas.read_csv(url, sep='\t')
go_df.head(2)

Unnamed: 0,go_id,go_name,go_domain,tax_id,annotation_type,size,gene_ids,gene_symbols
0,GO:0000002,mitochondrial genome maintenance,biological_process,10090,inferred,26,11545|16882|17258|17527|18975|19819|22059|2379...,Parp1|Lig3|Mef2a|Mpv17|Polg|Rnaseh1|Trp53|Akt3...
1,GO:0000003,reproduction,biological_process,10090,inferred,1182,11287|11421|11430|11434|11441|11477|11480|1148...,Pzp|Ace|Acox1|Acr|Chrna7|Acvr1|Acvr2a|Ada|Adam...


## Process expression

In [4]:
# Load the 'Raw Data' sheet of the workbook fetched in the download step.
path = os.path.join('download', 'barreslab_rnaseq.xlsx')
# `sheet_name` is the supported keyword: the older `sheetname` spelling was
# deprecated in pandas 0.21 and removed in later releases, where it raises
# a TypeError.
fpkm_df = pandas.read_excel(path, sheet_name='Raw Data')

In [5]:
# Transform FPKM values: apply the inverse hyperbolic sine to every column
# after the first two (which the preview shows as Gene symbol / Description).
# NOTE: this overwrites fpkm_df in place, so re-running the cell without
# reloading the sheet applies the transform a second time.
expression_columns = fpkm_df.columns[2:]
fpkm_df[expression_columns] = fpkm_df[expression_columns].apply(numpy.arcsinh)

In [6]:
# Preview the first two rows of the expression table after the arcsinh transform
fpkm_df.head(2)

Unnamed: 0,Gene symbol,Description,Astrocytes,Neuron,Oligodendrocyte Precursor Cell,Newly Formed Oligodendrocyte,Myelinating Oligodendrocytes,Microglia,Endothelial Cells
0,0610005C13Rik,,0.15039,0.099834,0.099834,0.099834,0.099834,0.099834,0.099834
1,0610007C21Rik,,4.497514,4.225162,4.958988,4.753422,4.68233,5.183443,4.845211


In [7]:
# Choose GO IDs to include
go_ids = [
    'GO:0098609', # cell-cell adhesion biological process
]

# Names of the indicator columns added below, in the same order as go_ids
go_names = []

for go_id in go_ids:
    # First annotation row for this term; .iloc[0] raises IndexError when the
    # ID is absent from go_df.
    go_term = go_df.loc[go_df.go_id == go_id].iloc[0]
    go_name = go_term['go_name']
    # gene_symbols is a '|'-delimited string of member gene symbols
    go_genes = go_term['gene_symbols'].split('|')
    go_names.append(go_name)
    # 1 when the gene symbol belongs to the term, 0 otherwise
    is_member = fpkm_df['Gene symbol'].isin(go_genes)
    fpkm_df[go_name] = is_member.astype(int)

# Number of expression rows flagged for each selected term
fpkm_df[go_names].sum(axis=0)

cell-cell adhesion    525
dtype: int64

In [8]:
# Calculate enrichments as ratios of the (already transformed) expression
# values. A zero denominator does not raise: the result is inf, or NaN for 0/0.

# OPC enrichment: precursor cells relative to myelinating oligodendrocytes
opc_expression = fpkm_df['Oligodendrocyte Precursor Cell']
myelinating_expression = fpkm_df['Myelinating Oligodendrocytes']
fpkm_df['OPC_enrichment'] = opc_expression.div(myelinating_expression)

# Endothelial enrichment: endothelial cells relative to the per-gene mean of
# every other cell type
non_endothelials = [
    'Astrocytes',
    'Neuron',
    'Oligodendrocyte Precursor Cell',
    'Newly Formed Oligodendrocyte',
    'Myelinating Oligodendrocytes',
    'Microglia',
]
non_endothelial_mean = fpkm_df[non_endothelials].mean(axis=1)
fpkm_df['endothelial_enrichment'] = fpkm_df['Endothelial Cells'].div(non_endothelial_mean)

In [9]:
# Write the annotated table, ordered by gene symbol, as a tab-separated file.
fpkm_df = fpkm_df.sort_values('Gene symbol')
path = os.path.join('data', 'OPC_enrichment.tsv')
# to_csv does not create missing parent directories; make sure the output
# directory exists so the export works on a fresh checkout.
os.makedirs(os.path.dirname(path), exist_ok=True)
fpkm_df.to_csv(path, sep='\t', index=False)