# CRISPRi FlowFISH Validation

In [2]:
import pandas as pd
import numpy as np
import ctar
import anndata as ad

In [3]:
import pybedtools
# Directory that holds the bedtools executables pybedtools should call.
BEDTOOLS_BIN_DIR = '/projects/zhanglab/users/ana/bedtools2/bin/'
pybedtools.helpers.set_bedtools_path(BEDTOOLS_BIN_DIR)

### Load results
Replace SCENT with the method of your choice.

In [4]:
# Read the tab-separated SCENT results table.
scent_file = '/projects/zhanglab/users/ana/multiome/results/scent/Pubic-PBMC_allqced_bootpkg_nopc_allCT.FDR0.10.txt'
scent_df = pd.read_csv(scent_file, sep='\t')
# Split each `peak` string on '-' into its three region fields, then discard
# any row where one of those fields is missing.
region_cols = ['CHROM', 'START', 'END']
scent_df[region_cols] = scent_df['peak'].str.split('-', expand=True)
scent_df = scent_df.dropna(subset=region_cols)

In [5]:
# Build a BedTool from the three region fields plus the linked gene.
scent_bed_cols = ['CHROM', 'START', 'END', 'gene']
scent_bed = pybedtools.BedTool.from_dataframe(scent_df[scent_bed_cols])

### Load CRISPRi FlowFISH results

In [6]:
# Read the CRISPRi FlowFISH table (first CSV column becomes the index).
# Per the original note, this edited file carries lifted-over locations.
crispr_file = '/projects/zhanglab/users/ana/multiome/validation/crispr-flowfish-supp5_edited.csv'
crispr_df = pd.read_csv(
    crispr_file,
    index_col=0,
)

In [7]:
# Tally links by their `Regulated` flag. Per the original note, these counts
# match the numbers of positive and negative links reported by SCENT.
regulated_mask = crispr_df.Regulated == True
unregulated_mask = crispr_df.Regulated == False
print('Positive links:', int(regulated_mask.sum()))
print('Negative links:', int(unregulated_mask.sum()))

Positive links: 283
Negative links: 5472


In [8]:
# One BedTool per link class: hg38 coordinates plus the gene symbol,
# split on the `Regulated` flag.
columns = ['hg38_chr', 'hg38_start', 'hg38_end', 'GeneSymbol']
crispr_pos_df = crispr_df.loc[crispr_df.Regulated == True, columns]
crispr_neg_df = crispr_df.loc[crispr_df.Regulated == False, columns]
crispr_pos_bed = pybedtools.BedTool.from_dataframe(crispr_pos_df)
crispr_neg_bed = pybedtools.BedTool.from_dataframe(crispr_neg_df)

### Intersection

In [9]:
# Overlap SCENT peaks with the regulated CRISPRi FlowFISH elements. wa/wb keep
# the full record from both inputs, giving 4 SCENT + 4 CRISPR fields per row.
positive_overlap = scent_bed.intersect(crispr_pos_bed, wa=True, wb=True)
positive_links = positive_overlap.to_dataframe(
    index_col=False,
    names=['CHROM', 'START', 'END', 'GENE', 'CF_CHROM', 'CF_START', 'CF_END', 'CF_GENE'],
)
# Shape of the overlaps in which both sources name the same gene.
positive_same_gene = positive_links['GENE'] == positive_links['CF_GENE']
positive_links.loc[positive_same_gene].shape

(39, 8)

In [10]:
# Overlap SCENT peaks with the non-regulated CRISPRi FlowFISH elements. wa/wb
# keep the full record from both inputs, giving 4 SCENT + 4 CRISPR fields per row.
negative_overlap = scent_bed.intersect(crispr_neg_bed, wa=True, wb=True)
negative_links = negative_overlap.to_dataframe(
    index_col=False,
    names=['CHROM', 'START', 'END', 'GENE', 'CF_CHROM', 'CF_START', 'CF_END', 'CF_GENE'],
)
# Shape of the overlaps in which both sources name the same gene.
negative_same_gene = negative_links['GENE'] == negative_links['CF_GENE']
negative_links.loc[negative_same_gene].shape

(83, 8)