In [1]:
from io import StringIO
import glob
import os
import re
# the peak callers all inherit from DatabaseApi, a class which offers an interface 
# to a sqlite database to store Calling Cards data
from callingcardstools.database_managers.yeast import HopsDb
from callingcardstools.PackageResources import Resources
import pandas as pd

# Accessor for resources that ship with the callingcardstools package.
cc_resources = Resources()

# Open the calling cards QC database. HopsDb is given a path on disk here;
# an earlier location is kept below for reference only.
#   /home/oguzkhan/Desktop/cc_metadata/hops_db.sqlite
QC_DB_PATH = "/home/oguzkhan/projects/rank_response_shiny/data/qc_db_v2.sqlite"
yeast_db = HopsDb(QC_DB_PATH)

Checking table column names...
Current database tables are valid


In [5]:
from callingcardstools.BarcodeParser import BarcodeParser
# Run name -> [barcode details JSON, id_bc_map.tsv] for the batches to register.
# The commented-out mapping lists other runs and is kept for reference.
# d = {
#     'run_5301_5088': ['/mnt/scratch/calling_cards/sequence/run_5301_5088/run_5301_5088_barcode_details.json','/mnt/scratch/calling_cards/sequence/run_5301_5088/cctools_split/id_bc_map.tsv'],
#     'run_5690': ['/mnt/scratch/calling_cards/sequence/run_5690/run_5690_barcode_details.json', '/mnt/scratch/calling_cards/sequence/run_5690/cctools_split/id_bc_map.tsv'],
#     'run_6100': ['/mnt/scratch/calling_cards/sequence/run_6100/run_6100_barcode_details.json','/mnt/scratch/calling_cards/sequence/run_6100/cctools_split/id_bc_map.tsv'],
#     'run_6106': ['/mnt/scratch/calling_cards/sequence/run_6106/run_6106_barcode_details.json','/mnt/scratch/calling_cards/sequence/run_6106/cctools_split/id_bc_map.tsv'],
# }
d = {
    'run_6177': [
        '/mnt/scratch/calling_cards/mitra_pipeline/run_6177/cctools_split/run_6177_barcode_details.json',
        '/mnt/scratch/calling_cards/mitra_pipeline/run_6177/cctools_split/id_bc_map.tsv',
    ],
}

# For each run, build a BarcodeParser from the JSON file and pass it, together
# with the id_bc_map.tsv path, to the database's batch QC loader. The run name
# (the dict key) is not needed by the call itself.
for barcode_details_json, id_bc_map_tsv in d.values():
    yeast_db.add_batch_qc(BarcodeParser(barcode_details_json), id_bc_map_tsv)

In [7]:
# Read the lookup table of ccf files for run_6177 and lower-case its batch
# names before they are used as a join key.
ccf_df = pd.read_csv(
    "/home/oguzkhan/projects/rank_response_shiny/data/run_6177_ccf_lookup.csv"
).assign(batch=lambda lookup: lookup['batch'].str.lower())

# Pull the full batch table out of the database.
batch_tbl = pd.read_sql_query('Select * from batch', con=yeast_db.con)

# Attach the batch table's columns to every lookup row that matches on
# batch, tf and replicate.
# NOTE(review): how='left' keeps lookup rows that have no matching batch; the
# batch columns (including `id`) would be NaN for them -- confirm every row matched.
ccf_df_with_batch_id = ccf_df.merge(
    batch_tbl,
    how='left',
    on=['batch', 'tf', 'replicate'],
)

In [10]:

# NOTE(review): df_list is not referenced by any of the visible cells.
df_list = []


def add_ccf_to_db(row: pd.Series) -> None:
    """Read one ccf file, tag it with its batch id and pass it to yeast_db.

    The file at ``row['ccf']`` is read as tab-separated text with explicitly
    supplied column names. A ``batch_id`` column holding ``row['id']`` is
    appended, the first two rows are printed, and the frame is handed to
    ``yeast_db.add_frame``.

    Args:
        row (pd.Series): one row of ccf_df_with_batch_id; the 'ccf', 'id' and
            'tf' entries are read.
    """
    # These six names label the columns read from the file. batch_id is a
    # seventh column that is added below rather than read from disk.
    ccf_columns = ['chr', 'start', 'end', 'depth', 'strand', 'annotation']
    qbed_df = pd.read_csv(row['ccf'], sep='\t', names=ccf_columns)
    qbed_df['batch_id'] = row['id']

    # Short preview so each loaded file can be eyeballed in the cell output.
    print(qbed_df.head(2))

    # The tf name is supplied as the table name suffix and 'batch' as the
    # foreign key table list.
    yeast_db.add_frame(
        qbed_df,
        'qbed',
        table_type='experiment',
        tablename_suffix=row['tf'],
        fk_tablelist=['batch'],
    )



In [11]:
# Load every ccf file listed in the lookup, one row at a time.
# add_ccf_to_db returns None, so the Series shown as the cell result holds only None.
ccf_df_with_batch_id.apply(add_ccf_to_db, axis=1)

    chr  start   end  depth strand  annotation  batch_id
0  chrI   1412  1413      1      +         NaN       106
1  chrI   1641  1642      1      -         NaN       106
    chr  start    end  depth strand  annotation  batch_id
0  chrI  67996  67997      1      +         NaN       108
1  chrI  71219  71220      1      +         NaN       108
    chr  start    end  depth strand  annotation  batch_id
0  chrI  33149  33150      1      +         NaN       107
1  chrI  71005  71006      1      -         NaN       107
      chr   start     end  depth strand  annotation  batch_id
0  chrIII    1430    1431      1      +         NaN       110
1    chrV  566699  566700      1      -         NaN       110
    chr  start   end  depth strand  annotation  batch_id
0  chrI   6371  6372      1      +         NaN       111
1  chrI   6483  6484      1      -         NaN       111
    chr   start     end  depth strand  annotation  batch_id
0  chrI   57948   57949      1      +         NaN       112
1  c

0    None
1    None
2    None
3    None
4    None
5    None
6    None
dtype: object

In [12]:
# Keep only the tables whose names begin with "background" or "experiment".
qbed_name_pattern = re.compile(r"^background|^experiment")
background_and_expr_tbls = [
    tbl
    for tbl in yeast_db.list_tables(yeast_db.con)
    if qbed_name_pattern.search(tbl)
]

# Create one aggregate view per (regions table, qbed table) pair, going
# through all qbed tables for the first regions table before the second.
for regions_tbl in ['regions_yiming', 'regions_not_orf']:
    for qbed in background_and_expr_tbls:
        yeast_db.create_aggregate_view(qbed, regions_tbl)

In [14]:
# Experiment tables to call peaks on. The list is hard-coded; the commented
# line shows how to derive it from the database instead.
# experiment_list = [x for x in yeast_db.list_tables(yeast_db.con) if re.match('^experiment_', x)]
experiment_list = [
    'experiment_DAL80',
    'experiment_GCR1',
    'experiment_GZF3',
    'experiment_INO2',
    'experiment_MET31',
]

region_tables = ['regions_yiming', 'regions_not_orf']
background_tables = ['background_adh1', 'background_dSir4']

# Run the peak caller for every region / background / experiment combination,
# printing the current region and background as progress markers.
for region_tbl in region_tables:
    print(f"region: {region_tbl}")
    for background_tbl in background_tables:
        print(f"background: {background_tbl}")
        for experiment_tbl in experiment_list:
            # NOTE(review): if_exists='replace' presumably overwrites results
            # from an earlier run -- confirm against HopsDb.peak_caller.
            yeast_db.peak_caller(
                regions=region_tbl,
                background=background_tbl,
                experiment=experiment_tbl,
                if_exists='replace',
            )

region: regions_yiming
background: background_adh1
background: background_dSir4
region: regions_not_orf
background: background_adh1
background: background_dSir4
