In [2]:
from io import StringIO
import glob
import os
import re
# the peak callers all inherit from DatabaseApi, a class which offers an interface 
# to a sqlite database to store Calling Cards data
from callingcardstools.database_managers.yeast import HopsDb
from callingcardstools.PackageResources import Resources
import pandas as pd

# Handle used to retrieve resources that ship with the package.
cc_resources = Resources()

# Open the QC database stored at a fixed path on disk.
# A different file was used before this one:
#   /home/oguzkhan/Desktop/cc_metadata/hops_db.sqlite
yeast_db = HopsDb(
    "/home/oguzkhan/projects/rank_response_shiny/data/qc_db_v2.sqlite"
)

Checking table column names...
Current database tables are valid


In [4]:
from callingcardstools.BarcodeParser import BarcodeParser
# d = {
# 	'run_5301_5088': ['/mnt/scratch/calling_cards/sequence/run_5301_5088/run_5301_5088_barcode_details.json','/mnt/scratch/calling_cards/sequence/run_5301_5088/cctools_split/id_bc_map.tsv'],
# 	'run_5690': ['/mnt/scratch/calling_cards/sequence/run_5690/run_5690_barcode_details.json', '/mnt/scratch/calling_cards/sequence/run_5690/cctools_split/id_bc_map.tsv'],
# 	'run_6100': ['/mnt/scratch/calling_cards/sequence/run_6100/run_6100_barcode_details.json','/mnt/scratch/calling_cards/sequence/run_6100/cctools_split/id_bc_map.tsv'],
# 	'run_6106': ['/mnt/scratch/calling_cards/sequence/run_6106/run_6106_barcode_details.json','/mnt/scratch/calling_cards/sequence/run_6106/cctools_split/id_bc_map.tsv'],
# }
# Each run id maps to a two-item list:
#   [barcode details JSON, id_bc_map.tsv written by cctools_split]
d = dict(
	run_6073=[
		'/mnt/scratch/calling_cards/mitra_pipeline/run_6073/redo_mitra_pipeline/run_6073_barcode_details.json',
		'/mnt/scratch/calling_cards/mitra_pipeline/run_6073/redo_mitra_pipeline/cctools_split/id_bc_map.tsv',
	],
)

for k in d:
	v = d[k]
	# Only the barcode details JSON (first item) is parsed here; the call that
	# would add the batch QC to the database is currently disabled.
	bp = BarcodeParser(v[0])
	#yeast_db.add_batch_qc(bp,v[1])

In [9]:
# Read the ccf manifest and lower-case its batch labels in one expression.
ccf_df = pd.read_csv(
    "/home/oguzkhan/projects/rank_response_shiny/data/runs_6073_redo.csv"
).assign(batch=lambda manifest: manifest['batch'].str.lower())

# Fetch every row of the batch table from the database.
batch_tbl = pd.read_sql_query('Select * from batch', yeast_db.con)

# Left join: every manifest row is kept; batch table columns are attached
# where batch, tf and replicate all agree.
ccf_df_with_batch_id = ccf_df.merge(
    batch_tbl,
    how='left',
    on=['batch','tf', 'replicate'],
)

In [12]:

df_list = []
def add_ccf_to_db(row: pd.Series) -> None:
	"""Read one ccf file, tag it with its batch id and add it to yeast_db.

	The file is read as headerless, tab-separated text with the six columns
	chr, start, end, depth, strand and annotation. A seventh column,
	batch_id, is added here from row['id'] before the frame is handed to
	yeast_db.add_frame.

	Args:
		row (pd.Series): a row from the ccf manifest after it has been joined
			to the batch table. The fields read are 'ccf' (path of the file),
			'id' (written to batch_id; presumably the batch table key, to be
			confirmed against the schema) and 'tf' (used as the table name
			suffix).

	Raises:
		ValueError: if row['id'] is missing (NaN/None). The manifest is
			left-joined to the batch table, so a row without a matching batch
			carries a null id; failing here avoids writing hops that point at
			no batch.
	"""
	batch_id = row['id']
	# Check before touching the file or the database so a bad manifest row
	# fails fast and leaves nothing half-written.
	if pd.isna(batch_id):
		raise ValueError(
			f"no batch id for ccf {row.get('ccf')!r} (tf={row.get('tf')!r}); "
			"the batch/tf/replicate combination was not found in the batch table"
		)

	df = pd.read_csv(
		row['ccf'],
		sep='\t',
		names=['chr', 'start', 'end', 'depth', 'strand', 'annotation'],
	)
	df['batch_id'] = batch_id
	# Short preview so progress is visible while many files are loaded.
	print(df.head(2))
	yeast_db.add_frame(df,'qbed',table_type='experiment',tablename_suffix = row['tf'], fk_tablelist=['batch'])



In [21]:
# Read the ccf file listed in the manifest row labelled 1 and keep every
# column except 'annotation'.
x = pd.read_csv(
    ccf_df_with_batch_id.loc[1,'ccf'],
    sep='\t',
    names = ['chr','start','end','depth','strand','annotation'],
)[['chr','start','end','depth','strand']]

In [23]:
# Read the ccf manifest for these runs, lower-casing the batch labels.
ccf_df = pd.read_csv(
    "/home/oguzkhan/projects/rank_response_shiny/data/runs_53015088_5690_ccfs.csv"
).assign(batch=lambda manifest: manifest['batch'].str.lower())

# Current contents of the batch table.
batch_tbl = pd.read_sql_query('Select * from batch', yeast_db.con)

# Keep every manifest row and attach the batch table columns wherever
# batch, tf and replicate match.
ccf_df_with_batch_id = ccf_df.merge(
    batch_tbl,
    how='left',
    on=['batch','tf', 'replicate'],
)

In [13]:
# Call add_ccf_to_db once per manifest row; the function returns None, so the
# displayed result is a Series of None.
ccf_df_with_batch_id.apply(add_ccf_to_db, axis=1)

    chr  start   end  depth strand  annotation  batch_id
0  chrI   1431  1432      1      +         NaN        94
1  chrI   1542  1543      1      -         NaN        94
    chr  start   end  depth strand  annotation  batch_id
0  chrI   1625  1626      1      -         NaN        95
1  chrI   1688  1689      1      +         NaN        95
    chr  start   end  depth strand  annotation  batch_id
0  chrI    520   521      1      +         NaN        96
1  chrI   2082  2083      4      -         NaN        96
    chr  start   end  depth strand  annotation  batch_id
0  chrI   3372  3373      1      +         NaN        97
1  chrI   5724  5725      1      -         NaN        97
    chr  start   end  depth strand  annotation  batch_id
0  chrI   1106  1107      2      +         NaN        98
1  chrI   1243  1244     14      +         NaN        98


0    None
1    None
2    None
3    None
4    None
dtype: object

In [14]:
# Database tables whose names start with "background" or "experiment".
background_and_expr_tbls = [
    table_name
    for table_name in yeast_db.list_tables(yeast_db.con)
    if re.search(r"^background|^experiment", table_name)
]

# One create_aggregate_view call per (regions table, selected table) pair.
for regions_tbl in ['regions_yiming', 'regions_not_orf']:
    for qbed in background_and_expr_tbls:
        yeast_db.create_aggregate_view(qbed, regions_tbl)

In [15]:
# Run the peak caller for every region set / background / experiment
# combination, printing each level as it is reached.
for region_tbl in ['regions_yiming', 'regions_not_orf']:
	print(f"region: {region_tbl}")
	for background_tbl in ['background_adh1', 'background_dSir4']:
		print(f"background: {background_tbl}")
		# The experiment table list is re-read from the database for each
		# background, exactly as before.
		experiment_tbls = [
			table_name
			for table_name in yeast_db.list_tables(yeast_db.con)
			if re.match('^experiment_', table_name)
		]
		for experiment_tbl in experiment_tbls:
			print(f"experiment: {experiment_tbl}")
			yeast_db.peak_caller(
				regions = region_tbl,
				background = background_tbl,
				experiment = experiment_tbl,
				if_exists='replace',
			)

region: regions_yiming
background: background_adh1
experiment: experiment_ARO80
experiment: experiment_CAT8
experiment: experiment_CBF1
experiment: experiment_GLN3
experiment: experiment_MIG2
experiment: experiment_CRZ1
experiment: experiment_HAP4
experiment: experiment_IXR1
experiment: experiment_ROX1
experiment: experiment_RTG3
experiment: experiment_HAP2
experiment: experiment_HAP3
experiment: experiment_HAP5
experiment: experiment_PHO2
experiment: experiment_PHO4
experiment: experiment_ERT1
experiment: experiment_MET32
experiment: experiment_MTH1
experiment: experiment_RIM101
experiment: experiment_RTG1
experiment: experiment_DAL80
experiment: experiment_GZF3
experiment: experiment_GCR1
experiment: experiment_MET31
background: background_dSir4
experiment: experiment_ARO80
experiment: experiment_CAT8
experiment: experiment_CBF1
experiment: experiment_GLN3
experiment: experiment_MIG2
experiment: experiment_CRZ1
experiment: experiment_HAP4
experiment: experiment_IXR1
experiment: exper