In [2]:
from io import StringIO
import glob
import os
import re
# the peak callers all inherit from DatabaseApi, a class which offers an interface 
# to a sqlite database to store Calling Cards data
from callingcardstools.database_managers.yeast import HopsDb
from callingcardstools.PackageResources import Resources
import pandas as pd

# Accessor for resources that ship with the callingcardstools package
cc_resources = Resources()

# Open the sqlite database backing this analysis. Per the original note, the
# database can live in memory or at a path on disk; here it is a file on disk.
# An earlier location was "/home/oguzkhan/Desktop/cc_metadata/hops_db.sqlite".
qc_db_path = "/home/oguzkhan/projects/rank_response_shiny/data/qc_db_v2.sqlite"
yeast_db = HopsDb(qc_db_path)

Checking table column names...
Current database tables are valid


In [7]:
from callingcardstools.BarcodeParser import BarcodeParser
# Earlier runs, kept for reference (same layout as `d` below):
# d = {
# 	'run_5301_5088': ['/mnt/scratch/calling_cards/sequence/run_5301_5088/run_5301_5088_barcode_details.json','/mnt/scratch/calling_cards/sequence/run_5301_5088/cctools_split/id_bc_map.tsv'],
# 	'run_5690': ['/mnt/scratch/calling_cards/sequence/run_5690/run_5690_barcode_details.json', '/mnt/scratch/calling_cards/sequence/run_5690/cctools_split/id_bc_map.tsv'],
# 	'run_6100': ['/mnt/scratch/calling_cards/sequence/run_6100/run_6100_barcode_details.json','/mnt/scratch/calling_cards/sequence/run_6100/cctools_split/id_bc_map.tsv'],
# 	'run_6106': ['/mnt/scratch/calling_cards/sequence/run_6106/run_6106_barcode_details.json','/mnt/scratch/calling_cards/sequence/run_6106/cctools_split/id_bc_map.tsv'],
# }

# run name -> [barcode details json, id-to-barcode map tsv]
d = {
    'run_6390': [
        '/mnt/scratch/cc/mitra_pipeline/run_6390/run_6390_barcode_details.json',
        '/mnt/scratch/cc/mitra_pipeline/run_6390/cctools_split/id_bc_map.tsv',
    ],
}

# Register the batch QC information of every run listed above in the database
for barcode_details_path, id_bc_map_path in d.values():
    bp = BarcodeParser(barcode_details_path)
    yeast_db.add_batch_qc(bp, id_bc_map_path)

In [16]:
# Directory holding the annotated ccf files of this run
ccf_basepath='/mnt/scratch/cc/mitra_pipeline/run_6390/ccf/'

# One entry per sample in the run. MSN2 is listed twice because the run
# contains two replicates of it.
tf_list = ['SKO1', 'MSN2', 'SKN7',
           'USV1', 'DAL80', 'GZF3',
           'INO2', 'MSN2']

# Replicate label of the sample at the same position in tf_list
# ('none' for samples without replicates)
replicate_list = ['none', '1', 'none',
                  'none', 'none', 'none',
                  'none', '2']

# Replicated samples are stored as <TF>_<replicate>_with_annote.ccf, all
# others as <TF>_with_annote.ccf. Building each path from its own
# (tf, replicate) pair avoids patching list entries by index afterwards,
# which previously made the second MSN2 replicate point at the GZF3 file.
ccf_list = [
    os.path.join(
        ccf_basepath,
        (tf if replicate == 'none' else f'{tf}_{replicate}') + '_with_annote.ccf')
    for tf, replicate in zip(tf_list, replicate_list)
]

ccf_df = pd.DataFrame.from_dict(
    {'batch': ['run_6390'] * len(tf_list),
     'tf': tf_list,
     'replicate': replicate_list,
     'ccf': ccf_list}
)

# Alternative: read the lookup table from a file instead of building it here
#ccf_df = pd.read_csv("/home/oguzkhan/projects/rank_response_shiny/data/run_6177_ccf_lookup.csv")

# Normalise the run name before joining against the batch table
ccf_df['batch'] = ccf_df['batch'].str.lower()
batch_tbl = pd.read_sql_query('Select * from batch', yeast_db.con)

# Attach the batch table columns (notably `id`, used as the foreign key when
# the ccf data is inserted) to every sample.
ccf_df_with_batch_id = pd.merge(ccf_df, batch_tbl, how='left', on=['batch', 'tf', 'replicate'])

# A left join keeps samples that have no matching batch record and fills `id`
# with NaN. Stop here rather than inserting ccf rows without a valid batch id.
unmatched = ccf_df_with_batch_id[ccf_df_with_batch_id['id'].isna()]
if not unmatched.empty:
    raise ValueError(
        "No batch record found for: "
        + str(unmatched[['batch', 'tf', 'replicate']].values.tolist()))

In [18]:

def add_ccf_to_db(row: pd.Series) -> None:
    """Load one ccf file and store its contents in yeast_db.

    The file at row['ccf'] is read as tab-separated text; its columns are
    named chr, start, end, depth, strand and annotation. A `batch_id` column
    holding row['id'] is appended, the first two records are printed, and the
    frame is handed to yeast_db.add_frame as a 'qbed' experiment table whose
    name suffix is row['tf'].

    Args:
        row (pd.Series): a row of ccf_df_with_batch_id; the fields `ccf`,
            `id` and `tf` are read.
    """
    qbed_columns = ['chr', 'start', 'end', 'depth', 'strand', 'annotation']

    # batch_id is not part of the file; it is added here from the lookup row
    qbed_df = (
        pd.read_csv(row['ccf'], sep='\t', names=qbed_columns)
        .assign(batch_id=row['id'])
    )

    print(qbed_df.head(2))

    yeast_db.add_frame(
        qbed_df,
        'qbed',
        table_type='experiment',
        tablename_suffix=row['tf'],
        fk_tablelist=['batch'])



In [19]:
# Insert the ccf file of every sample into the database, one row at a time.
# add_ccf_to_db returns None, so the displayed result is a Series of None.
ccf_df_with_batch_id.apply(add_ccf_to_db, axis=1)

    chr  start    end  depth strand     annotation  batch_id
0  chrI  29341  29342      1      +  GAATCTGAAATCG       125
1  chrI  29410  29411      6      -  GAATCTGAAATCG       125
    chr  start   end  depth strand     annotation  batch_id
0  chrI   5886  5887      1      +  GCTTAAAGCCCCA       126
1  chrI   9248  9249      5      +  GCTTAAAGCCCCA       126
    chr  start   end  depth strand     annotation  batch_id
0  chrI   6279  6280      5      +  GTCCCTACTCTCC       127
1  chrI   6403  6404      1      +  GTCCCTACTCTCC       127
    chr  start   end  depth strand     annotation  batch_id
0  chrI   1997  1998      1      +  TTGCGGTAGTGCC       128
1  chrI   4926  4927      9      -  TTGCGGTAGTGCC       128
    chr   start     end  depth strand     annotation  batch_id
0  chrI  184729  184730      2      -  GCTTATACGGCGT       129
1  chrI  191699  191700      2      +  GCTTATACGGCGT       129
    chr   start     end  depth strand     annotation  batch_id
0  chrI   67886   67887  

0    None
1    None
2    None
3    None
4    None
5    None
6    None
7    None
dtype: object

In [24]:
# Alternative: collect every background and experiment table in the database
#background_and_expr_tbls = [x for x in yeast_db.list_tables(yeast_db.con) if re.search(r"^background|^experiment",x)]

# One experiment table name per TF. The names are built from the TF name only,
# so a TF that appears more than once in tf_list (a second replicate) yields
# the same name again. Collapse such repeats while keeping first-seen order,
# instead of slicing off a fixed number of entries, so this keeps working when
# tf_list changes.
experiment_list = list(dict.fromkeys('experiment_' + x for x in tf_list))

# NOTE! This doesn't need to be re-done if the view already exists, meaning
# the experiment table already exists. It doesn't hurt to re-run
for regions_tbl in ['regions_yiming', 'regions_not_orf']:
    for qbed in experiment_list:
        yeast_db.create_aggregate_view(qbed,regions_tbl)

In [26]:
# experiment_list is built in the aggregate-view cell earlier in the notebook.
# To call peaks on every experiment table in the database instead, use:
# experiment_list = [x for x in yeast_db.list_tables(yeast_db.con) if re.match('^experiment_', x)]
# or pin an explicit list, e.g. ['experiment_RDS2', 'experiment_MET31', ...]

region_tables = ['regions_yiming', 'regions_not_orf']
background_tables = ['background_adh1', 'background_dSir4']

# Call peaks for every (regions, background, experiment) combination;
# existing results are replaced.
for region_tbl in region_tables:
    print(f"region: {region_tbl}")
    for background_tbl in background_tables:
        print(f"background: {background_tbl}")
        for experiment_tbl in experiment_list:
            print(f"experiment: {experiment_tbl}")
            yeast_db.peak_caller(
                regions=region_tbl,
                background=background_tbl,
                experiment=experiment_tbl,
                if_exists='replace')

region: regions_yiming
background: background_adh1
experiment: experiment_SKO1
experiment: experiment_MSN2
experiment: experiment_SKN7
experiment: experiment_USV1
experiment: experiment_DAL80
experiment: experiment_GZF3
experiment: experiment_INO2
background: background_dSir4
experiment: experiment_SKO1
experiment: experiment_MSN2
experiment: experiment_SKN7
experiment: experiment_USV1
experiment: experiment_DAL80
experiment: experiment_GZF3
experiment: experiment_INO2
region: regions_not_orf
background: background_adh1
experiment: experiment_SKO1
experiment: experiment_MSN2
experiment: experiment_SKN7
experiment: experiment_USV1
experiment: experiment_DAL80
experiment: experiment_GZF3
experiment: experiment_INO2
background: background_dSir4
experiment: experiment_SKO1
experiment: experiment_MSN2
experiment: experiment_SKN7
experiment: experiment_USV1
experiment: experiment_DAL80
experiment: experiment_GZF3
experiment: experiment_INO2
