In [1]:
import os
import configparser

In [2]:
# The generated .ini is written into the directory the notebook is run from.
# Note: BASE_PATH is rebound in a later cell to a directory two levels up,
# so only configfile_name keeps pointing at the working directory.
BASE_PATH = os.getcwd()
_CONFIG_BASENAME = "config_runAkita_ensembl-ancestral.ini"  # name the config file
configfile_name = os.path.join(BASE_PATH, _CONFIG_BASENAME)

In [3]:
# Empty parser; the FILE, PATH and BIN sections are assigned further down and
# the result is written to configfile_name in the last cell.
config = configparser.ConfigParser()

## Akita params and paths

In [4]:
# Bare file names (no directory component); all four are stored in the
# config's FILE section. Presumably resolved against MODEL_DIR / DATA_DIR by
# the run scripts -- confirm there.
AKITA_PARAMS = "params.json"
MODEL = "model_best.h5"
TARGETS = "targets.txt"
STATS = "statistics.json"

In [5]:
# Absolute cluster directories, stored in the config's PATH section as
# MODEL_DIR and DATA_DIR. Presumably where the Akita model files and the
# targets/statistics files live, respectively -- confirm against the run scripts.
MODEL_DIR = '/wynton/group/capra/projects/modern_human_3Dgenome/bin/basenji/manuscripts/akita/'
DATA_DIR = '/wynton/group/capra/projects/modern_human_3Dgenome/bin/basenji/manuscripts/akita/data/'

## Erin's params and paths that *shouldn't* change

In [6]:
# Paths
# Project root: two directory levels above the notebook's working directory.
# os.path.dirname is used instead of splitting on "/" so that a shallow
# working directory yields the filesystem root rather than an empty string
# (which would silently turn every path below into a relative one), and so
# the platform's own path separator is honoured.
BASE_PATH = os.path.dirname(os.path.dirname(os.getcwd()))  # base directory level

BIN_PATH = os.path.join(BASE_PATH, "bin")  # where my scripts live
DATA_PATH = os.path.join(BASE_PATH, "data")  # where I dump new data.
RESULTS_PATH = os.path.join(BASE_PATH, "results")  # where I analyze results

# Where any packages needed to run analyses live (not yet structured this way).
SRC_PATH = os.path.join(BASE_PATH, "src")


In [7]:
# Path to the genome chunks text file; stored in the config's FILE section
# as GENOME_CHUNKS.
GENOME_CHUNKS = '/wynton/group/capra/projects/modern_human_3Dgenome/data/reference/genome_chunks_large.txt'

In [8]:
# Output directories, stored in the config's PATH section as OUT_PREDS and
# OUT_COV. Going by the directory names these hold 3D predictions and
# coverage output respectively -- confirm against the run scripts.
OUT_PREDS = '/wynton/group/capra/projects/modern_human_3Dgenome/data/akitaPreds/3dpreds/'
OUT_COV = '/wynton/group/capra/projects/modern_human_3Dgenome/data/akitaPreds/coverage/'

In [9]:
# Both scripts sit in BIN_PATH/runningAkita; their full paths go into the
# config's BIN section.
_RUN_AKITA_DIR = os.path.join(BIN_PATH, 'runningAkita')
LAUNCH_RUN_AKITA = os.path.join(_RUN_AKITA_DIR, 'runAkita.sh')
RUN_AKITA_INDIV = os.path.join(_RUN_AKITA_DIR, 'runAkita.one_individual.py')

## Genome source dependent params and paths

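Update only the data source (`source`) and the file listing the individuals (`IND_LIST`); the two must refer to the same genome source. When adding a new data source, add its FASTA directory, file-naming pattern, and naming variables to the assignments below.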

In [10]:
# File listing the individuals to run; stored in the config's PATH section.
IND_LIST = '/wynton/group/capra/projects/modern_human_3Dgenome/data/reference/lists/runAkitaLists/ensembl_106_ancestral.txt'
# Key into data_sources ('1KG', 'ensembl' or 'GAGP') that selects the FASTA
# directory and naming convention. It has to match IND_LIST: the list above
# and the config file name both target the Ensembl ancestral genome, so the
# source is 'ensembl'. (It was previously left at '1KG', which paired the
# Ensembl list with the 1KG FASTA directory and naming pattern.)
source = 'ensembl'

In [11]:
# FASTA directories, one per supported genome source.
fasta_dir_1KG = '/wynton/group/capra/projects/modern_human_3Dgenome/data/genomes/'
fasta_dir_ensembl = '/wynton/group/capra/data/hg38_ancestral/Ensembl_release_106/homo_sapiens_ancestor_GRCh38/'
fasta_dir_GAGP = '/wynton/group/capra/data/hg38_ancestral/GAGP/'

# Naming conventions for FASTA files.
# '%' is doubled because configparser's default interpolation treats a single
# '%' as the start of a substitution; '%%s' is stored literally and reads back
# as '%s'.
# naming_vars_* are kept as text; presumably the run script evaluates them to
# fill the '%s' slots of the matching pattern -- confirm in the consumer.
fasta_naming_1KG = '%%s/%%s/%%s_%%s_hg38_full.fa'
naming_vars_1KG = '(pop,indiv,chrm,id)'

fasta_naming_GAGP = '%%s_with_GAGP_ancestral.fa'
naming_vars_GAGP = 'chrm'

fasta_naming_ensembl = 'homo_sapiens_ancestor_%%s.fa'
naming_vars_ensembl = 'chrm.strip("chr")'

# Registry of per-source settings, keyed by source name. Written out
# explicitly instead of resolving variable names through eval(), so a missing
# or misspelled variable is caught by name and no string is ever executed.
# To add a source: define its three variables above and add one entry here.
data_sources = {
    '1KG': {
        'fasta_dir': fasta_dir_1KG,
        'fasta_naming': fasta_naming_1KG,
        'naming_vars': naming_vars_1KG,
    },
    'ensembl': {
        'fasta_dir': fasta_dir_ensembl,
        'fasta_naming': fasta_naming_ensembl,
        'naming_vars': naming_vars_ensembl,
    },
    'GAGP': {
        'fasta_dir': fasta_dir_GAGP,
        'fasta_naming': fasta_naming_GAGP,
        'naming_vars': naming_vars_GAGP,
    },
}

In [12]:
# Pull the three settings for the chosen source out of the registry.
# An unknown source name raises KeyError here.
_source_settings = data_sources[source]
INPUT_FASTA_DIR, FASTA_NAMING, NAMING_VARS = (
    _source_settings[field]
    for field in ('fasta_dir', 'fasta_naming', 'naming_vars')
)

### Assemble the .ini sections

In [13]:
# Each assignment replaces the named section wholesale with the given mapping.
# configparser's default option-name transform lower-cases keys, so these
# appear in lower case in the written file.

# [FILE]: file names and FASTA naming templates.
config["FILE"] = dict(
    GENOME_CHUNKS=GENOME_CHUNKS,
    AKITA_PARAMS=AKITA_PARAMS,
    MODEL=MODEL,
    TARGETS=TARGETS,
    STATS=STATS,
    FASTA_NAMING=FASTA_NAMING,
    NAMING_VARS=NAMING_VARS,
)

# [PATH]: project directories, input list, and input/output locations.
config["PATH"] = dict(
    BASE_PATH=BASE_PATH,
    BIN_PATH=BIN_PATH,
    DATA_PATH=DATA_PATH,
    RESULTS_PATH=RESULTS_PATH,
    SRC_PATH=SRC_PATH,
    IND_LIST=IND_LIST,
    MODEL_DIR=MODEL_DIR,
    DATA_DIR=DATA_DIR,
    INPUT_FASTA_DIR=INPUT_FASTA_DIR,
    OUT_PREDS=OUT_PREDS,
    OUT_COV=OUT_COV,
)

# [BIN]: scripts used to launch the run.
config["BIN"] = dict(
    LAUNCH_RUN_AKITA=LAUNCH_RUN_AKITA,
    RUN_AKITA_INDIV=RUN_AKITA_INDIV,
)

### Write config file

In [14]:
# Write the config only when no file of that name exists yet, so an existing
# config is never overwritten. Delete the file first to regenerate it.
if not os.path.isfile(configfile_name):
    # The with-block closes the file on exit; no explicit close() is needed.
    with open(configfile_name, 'w') as configfile:
        config.write(configfile)  # write the config
    print("Wrote config file:", configfile_name)
else:
    # Say so explicitly: otherwise edits made above silently never reach disk.
    print("Config file already exists, left unchanged:", configfile_name)