## ================================= ##
## findZX config file (p herring) ##
## ================================= ##

# Variables marked with "[findZX]" or "[findZX-synteny]" are only used when deploying 
# the pipeline with either snakefile. Other variables are used for all analyses. 

threads_max: 4
mem_max: 8000 
  # Specify maximum number of cores [threads_max] and memory [mem_max] allocation


# ============================ #
# Analysis name and input data #

run_name: p_herring
  # Select an analysis name. Output files will be stored under "results/[run_name]"

units: /center1/GLASSLAB/salmgren/lcwgs/lcwgs_data/sex_determ/p_herring_units.tsv
  # Path to sample information file

ref_genome: /center1/GLASSLAB/salmgren/atlantic_herring/atlantic/atlantic_reference.fna 
  # Path to study-species reference genome (not .gz format)

synteny_ref: 
  # [findZX-synteny] Path to synteny-species reference genome (not .gz format)

synteny_name: 
  # [findZX-synteny] Synteny-species name (can be any string, will be used for file and directory names)


# ================= #
# Plotting settings #

window_sizes: [25000, 50000, 100000]
  # Choose genome window sizes for plotting (as many as you want)
  # Optimal sizes depend on reference genome fragmentation and size of the sex-linked region
  # Recommended sizes to start are: [50000, 100000, 1000000] (i.e. 50 kb, 100 kb, 1Mb)

chr_file: None
  # [findZX] Specify a file with list of chromosomes to only plot these (otherwise leave as "None")

chr_highlight: 
  # [findZX] Specify chromosomes/scaffolds to highlight in plot type 4, or leave empty

synteny_chr_file: None
  # [findZX-synteny] Specify a file with list of chromosomes to only plot these (otherwise leave as "None")

synteny_chr_highlight:
  # [findZX-synteny] Specify chromosomes/scaffolds to highlight in plot type 4, or leave empty


# ================================== #
# Trimming and subsampling of reads  #

## These three variables control trimming and subsampling of reads
## Set all to "false" to disable trimming and subsampling
## Only one variable is allowed to be "true"

trim_reads: false 
  # Set to true for trimming of reads

trim_and_subsample: false
  # Set to true for trimming and subsampling of reads

subsample_only: false
  # Set to true for subsampling of reads (but not trimming)

subsample_basepairs: 
  # Specify the total number of basepairs to extract from both fastq files
  # Will be used if [trim_and_subsample] or [subsample_basepairs] is set to "true"

  # Use this script to calculate expected coverage:
  # ./code/subsampling_cov_calv.sh <REF.fasta> <WANTED_COV> 


# ========================== #
# findZX-specific parameters # 
# ===== (edit if needed) ===== #
 
mismatch_settings: [0.0, 0.2]
  # Genome coverage results will be generated from the original BAM files ("unfiltered"),
  # and two other (modifiable) mismatches settings.
  # "0.0" = 0 mismatches allowed
  # "0.2" = <=2 mismatches allowed

minSizeScaffold: "10000"
  # The mimimum size of scaffolds in the reference genome to be included in the results


# ============================ #
# External software parameters # 
# ===== (edit if needed) ===== #