In [1]:
%%bash
# Pile up the alignments (sorted and duplicate-removed, judging by the BAM's
# file name) against the reference and write the result as BCF; the BCF is
# the input of the bcftools call step.
reference=./input_data/ref.fasta
alignments=./samtools_out/lsdv_51_sorted_rmdup.bam
pileup_bcf=./samtools_out/lsdv_51_vars_indels_in.bcf

samtools mpileup --BCF --fasta "$reference" "$alignments" > "$pileup_bcf"

[fai_load] build FASTA index.
[mpileup] 1 samples in 1 input files
<mpileup> Set max per-file depth to 8000


In [2]:
%%bash
# Call variants from the mpileup BCF with the consensus caller, emitting
# variant sites only; ploidy is read from the ploidy file.
ploidy_file=./samtools_out/ploid.txt
pileup_bcf=./samtools_out/lsdv_51_vars_indels_in.bcf
calls_vcf=./samtools_out/lsdv_51_vars_indels_in.vcf

bcftools call \
    --consensus-caller \
    --variants-only \
    --ploidy-file "$ploidy_file" \
    "$pileup_bcf" > "$calls_vcf"

#### Normalizing indels with `bcftools norm`

In [15]:
%%bash
# Normalize the called variants against the reference and write them to a
# separate VCF, leaving the original call set untouched.
reference=./input_data/ref.fasta
calls_vcf=./samtools_out/lsdv_51_vars_indels_in.vcf
normalized_vcf=./samtools_out/lsdv_51_vars_indels_in_norm.vcf

bcftools norm -f "$reference" "$calls_vcf" \
    -o "$normalized_vcf"

Lines   total/split/realigned/skipped:	43/0/22/0


In [1]:
from sequman import count_snps, count_indels, vcf_to_df

In [13]:
# Print the signature and docstring of count_snps to check its parameters
# before calling it.
help(count_snps)

Help on function count_snps in module sequman:

count_snps(vcf_file, min_depth=10, verbose='True')
    counts SNPs in vcf file
    ----------------
    vcf_file: str
        input vcf
    min_depth: int
        minimum depth in favour of SNPs, 10 by default
    verbose: bool
        True - prints information about the variants
        False - keeps silent



In [15]:
# Report the SNPs found in the un-normalized call set, using the default
# settings of count_snps (no optional arguments are passed).
raw_calls_vcf = "./samtools_out/lsdv_51_vars_indels_in.vcf"
count_snps(raw_calls_vcf)

chromosome: MH893760.2, position: 66862, ref: G, snp variant: [A]
depth at position: 137
reads supporting reference: 1
reads supporting snp variant: 133
chromosome: MH893760.2, position: 73757, ref: A, snp variant: [T]
depth at position: 82
reads supporting reference: 0
reads supporting snp variant: 77
chromosome: MH893760.2, position: 117638, ref: A, snp variant: [T]
depth at position: 116
reads supporting reference: 0
reads supporting snp variant: 112
chromosome: MH893760.2, position: 122070, ref: T, snp variant: [C]
depth at position: 57
reads supporting reference: 3
reads supporting snp variant: 48
chromosome: MH893760.2, position: 122071, ref: C, snp variant: [T]
depth at position: 57
reads supporting reference: 7
reads supporting snp variant: 48
chromosome: MH893760.2, position: 135500, ref: A, snp variant: [T]
depth at position: 54
reads supporting reference: 0
reads supporting snp variant: 53
chromosome: MH893760.2, position: 145320, ref: C, snp variant: [T]
depth at position: 

0

In [3]:
# Report the indels found in the un-normalized call set, using the default
# settings of count_indels (no optional arguments are passed).
raw_calls_vcf = "./samtools_out/lsdv_51_vars_indels_in.vcf"
count_indels(raw_calls_vcf)

chromosome: MH893760.2, position: 4802, ref: GTTTTTTTTT, indel variant: [GTTTTTTTT]
depth at position: 96
reads supporting reference: 0
reads supporting indel variant: 85
chromosome: MH893760.2, position: 10739, ref: ATTTTTTTTTT, indel variant: [ATTTTTTTTTTT]
depth at position: 94
reads supporting reference: 1
reads supporting indel variant: 80
chromosome: MH893760.2, position: 11566, ref: TTTATTAT, indel variant: [TTTAT]
depth at position: 118
reads supporting reference: 0
reads supporting indel variant: 113
chromosome: MH893760.2, position: 11571, ref: TATAA, indel variant: [TA]
depth at position: 120
reads supporting reference: 2
reads supporting indel variant: 107
chromosome: MH893760.2, position: 12055, ref: TTGT, indel variant: [TTGTGT]
depth at position: 117
reads supporting reference: 5
reads supporting indel variant: 58
chromosome: MH893760.2, position: 12057, ref: GTTTTTTTTTTT, indel variant: [GTTTTTTTTTTTTT, GTTTTTTTTTTTTTT]
depth at position: 116
reads supporting reference:

0

In [8]:
# Collect the SNP records of the un-normalized call set via vcf_to_df
# (presumably a pandas DataFrame, given the helper's name -- TODO confirm).
raw_calls_vcf = "./samtools_out/lsdv_51_vars_indels_in.vcf"
df = vcf_to_df(raw_calls_vcf, var_type="snp")

In [12]:
# Save the SNP table (`df`, built by vcf_to_df with var_type="snp") as an
# Excel workbook next to the other samtools outputs.
df.to_excel("./samtools_out/lsdv_51_snps.xlsx")

In [4]:
# Collect the indel records of the un-normalized call set via vcf_to_df
# (presumably a pandas DataFrame, given the helper's name -- TODO confirm).
raw_calls_vcf = "./samtools_out/lsdv_51_vars_indels_in.vcf"
df_indels = vcf_to_df(raw_calls_vcf, var_type="indel")

In [5]:
# Save the indel table (`df_indels`, built by vcf_to_df with
# var_type="indel") as an Excel workbook next to the other samtools outputs.
df_indels.to_excel("./samtools_out/lsdv_51_indels.xlsx")

In [3]:
import vcf

In [27]:
# Dump every record of the un-normalized call set together with its
# chromosome, DP and DP4 INFO values and the full INFO dict, then print how
# many records were read.
counter = 0

# Open the VCF in a `with` block so the file handle is closed once the loop
# finishes (or if parsing raises); the bare open() used before was never
# closed.
with open('./samtools_out/lsdv_51_vars_indels_in.vcf', 'r') as vcf_handle:
    vcf_reader = vcf.Reader(vcf_handle)
    for record in vcf_reader:
        print(record)
        print(record.CHROM)
        print(record.INFO["DP"])
        print(record.INFO["DP4"])
        print(record.INFO)
        print("--------------------")
        counter += 1

print(counter)
    

Record(CHROM=MH893760.2, POS=4802, REF=GTTTTTTTTT, ALT=[GTTTTTTTT])
MH893760.2
96
[0, 0, 34, 51]
{'INDEL': True, 'IDV': 88, 'IMF': 0.916667, 'DP': 96, 'VDB': 0.666497, 'SGB': -0.693147, 'MQSB': 0.625439, 'MQ0F': 0.0208333, 'AF1': 1.0, 'AC1': 1.0, 'DP4': [0, 0, 34, 51], 'MQ': 41, 'FQ': -999.0}
--------------------
Record(CHROM=MH893760.2, POS=10739, REF=ATTTTTTTTTT, ALT=[ATTTTTTTTTTT])
MH893760.2
94
[1, 0, 40, 40]
{'INDEL': True, 'IDV': 82, 'IMF': 0.87234, 'DP': 94, 'VDB': 0.900269, 'SGB': -0.693147, 'MQSB': 0.922574, 'MQ0F': 0.0106383, 'AF1': 1.0, 'AC1': 1.0, 'DP4': [1, 0, 40, 40], 'MQ': 42, 'FQ': -999.0, 'PV4': [1.0, 1.0, 0.435685, 0.310837]}
--------------------
Record(CHROM=MH893760.2, POS=11566, REF=TTTATTAT, ALT=[TTTAT])
MH893760.2
118
[0, 0, 41, 72]
{'INDEL': True, 'IDV': 111, 'IMF': 0.940678, 'DP': 118, 'VDB': 0.0807615, 'SGB': -0.693147, 'MQSB': 0.888318, 'MQ0F': 0.0254237, 'AF1': 1.0, 'AC1': 1.0, 'DP4': [0, 0, 41, 72], 'MQ': 41, 'FQ': -999.0}
--------------------
Record(CHROM=

In [11]:
# Dump every record of the normalized call set (the bcftools norm output)
# together with its DP and DP4 INFO values and the full INFO dict, then
# print how many records were read.
counter = 0

# Open the VCF in a `with` block so the file handle is closed once the loop
# finishes (or if parsing raises); the bare open() used before was never
# closed.
with open('./samtools_out/lsdv_51_vars_indels_in_norm.vcf', 'r') as vcf_handle:
    vcf_reader = vcf.Reader(vcf_handle)
    for record in vcf_reader:
        print(record)
        print(record.INFO["DP"])
        print(record.INFO["DP4"])
        print(record.INFO)
        print("--------------------")
        counter += 1

print(counter)

Record(CHROM=MH893760.2, POS=4802, REF=GT, ALT=[G])
96
[0, 0, 34, 51]
{'INDEL': True, 'IDV': 88, 'IMF': 0.916667, 'DP': 96, 'VDB': 0.666497, 'SGB': -0.693147, 'MQSB': 0.625439, 'MQ0F': 0.0208333, 'AF1': 1.0, 'AC1': 1.0, 'DP4': [0, 0, 34, 51], 'MQ': 41, 'FQ': -999.0}
--------------------
Record(CHROM=MH893760.2, POS=10739, REF=A, ALT=[AT])
94
[1, 0, 40, 40]
{'INDEL': True, 'IDV': 82, 'IMF': 0.87234, 'DP': 94, 'VDB': 0.900269, 'SGB': -0.693147, 'MQSB': 0.922574, 'MQ0F': 0.0106383, 'AF1': 1.0, 'AC1': 1.0, 'DP4': [1, 0, 40, 40], 'MQ': 42, 'FQ': -999.0, 'PV4': [1.0, 1.0, 0.435685, 0.310837]}
--------------------
Record(CHROM=MH893760.2, POS=11566, REF=TTTA, ALT=[T])
118
[0, 0, 41, 72]
{'INDEL': True, 'IDV': 111, 'IMF': 0.940678, 'DP': 118, 'VDB': 0.0807615, 'SGB': -0.693147, 'MQSB': 0.888318, 'MQ0F': 0.0254237, 'AF1': 1.0, 'AC1': 1.0, 'DP4': [0, 0, 41, 72], 'MQ': 41, 'FQ': -999.0}
--------------------
Record(CHROM=MH893760.2, POS=11571, REF=TATA, ALT=[T])
120
[1, 1, 40, 67]
{'INDEL': True, 