In [1]:
# For parsing VCF file
import vcf

In [2]:
Chromosomes = ['A01','A02','A03','A04','A05','A06','A07','A08','A09','A10',]

In [3]:
# Open a VCF Reader to parse the file
R09vcf = vcf.Reader(filename='R09-homozygous-variants.snpeff.vcf.gz')

In [4]:
# get the sample name
sample = R09vcf.samples[0]

In [10]:
# The per-chromosome Ts and Tv counts
R09_SNPs = {CHROM: {'Ts': 0, 'Tv': 0, 'A/G': 0, 'G/A': 0, 'C/T': 0, 'T/C': 0,
                    'A/C': 0, 'A/T': 0, 'G/C': 0, 'G/T': 0, 'C/A': 0, 'C/G': 0, 'T/A':0, 'T/G': 0} for CHROM in Chromosomes}
# The genome-wide Ts and Tv counts
R09_SNPs_Total = {'Ts': 0, 'Tv': 0, 'A/G': 0, 'G/A': 0, 'C/T': 0, 'T/C': 0,
                  'A/C': 0, 'A/T': 0, 'G/C': 0, 'G/T': 0, 'C/A': 0, 'C/G': 0, 'T/A':0, 'T/G': 0}

In [11]:
# Calculate from SNPs the Ts and Tv of a Chromosome
for CHROM in Chromosomes:
    CHROMvcf = R09vcf.fetch(CHROM, 0)
    for record in CHROMvcf:
        if record.is_snp:
            
            # Get the reference nt
            ref = record.alleles[0]
            # Get the called nts
            mat, pat = record.genotype(sample).gt_bases.split('/') # data is un-phased so separator will always be '/'
            
            # Note: all calls are homozygous in these files,
            # so we only need to use either mat or pat call for counting
            if record.is_transition:
                R09_SNPs[CHROM]['Ts'] += 1
                R09_SNPs_Total['Ts'] += 1
            else:
                R09_SNPs[CHROM]['Tv'] += 1
                R09_SNPs_Total['Tv'] += 1
            
            # Now add count for specific substitution
            R09_SNPs[CHROM]['/'.join([ref, mat])] += 1
            R09_SNPs_Total['/'.join([ref, mat])] += 1

In [12]:
R09_SNPs

{'A01': {'A/C': 5336,
  'A/G': 12652,
  'A/T': 6487,
  'C/A': 5422,
  'C/G': 3186,
  'C/T': 13027,
  'G/A': 13038,
  'G/C': 3193,
  'G/T': 5265,
  'T/A': 6585,
  'T/C': 12641,
  'T/G': 5164,
  'Ts': 51212,
  'Tv': 40784},
 'A02': {'A/C': 3629,
  'A/G': 8456,
  'A/T': 4585,
  'C/A': 3681,
  'C/G': 2003,
  'C/T': 8910,
  'G/A': 8859,
  'G/C': 2115,
  'G/T': 3687,
  'T/A': 4467,
  'T/C': 8493,
  'T/G': 3561,
  'Ts': 34605,
  'Tv': 27841},
 'A03': {'A/C': 5572,
  'A/G': 13897,
  'A/T': 7302,
  'C/A': 5812,
  'C/G': 3647,
  'C/T': 14114,
  'G/A': 14034,
  'G/C': 3636,
  'G/T': 5763,
  'T/A': 7216,
  'T/C': 13722,
  'T/G': 5549,
  'Ts': 55674,
  'Tv': 44590},
 'A04': {'A/C': 3907,
  'A/G': 10056,
  'A/T': 5239,
  'C/A': 3998,
  'C/G': 2463,
  'C/T': 9901,
  'G/A': 10150,
  'G/C': 2432,
  'G/T': 4008,
  'T/A': 5247,
  'T/C': 9875,
  'T/G': 3935,
  'Ts': 39901,
  'Tv': 31310},
 'A05': {'A/C': 4655,
  'A/G': 11124,
  'A/T': 5833,
  'C/A': 4825,
  'C/G': 2867,
  'C/T': 11361,
  'G/A': 11439,
  '

In [13]:
R09_SNPs_Total

{'A/C': 51733,
 'A/G': 127828,
 'A/T': 65710,
 'C/A': 52560,
 'C/G': 32294,
 'C/T': 129613,
 'G/A': 129880,
 'G/C': 32257,
 'G/T': 52244,
 'T/A': 65492,
 'T/C': 126930,
 'T/G': 51423,
 'Ts': 513067,
 'Tv': 404897}

In [15]:
513067/404897.0

1.2671543627144681

In [16]:
513067+404897

917964

In [17]:
# Open a VCF Reader to parse the file
S11vcf = vcf.Reader(filename='S11-homozygous-variants.snpeff.vcf.gz')

In [18]:
# get the sample name
sample = S11vcf.samples[0]

In [19]:
# The per-chromosome Ts and Tv counts
S11_SNPs = {CHROM: {'Ts': 0, 'Tv': 0, 'A/G': 0, 'G/A': 0, 'C/T': 0, 'T/C': 0,
                    'A/C': 0, 'A/T': 0, 'G/C': 0, 'G/T': 0, 'C/A': 0, 'C/G': 0, 'T/A':0, 'T/G': 0} for CHROM in Chromosomes}
# The genome-wide Ts and Tv counts
S11_SNPs_Total = {'Ts': 0, 'Tv': 0, 'A/G': 0, 'G/A': 0, 'C/T': 0, 'T/C': 0,
                  'A/C': 0, 'A/T': 0, 'G/C': 0, 'G/T': 0, 'C/A': 0, 'C/G': 0, 'T/A':0, 'T/G': 0}

In [20]:
# Calculate from SNPs the Ts and Tv of a Chromosome
for CHROM in Chromosomes:
    CHROMvcf = S11vcf.fetch(CHROM, 0)
    for record in CHROMvcf:
        if record.is_snp:
            
            # Get the reference nt
            ref = record.alleles[0]
            # Get the called nts
            mat, pat = record.genotype(sample).gt_bases.split('/') # data is un-phased so separator will always be '/'
            
            # Note: all calls are homozygous in these files,
            # so we only need to use either mat or pat call for counting
            if record.is_transition:
                S11_SNPs[CHROM]['Ts'] += 1
                S11_SNPs_Total['Ts'] += 1
            else:
                S11_SNPs[CHROM]['Tv'] += 1
                S11_SNPs_Total['Tv'] += 1
            
            # Now add count for specific substitution
            S11_SNPs[CHROM]['/'.join([ref, mat])] += 1
            S11_SNPs_Total['/'.join([ref, mat])] += 1

In [21]:
S11_SNPs

{'A01': {'A/C': 4522,
  'A/G': 10602,
  'A/T': 5506,
  'C/A': 4560,
  'C/G': 2696,
  'C/T': 11108,
  'G/A': 11047,
  'G/C': 2658,
  'G/T': 4614,
  'T/A': 5573,
  'T/C': 10791,
  'T/G': 4604,
  'Ts': 43427,
  'Tv': 34854},
 'A02': {'A/C': 4845,
  'A/G': 12010,
  'A/T': 6254,
  'C/A': 4973,
  'C/G': 2884,
  'C/T': 12633,
  'G/A': 12542,
  'G/C': 3116,
  'G/T': 4943,
  'T/A': 6219,
  'T/C': 12359,
  'T/G': 4926,
  'Ts': 49424,
  'Tv': 38280},
 'A03': {'A/C': 5746,
  'A/G': 14306,
  'A/T': 7429,
  'C/A': 5938,
  'C/G': 3719,
  'C/T': 14442,
  'G/A': 14344,
  'G/C': 3736,
  'G/T': 5739,
  'T/A': 7471,
  'T/C': 14150,
  'T/G': 5700,
  'Ts': 57144,
  'Tv': 45576},
 'A04': {'A/C': 5090,
  'A/G': 13047,
  'A/T': 6629,
  'C/A': 5136,
  'C/G': 3085,
  'C/T': 12966,
  'G/A': 12777,
  'G/C': 3076,
  'G/T': 5018,
  'T/A': 6580,
  'T/C': 12756,
  'T/G': 5040,
  'Ts': 51430,
  'Tv': 39770},
 'A05': {'A/C': 5615,
  'A/G': 13966,
  'A/T': 7022,
  'C/A': 5504,
  'C/G': 3353,
  'C/T': 13771,
  'G/A': 1380

In [22]:
S11_SNPs_Total

{'A/C': 52320,
 'A/G': 130000,
 'A/T': 66027,
 'C/A': 52898,
 'C/G': 32442,
 'C/T': 131486,
 'G/A': 131267,
 'G/C': 32632,
 'G/T': 52570,
 'T/A': 66076,
 'T/C': 129390,
 'T/G': 52598,
 'Ts': 520935,
 'Tv': 408771}

In [23]:
520935/408771.0

1.274393242182053

In [24]:
520935+408771

929706

Now we will perform calculations on the unique data sets

In [3]:
# Open a VCF Reader to parse the file
S11vcf = vcf.Reader(filename='S11-homozygous-variants.snpeff.unique.vcf.gz')

In [4]:
# get the sample name
sample = S11vcf.samples[0]

In [5]:
# The per-chromosome Ts and Tv counts
S11_SNPs = {CHROM: {'Ts': 0, 'Tv': 0, 'A/G': 0, 'G/A': 0, 'C/T': 0, 'T/C': 0,
                    'A/C': 0, 'A/T': 0, 'G/C': 0, 'G/T': 0, 'C/A': 0, 'C/G': 0, 'T/A':0, 'T/G': 0} for CHROM in Chromosomes}
# The genome-wide Ts and Tv counts
S11_SNPs_Total = {'Ts': 0, 'Tv': 0, 'A/G': 0, 'G/A': 0, 'C/T': 0, 'T/C': 0,
                  'A/C': 0, 'A/T': 0, 'G/C': 0, 'G/T': 0, 'C/A': 0, 'C/G': 0, 'T/A':0, 'T/G': 0}

In [6]:
# Calculate from SNPs the Ts and Tv of a Chromosome
for CHROM in Chromosomes:
    CHROMvcf = S11vcf.fetch(CHROM, 0)
    for record in CHROMvcf:
        if record.is_snp:
            
            # Get the reference nt
            ref = record.alleles[0]
            # Get the called nts
            mat, pat = record.genotype(sample).gt_bases.split('/') # data is un-phased so separator will always be '/'
            
            # Note: all calls are homozygous in these files,
            # so we only need to use either mat or pat call for counting
            if record.is_transition:
                S11_SNPs[CHROM]['Ts'] += 1
                S11_SNPs_Total['Ts'] += 1
            else:
                S11_SNPs[CHROM]['Tv'] += 1
                S11_SNPs_Total['Tv'] += 1
            
            # Now add count for specific substitution
            S11_SNPs[CHROM]['/'.join([ref, mat])] += 1
            S11_SNPs_Total['/'.join([ref, mat])] += 1

In [7]:
S11_SNPs

{'A01': {'A/C': 2104,
  'A/G': 5435,
  'A/T': 2838,
  'C/A': 2132,
  'C/G': 1400,
  'C/T': 5751,
  'G/A': 5670,
  'G/C': 1326,
  'G/T': 2201,
  'T/A': 2880,
  'T/C': 5653,
  'T/G': 2241,
  'Ts': 22459,
  'Tv': 17172},
 'A02': {'A/C': 2759,
  'A/G': 7573,
  'A/T': 3787,
  'C/A': 2849,
  'C/G': 1823,
  'C/T': 7901,
  'G/A': 7861,
  'G/C': 1908,
  'G/T': 2826,
  'T/A': 3834,
  'T/C': 7843,
  'T/G': 2854,
  'Ts': 31132,
  'Tv': 22686},
 'A03': {'A/C': 3084,
  'A/G': 8093,
  'A/T': 4134,
  'C/A': 3107,
  'C/G': 2108,
  'C/T': 8123,
  'G/A': 8117,
  'G/C': 2113,
  'G/T': 3078,
  'T/A': 4181,
  'T/C': 8046,
  'T/G': 3038,
  'Ts': 32336,
  'Tv': 24886},
 'A04': {'A/C': 2842,
  'A/G': 7476,
  'A/T': 3816,
  'C/A': 2914,
  'C/G': 1729,
  'C/T': 7659,
  'G/A': 7402,
  'G/C': 1778,
  'G/T': 2819,
  'T/A': 3750,
  'T/C': 7355,
  'T/G': 2780,
  'Ts': 29827,
  'Tv': 22493},
 'A05': {'A/C': 2976,
  'A/G': 7964,
  'A/T': 3890,
  'C/A': 2853,
  'C/G': 1841,
  'C/T': 7772,
  'G/A': 7717,
  'G/C': 1890,
 

In [8]:
S11_SNPs_Total

{'A/C': 26321,
 'A/G': 70343,
 'A/T': 35366,
 'C/A': 26879,
 'C/G': 17370,
 'C/T': 71917,
 'G/A': 71469,
 'G/C': 17424,
 'G/T': 26893,
 'T/A': 35549,
 'T/C': 70195,
 'T/G': 26858,
 'Ts': 283376,
 'Tv': 213208}

In [9]:
283376/213208.0

1.329105849686691

In [10]:
283376 + 213208

496584

In [11]:
# Open a VCF Reader to parse the file
R09vcf = vcf.Reader(filename='R09-homozygous-variants.snpeff.unique.vcf.gz')

In [12]:
# get the sample name
sample = R09vcf.samples[0]

In [13]:
# The per-chromosome Ts and Tv counts
R09_SNPs = {CHROM: {'Ts': 0, 'Tv': 0, 'A/G': 0, 'G/A': 0, 'C/T': 0, 'T/C': 0,
                    'A/C': 0, 'A/T': 0, 'G/C': 0, 'G/T': 0, 'C/A': 0, 'C/G': 0, 'T/A':0, 'T/G': 0} for CHROM in Chromosomes}
# The genome-wide Ts and Tv counts
R09_SNPs_Total = {'Ts': 0, 'Tv': 0, 'A/G': 0, 'G/A': 0, 'C/T': 0, 'T/C': 0,
                  'A/C': 0, 'A/T': 0, 'G/C': 0, 'G/T': 0, 'C/A': 0, 'C/G': 0, 'T/A':0, 'T/G': 0}

In [14]:
# Calculate from SNPs the Ts and Tv of a Chromosome
for CHROM in Chromosomes:
    CHROMvcf = R09vcf.fetch(CHROM, 0)
    for record in CHROMvcf:
        if record.is_snp:
            
            # Get the reference nt
            ref = record.alleles[0]
            # Get the called nts
            mat, pat = record.genotype(sample).gt_bases.split('/') # data is un-phased so separator will always be '/'
            
            # Note: all calls are homozygous in these files,
            # so we only need to use either mat or pat call for counting
            if record.is_transition:
                R09_SNPs[CHROM]['Ts'] += 1
                R09_SNPs_Total['Ts'] += 1
            else:
                R09_SNPs[CHROM]['Tv'] += 1
                R09_SNPs_Total['Tv'] += 1
            
            # Now add count for specific substitution
            R09_SNPs[CHROM]['/'.join([ref, mat])] += 1
            R09_SNPs_Total['/'.join([ref, mat])] += 1

In [15]:
R09_SNPs

{'A01': {'A/C': 2918,
  'A/G': 7490,
  'A/T': 3810,
  'C/A': 3003,
  'C/G': 1884,
  'C/T': 7663,
  'G/A': 7654,
  'G/C': 1859,
  'G/T': 2861,
  'T/A': 3894,
  'T/C': 7494,
  'T/G': 2802,
  'Ts': 30239,
  'Tv': 23093},
 'A02': {'A/C': 1537,
  'A/G': 4021,
  'A/T': 2123,
  'C/A': 1562,
  'C/G': 939,
  'C/T': 4176,
  'G/A': 4181,
  'G/C': 906,
  'G/T': 1569,
  'T/A': 2084,
  'T/C': 3972,
  'T/G': 1490,
  'Ts': 16317,
  'Tv': 12243},
 'A03': {'A/C': 2919,
  'A/G': 7687,
  'A/T': 3993,
  'C/A': 2975,
  'C/G': 2044,
  'C/T': 7795,
  'G/A': 7814,
  'G/C': 2022,
  'G/T': 3086,
  'T/A': 3935,
  'T/C': 7623,
  'T/G': 2880,
  'Ts': 30863,
  'Tv': 23910},
 'A04': {'A/C': 1664,
  'A/G': 4482,
  'A/T': 2422,
  'C/A': 1771,
  'C/G': 1107,
  'C/T': 4597,
  'G/A': 4774,
  'G/C': 1140,
  'G/T': 1802,
  'T/A': 2412,
  'T/C': 4478,
  'T/G': 1667,
  'Ts': 18297,
  'Tv': 14019},
 'A05': {'A/C': 2014,
  'A/G': 5129,
  'A/T': 2699,
  'C/A': 2178,
  'C/G': 1359,
  'C/T': 5356,
  'G/A': 5358,
  'G/C': 1336,
  '

In [16]:
R09_SNPs_Total

{'A/C': 25733,
 'A/G': 68186,
 'A/T': 35034,
 'C/A': 26540,
 'C/G': 17225,
 'C/T': 70041,
 'G/A': 70105,
 'G/C': 17049,
 'G/T': 26548,
 'T/A': 34974,
 'T/C': 67730,
 'T/G': 25671,
 'Ts': 275551,
 'Tv': 209285}

In [17]:
275551/209285.0

1.3166304321857754

In [18]:
275551 + 209285

484836