# Check read numbers on each chromosome for signs of aneuploidy

## Load data and libraries

In [2]:
import pandas as pd
import numpy as np

# Per-chromosome read counts for the P2-0 library, which later cells use as the
# ancestral reference when computing coverage ratios.
P_table = pd.read_csv('/Users/chrisgraves/Documents/Yeast_data/Sequencing/alignments/read_frac/P2-0_reads_per_chrom.tsv',sep='\t')
# Drop the '*' row (presumably the bucket for reads without a reference
# sequence -- TODO confirm against the tool that produced the file).
# .copy() gives an independent frame, so the column assignments made in later
# cells do not write to a view of the read_csv result (SettingWithCopyWarning).
P_table = P_table[P_table['Chrom'] != '*'].copy()
P_table

Unnamed: 0,Chrom,Length,Num_reads,Unmapped
0,ref|NC_001133|,230218,97844,16
1,ref|NC_001134|,813184,297276,0
2,ref|NC_001135|,316620,129241,1
3,ref|NC_001136|,1531933,528077,3
4,ref|NC_001137|,576874,220972,0
5,ref|NC_001138|,270161,116264,4
6,ref|NC_001139|,1090940,388487,0
7,ref|NC_001140|,562643,224200,3
8,ref|NC_001141|,439888,171562,7
9,ref|NC_001142|,745751,276970,0


In [3]:
# Lookup from the reference accession used in the alignment files to the
# conventional chromosome name (roman numerals, plus 'MITO').
chrom_pairs = [
    ('ref|NC_001133|', 'I'),
    ('ref|NC_001134|', 'II'),
    ('ref|NC_001135|', 'III'),
    ('ref|NC_001136|', 'IV'),
    ('ref|NC_001137|', 'V'),
    ('ref|NC_001138|', 'VI'),
    ('ref|NC_001139|', 'VII'),
    ('ref|NC_001140|', 'VIII'),
    ('ref|NC_001141|', 'IX'),
    ('ref|NC_001142|', 'X'),
    ('ref|NC_001143|', 'XI'),
    ('ref|NC_001144|', 'XII'),
    ('ref|NC_001145|', 'XIII'),
    ('ref|NC_001146|', 'XIV'),
    ('ref|NC_001147|', 'XV'),
    ('ref|NC_001148|', 'XVI'),
    ('ref|NC_001224|', 'MITO'),
]
# Keep the two parallel lists available under their original names; later
# cells refer to chrom_num directly.
chrom_key = [pair[0] for pair in chrom_pairs]
chrom_num = [pair[1] for pair in chrom_pairs]
chrom_dict = dict(chrom_pairs)

In [4]:
#rename chromosomes
# Translate reference accessions to chromosome names via chrom_dict. Values
# that are not accession keys are passed through unchanged, so re-running this
# cell after the names were already converted is a no-op rather than a KeyError.
P_table['Chrom'] = P_table['Chrom'].astype(str).map(lambda name: chrom_dict.get(name, name))
# Still fail fast (with the same exception type as a plain dict lookup) when a
# name is neither a known accession nor an already-converted chromosome name.
unknown_chroms = set(P_table['Chrom']) - set(chrom_dict.values())
if unknown_chroms:
    raise KeyError('Chromosome names missing from chrom_dict: %s' % sorted(unknown_chroms))
P_table

Unnamed: 0,Chrom,Length,Num_reads,Unmapped
0,I,230218,97844,16
1,II,813184,297276,0
2,III,316620,129241,1
3,IV,1531933,528077,3
4,V,576874,220972,0
5,VI,270161,116264,4
6,VII,1090940,388487,0
7,VIII,562643,224200,3
8,IX,439888,171562,7
9,X,745751,276970,0


# Calculate fraction of reads mapping to each chromosome

In [5]:
# Uncommenting the next line would drop the MITO row, so mitochondrial reads
# would no longer count towards the total used as the denominator below.
#P_table = P_table.loc[P_table['Chrom']!='MITO']
# Read counts as an (n_chrom, 1) column array. DataFrame.as_matrix() has been
# removed from pandas; selecting with a list of columns and taking .values
# yields the same 2-D array.
vals = P_table[['Num_reads']].values
# Fraction of all reads that map to each chromosome. The builtin float()
# replaces the removed np.float alias and forces true division on Python 2.
fraction = vals / float(vals.sum())
# `fraction` stays 2-D because later cells index its rows with [0]; the column
# stored in the table is the flattened 1-D view of the same numbers.
P_table['Frac'] = fraction[:, 0]
P_table

Unnamed: 0,Chrom,Length,Num_reads,Unmapped,Frac
0,I,230218,97844,16,0.020787
1,II,813184,297276,0,0.063156
2,III,316620,129241,1,0.027457
3,IV,1531933,528077,3,0.11219
4,V,576874,220972,0,0.046945
5,VI,270161,116264,4,0.0247
6,VII,1090940,388487,0,0.082534
7,VIII,562643,224200,3,0.047631
8,IX,439888,171562,7,0.036448
9,X,745751,276970,0,0.058842


In [6]:
# Ancestral read fraction keyed by chromosome name. Keys are taken from the
# table's own 'Chrom' column rather than from chrom_num, so each fraction is
# labelled with the row it was computed from even if the table lacks a
# chromosome or lists them in a different order.
# Each value is a one-element list (one row of the column array `fraction`),
# which is why lookups end in [0].
P_dict = dict(zip(P_table['Chrom'], fraction.tolist()))
P_dict['IV'][0]

0.1121896697728193

In [7]:
import os

# Directory containing one reads-per-chromosome TSV per sequenced library.
read_frac_dir = '/Users/chrisgraves/Documents/Yeast_data/Sequencing/alignments/read_frac'

# One annotated per-library frame is collected per file and concatenated once
# at the end, instead of re-copying the growing table on every iteration.
frames = []

# os.listdir returns names in arbitrary order; sorting makes the row order
# (and therefore the integer index) of `table` reproducible between runs.
for f in sorted(os.listdir(read_frac_dir)):
    # Only files whose name starts with a treatment letter ('H' or 'C') are
    # evolved-line libraries; everything else in the directory is skipped.
    if not f.startswith(('H', 'C')):
        continue

    # File names are parsed as '<treatment letter><strain>-<time>_...'.
    treatment = f[0]
    strain_split = f.split('-')
    strain = int(strain_split[0][1:])
    time = int(strain_split[1].split('_')[0])

    # listdir yields bare file names, so the directory has to be joined back
    # on before reading.
    temp_table = pd.read_csv(os.path.join(read_frac_dir, f), sep='\t')
    # Drop the '*' row; .copy() so the column assignments below act on an
    # independent frame rather than a slice of the read_csv result.
    temp_table = temp_table[temp_table['Chrom'] != '*'].copy()
    temp_table['Chrom'] = temp_table['Chrom'].apply(lambda x: chrom_dict[str(x)])
    #temp_table = temp_table[temp_table['Chrom'] != 'MITO']

    # Fraction of this library's reads mapping to each chromosome. float()
    # replaces the removed np.float alias and forces true division on Python 2.
    temp_table['Frac'] = temp_table['Num_reads'] / float(temp_table['Num_reads'].sum())
    temp_table['Treatment'] = treatment
    temp_table['Strain'] = strain
    temp_table['Time'] = time

    frames.append(temp_table)

# Number of libraries that were loaded (kept under its original name).
counter = len(frames)
if not frames:
    raise ValueError('No H*/C* read-count files found in %s' % read_frac_dir)

table = pd.concat(frames, ignore_index=True)
table

Unnamed: 0,Chrom,Length,Num_reads,Unmapped,Frac,Treatment,Strain,Time
0,I,230218,80115,8,0.022619,C,1,12
1,II,813184,224426,1,0.063363,C,1,12
2,III,316620,103523,2,0.029228,C,1,12
3,IV,1531933,370186,1,0.104516,C,1,12
4,V,576874,170980,0,0.048273,C,1,12
5,VI,270161,95752,3,0.027034,C,1,12
6,VII,1090940,277906,0,0.078462,C,1,12
7,VIII,562643,174662,3,0.049313,C,1,12
8,IX,439888,133741,4,0.037760,C,1,12
9,X,745751,208348,2,0.058824,C,1,12


In [108]:
# NOTE(review): stand-alone arithmetic that no other cell refers to; what
# 1920, 6 and 20 represent is not stated anywhere in the notebook -- confirm
# with the author or remove. The displayed result (16, not 16.0) indicates the
# cell was run with integer division.
(1920/6)/20

16

In [9]:
# Attach the ancestor's read fraction for the same chromosome to every row.
# P_dict values are one-element lists, so the number is unwrapped with [0].
anc_frac_by_chrom = [P_dict[str(chrom)][0] for chrom in table['Chrom']]
table['Anc_frac'] = anc_frac_by_chrom
table

Unnamed: 0,Chrom,Length,Num_reads,Unmapped,Frac,Treatment,Strain,Time,Anc_frac
0,I,230218,80115,8,0.022619,C,1,12,0.020787
1,II,813184,224426,1,0.063363,C,1,12,0.063156
2,III,316620,103523,2,0.029228,C,1,12,0.027457
3,IV,1531933,370186,1,0.104516,C,1,12,0.112190
4,V,576874,170980,0,0.048273,C,1,12,0.046945
5,VI,270161,95752,3,0.027034,C,1,12,0.024700
6,VII,1090940,277906,0,0.078462,C,1,12,0.082534
7,VIII,562643,174662,3,0.049313,C,1,12,0.047631
8,IX,439888,133741,4,0.037760,C,1,12,0.036448
9,X,745751,208348,2,0.058824,C,1,12,0.058842


In [10]:
# Relative coverage: this library's read fraction for a chromosome divided by
# the ancestor's fraction for the same chromosome.
table['Cov_ratio'] = table['Frac'].div(table['Anc_frac'])
table.head()

Unnamed: 0,Chrom,Length,Num_reads,Unmapped,Frac,Treatment,Strain,Time,Anc_frac,Cov_ratio
0,I,230218,80115,8,0.022619,C,1,12,0.020787,1.088145
1,II,813184,224426,1,0.063363,C,1,12,0.063156,1.003276
2,III,316620,103523,2,0.029228,C,1,12,0.027457,1.064495
3,IV,1531933,370186,1,0.104516,C,1,12,0.11219,0.931601
4,V,576874,170980,0,0.048273,C,1,12,0.046945,1.028289


In [11]:
# Rows whose coverage ratio relative to the ancestor exceeds 1.8.
is_high_ratio = table['Cov_ratio'].gt(1.8)
doubles = table.loc[is_high_ratio]
doubles

Unnamed: 0,Chrom,Length,Num_reads,Unmapped,Frac,Treatment,Strain,Time,Anc_frac,Cov_ratio
33,MITO,85779,86535,0,0.047474,C,1,1,0.007694,6.169813
50,MITO,85779,229694,0,0.050260,C,1,3,0.007694,6.531985
67,MITO,85779,213602,0,0.035926,C,1,5,0.007694,4.669070
84,MITO,85779,80086,0,0.019849,C,1,7,0.007694,2.579641
101,MITO,85779,145349,0,0.032900,C,1,9,0.007694,4.275831
118,MITO,85779,97284,0,0.023423,C,10,12,0.007694,3.044088
135,MITO,85779,78886,0,0.041480,C,10,1,0.007694,5.390896
152,MITO,85779,163425,0,0.029212,C,10,3,0.007694,3.796470
169,MITO,85779,126326,0,0.019284,C,10,5,0.007694,2.506183
186,MITO,85779,209367,0,0.050680,C,10,7,0.007694,6.586579


In [12]:
# Same filter with a looser threshold (ratio above 1.3), this time leaving out
# the MITO rows so that only nuclear chromosomes are listed.
is_elevated = table['Cov_ratio'].gt(1.3)
is_nuclear = table['Chrom'].ne('MITO')
doubles = table.loc[is_elevated & is_nuclear]
doubles

Unnamed: 0,Chrom,Length,Num_reads,Unmapped,Frac,Treatment,Strain,Time,Anc_frac,Cov_ratio
34,I,230218,125918,6,0.027553,C,1,3,0.020787,1.325481
39,VI,270161,150300,2,0.032888,C,1,3,0.0247,1.331477
96,XII,1078177,808833,1,0.183083,C,1,9,0.12954,1.413335
113,XII,1078177,720059,6,0.173366,C,10,12,0.12954,1.338325
147,XII,1078177,1013887,7,0.18123,C,10,3,0.12954,1.399035
164,XII,1078177,1162009,5,0.177382,C,10,5,0.12954,1.369327
181,XII,1078177,733211,6,0.177485,C,10,7,0.12954,1.370119
340,I,230218,191456,6,0.031591,C,3,3,0.020787,1.519759
342,III,316620,226358,3,0.03735,C,3,3,0.027457,1.360303
345,VI,270161,221289,4,0.036514,C,3,3,0.0247,1.478273


### Conclusions - no evidence to support aneuploidy
There are up to 10-fold differences in coverage of mitochondrial sequences between libraries compared to coverage in the ancestor. Only a few nuclear chromosomes have strong increases in coverage relative to the ancestor, but there is nothing in the time series that would suggest duplication of an entire chromosome in one of the lineages.