In [2]:
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import pandas

# chrom changes file: column labels are supplied explicitly through `names=`.
col_names = [
    'Read ID',
    'chromosome',
    'Y Starting Base Position',
    'Percent Match',
    'Second Position',
    'ChangeTo',
    'Starting Base Position',
    'Percent Match2',
    'Second Position2',
]

# Read the table and use the "Read ID" column as the index (the column itself is dropped).
chrom_changes = pandas.read_table(
    'LP6005636-DNA_H02.chrom_changes.txt',
    names=col_names,
).set_index("Read ID", drop=True)

# Preview the first rows.
chrom_changes.head()

Unnamed: 0_level_0,chromosome,Y Starting Base Position,Percent Match,Second Position,ChangeTo,Starting Base Position,Percent Match2,Second Position2
Read ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
C2D1RACXX_1:8:1101:1000601:0,chrY,13484087,100M,=,chr18,108134,38S44M18S,=
C2D1RACXX_1:8:1101:1000601:0,chrY,13484087,100M,=,chr18,107998,5S91M4S,=
C2D1RACXX_1:8:1101:1000739:0,chrY,13664682,3S84M13S,=,chr4,49105559,48M5D52M,chr21
C2D1RACXX_1:8:1101:1000739:0,chrY,13664682,3S84M13S,=,chr21,10776646,56M44S,chr4
C2D1RACXX_1:8:1101:1001197:0,chrY,26046243,100M,=,chr10,59885544,61S25M14S,=


In [3]:
# Chromosome names to iterate over, plus a chromosome -> length lookup used to
# configure the axes of the graphs.
# range() is wrapped in list() so the concatenation also works on Python 3,
# where range() is no longer a list and `range(...) + [...]` raises TypeError.
chroms = ['chr%s' % i for i in list(range(1, 23)) + ['X', 'M']]

# lengths of chromosomes taken from https://genome.ucsc.edu/goldenpath/help/hg19.chrom.sizes
# lengths listed in order from 1-22, X, M, Y
chr_lengths = [249250621, 243199373, 198022430, 191154276, 180915260,
               171115067, 159138663, 146364022, 141213431,
               135534747, 135006516, 133851895, 115169878, 107349540,
               102531392, 90354753, 81195210, 78077248, 59128983,
               63025520, 48129895, 51304566, 155270560, 16571, 59373566]

# Pair every name in `chroms` with the length at the same position. zip() stops
# at the shorter sequence, so the trailing chrY entry is not consumed here.
chr_lengths_dict = dict(zip(chroms, chr_lengths))
# chrY is not part of `chroms`; its length is the last entry of the list.
chr_lengths_dict['chrY'] = chr_lengths[-1]


In [14]:
#Scatterplots for chrY vs chr1 - chr10
#pp = PdfPages('chrom_changes scatter plots.pdf')
for chrom in chroms[0:10]:
    # Rows whose 'ChangeTo' column equals the current chromosome.
    subset = chrom_changes.loc[chrom_changes['ChangeTo'] == chrom]

    # Create a fresh figure/axes for every chromosome. Without this, `ax` is
    # undefined on a fresh kernel (NameError), or is a stale Axes left over
    # from another cell that every iteration would draw onto.
    fig, ax = plt.subplots()
    ax.set_title("Scatter Plot of chrY read vs %s read" %chrom)
    ax.set_ylabel('%s Starting BP Position of read' %chrom)
    ax.set_xlabel('chrY Starting BP Position of read')

    # Diagnostic output: chromosome name, sizes of the two plotted columns,
    # and the first few values of each.
    print(chrom)
    print(len(subset['Y Starting Base Position']))
    print(len(subset['Starting Base Position']))
    print(subset['Starting Base Position'].head())
    print(subset['Y Starting Base Position'].head())

    # NOTE: only the first 100 rows of the subset are plotted in this cell.
    ax.scatter(subset['Y Starting Base Position'][0:100], subset['Starting Base Position'][0:100], s = 5)

    # x spans the full length of chrY, y the full length of the current chromosome.
    ax.set_xlim(0, chr_lengths_dict['chrY'])
    ax.set_ylim(0, chr_lengths_dict[chrom])
    #plt.savefig(pp, format='pdf')
    plt.show()
    # Release the figure so open figures do not accumulate across iterations.
    plt.close(fig)
#pp.close()



chr1
445196
445196
Read ID
C2D1RACXX_1:8:1101:1001256:0    119721809
C2D1RACXX_1:8:1101:100254:0        126003
C2D1RACXX_1:8:1101:100254:0        126258
C2D1RACXX_1:8:1101:1003477:0    119720918
C2D1RACXX_1:8:1101:1009740:0    142541236
Name: Starting Base Position, dtype: int64
Read ID
C2D1RACXX_1:8:1101:1001256:0    16000191
C2D1RACXX_1:8:1101:100254:0     26432337
C2D1RACXX_1:8:1101:100254:0     26432337
C2D1RACXX_1:8:1101:1003477:0    25225021
C2D1RACXX_1:8:1101:1009740:0    58979357
Name: Y Starting Base Position, dtype: int64
chr2
461781
461781
Read ID
C2D1RACXX_1:8:1101:1004727:0    172000034
C2D1RACXX_1:8:1101:1006057:0     89860409
C2D1RACXX_1:8:1101:1008044:0     96598291
C2D1RACXX_1:8:1101:1008044:0     96598397
C2D1RACXX_1:8:1101:1012259:0     89860431
Name: Starting Base Position, dtype: int64
Read ID
C2D1RACXX_1:8:1101:1004727:0    18647583
C2D1RACXX_1:8:1101:1006057:0    13458393
C2D1RACXX_1:8:1101:1008044:0    28579517
C2D1RACXX_1:8:1101:1008044:0    28579517
C2D1RACXX_

In [3]:
#Scatterplots for chrY vs chr11 - chrM
# One scatter plot per chromosome is written as a page of the PDF. The context
# manager closes the PDF file even if plotting raises part-way through the loop.
with PdfPages('chrom_changes scatter plots2.pdf') as pp2:
    for chrom in chroms[10:]:
        # Rows whose 'ChangeTo' column equals the current chromosome.
        subset = chrom_changes.loc[chrom_changes['ChangeTo'] == chrom]

        fig, ax = plt.subplots()
        ax.set_title("Scatter Plot of chrY read vs %s read" %chrom)
        ax.set_ylabel('%s Starting BP Position of read' %chrom)
        ax.set_xlabel('chrY Starting BP Position of read')
        ax.scatter(subset['Y Starting Base Position'], subset['Starting Base Position'], s = 5)

        # x spans the full length of chrY, y the full length of the current chromosome.
        ax.set_xlim(0, chr_lengths_dict['chrY'])
        ax.set_ylim(0, chr_lengths_dict[chrom])

        # Append this figure as a new page, then close it so figures do not
        # pile up in memory. Closed figures are not rendered inline; the PDF
        # is the output of this cell.
        pp2.savefig(fig)
        plt.close(fig)

In [None]:
# Number of rows in chrom_changes for each distinct value of the 'ChangeTo' column.
chrom_changes.groupby('ChangeTo').size()