In [51]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
import pyBigWig

__Browser visualization:__

1) run this notebook: read in the correct file you want to visualize and format the pandas dataframe to have 4 columns (chr, start_pos, end_pos, value)
2) format the pandas dataframe to not have any overlapping regions
3) create 4 lists from the 4 pandas dataframe columns
4) initialize and open for writing the bigwig file
5) add header to the bigwig file
6) write data into the bigwig file
7) close file
8) read file back in and check that everything is correct
9) go to the neural_net_new/browser_visualisation/ folder from the terminal write: 
    * git add mybigwigfilethatIjustcreated.bw
    * git commit -m "pushing my new bigwig file because I am cool"
    * git push origin main
10) go to the USCS genome browser, go to my tracks and add a new track
    * example of the track line to input: track type=bigWig name="Example One" description="A bigWig file" bigDataUrl=http://genome.ucsc.edu/goldenPath/help/examples/bigWigExample.bw
    * replace the name, description with relevant information and get the raw bigwig file link from github and replace the bigDataUrl with this raw link
    * submit the track and check that everything looks correct
    * if you wish to create a collection, do a right click on the visualized track, create a new collection, add tracks there and then use merge option transparent to show transparent bar charts that overlap

## Read in data

In [52]:
data = pd.read_csv('../predictions/final_model_predictions_overlapping.tsv', sep='\t')
data

Unnamed: 0,#chr,start_pos,end_pos,C02M02
0,chr12,10025,10226,0.177507
1,chr12,10050,10251,0.223625
2,chr12,10075,10276,0.249664
3,chr12,10100,10301,0.232440
4,chr12,10125,10326,0.283539
...,...,...,...,...
6544547,chr22,50807950,50808151,0.270149
6544548,chr22,50807975,50808176,0.258913
6544549,chr22,50808000,50808201,0.263885
6544550,chr22,50808025,50808226,0.527807


In [53]:
chr12 = data.loc[data['#chr'] == "chr12"]
chr22 = data.loc[data['#chr'] == "chr22"]

In [54]:
chr22.head()

Unnamed: 0,#chr,start_pos,end_pos,C02M02
5172326,chr22,10510350,10510551,0.101109
5172327,chr22,10510375,10510576,0.144975
5172328,chr22,10510400,10510601,0.166753
5172329,chr22,10511375,10511576,0.249189
5172330,chr22,10511400,10511601,0.274572


In [55]:
chr22.tail()

Unnamed: 0,#chr,start_pos,end_pos,C02M02
6544547,chr22,50807950,50808151,0.270149
6544548,chr22,50807975,50808176,0.258913
6544549,chr22,50808000,50808201,0.263885
6544550,chr22,50808025,50808226,0.527807
6544551,chr22,50808050,50808251,1.194067


## Select relevant columns

In [58]:
chr22_rm = chr22.iloc[:,-1:]

In [59]:
chr22_rm.head()

Unnamed: 0,C02M02
5172326,0.101109
5172327,0.144975
5172328,0.166753
5172329,0.249189
5172330,0.274572


In [60]:
new_df = pd.concat([chr12.iloc[:,0:3],chr12.iloc[:,-1:]],axis = 1)

In [61]:
new_df.reset_index(drop=True, inplace=True) 

In [62]:
new_df.head()

Unnamed: 0,#chr,start_pos,end_pos,C02M02
0,chr12,10025,10226,0.177507
1,chr12,10050,10251,0.223625
2,chr12,10075,10276,0.249664
3,chr12,10100,10301,0.23244
4,chr12,10125,10326,0.283539


## Rename columns if needed

In [63]:
new_df = new_df.rename(columns = {"#chr": "chr"})

In [64]:
new_df.tail()

Unnamed: 0,chr,start_pos,end_pos,C02M02
5172321,chr12,133264625,133264826,0.280969
5172322,chr12,133264650,133264851,0.284651
5172323,chr12,133264675,133264876,0.214697
5172324,chr12,133264700,133264901,0.272894
5172325,chr12,133264725,133264926,0.461689


## Make windows non overlapping (the next one will begin where the last one ended)

In [65]:
start_pos_list_replace = new_df["start_pos"].tolist()
start_pos_list_replace = start_pos_list_replace[1:] + [new_df["end_pos"].tolist()[-1]]

In [66]:
new_df_non_overlap = new_df
new_df_non_overlap["end_pos"] = start_pos_list_replace
new_df_non_overlap.tail()

Unnamed: 0,chr,start_pos,end_pos,C02M02
5172321,chr12,133264625,133264650,0.280969
5172322,chr12,133264650,133264675,0.284651
5172323,chr12,133264675,133264700,0.214697
5172324,chr12,133264700,133264725,0.272894
5172325,chr12,133264725,133264926,0.461689


## Initialize the bigwig file and add header

In [67]:
bw = pyBigWig.open("bigwig_chr_22.bw", "w")
bw.addHeader([("chr22", 50818468)])

# chromosome lengths can be for hg38 can be found from here: https://www.ncbi.nlm.nih.gov/assembly/GCF_000001405.39

## Get lists from pandas dataframe columns

In [68]:
chrom_list = new_df_non_overlap["chr"].tolist()
chrom_list = np.array(["chr22"] * len(chrom_list)) 
start_pos_list = new_df_non_overlap["start_pos"].tolist()
end_pos_list = new_df_non_overlap["end_pos"].tolist()
target_list = new_df_non_overlap["C02M02"].tolist()
target_list[0:6]

[0.17750706, 0.22362502, 0.24966374, 0.23244017, 0.28353882, 0.20151606]

## Write data to bigwig file

In [69]:
for i in range(0, len(chrom_list)):
    bw.addEntries([chrom_list[i]], [start_pos_list[i]], ends=[end_pos_list[i]], values=[target_list[i]])

## Close the created bigwig file

In [74]:
bw.close()

## Read file in again and check that everything is correct

In [75]:
C02M02 = pyBigWig.open("bigwig_chr_22.bw")

In [76]:
C02M02.isBigWig()

True

In [77]:
C02M02.chroms()

{'chr22': 50818468}

In [79]:
C02M02.values("chr22", 5010000, 5010003)

[0.6285964250564575, 0.6285964250564575, 0.6285964250564575]