## This is the initial network design for chromstem-net

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [27]:
import os
import torch
import pandas as pd
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, utils

This is the dataset generator class, to be loaded into the DataLoader tool

This is adapted from the pytorch tutorial: https://pytorch.org/tutorials/beginner/data_loading_tutorial.html

In [41]:
class ChromstemDataset(Dataset):
    """Chromstem Dataset"""
    
    def __init__(self,csv_file,root_dir,transform=None):
        """
        Args:
            csv_file (string): Master CSV file that will hold all the data
            root_dir (string): Directory where the datasets are labeled
            transform (callable, optional): Optional transformations to samples
        """
        
        f = open(csv_file,'r')
        csv_length = len(f.readlines()[-1].strip())
        f.close()
        col_names = ['File'] + ['Nucl%d' % i for i in np.arange(1,csv_length)]
        self.nucl_coords_frame_ = pd.read_csv(csv_file,engine='python',header=None,names=col_names)
        self.root_dir_ = root_dir
        self.transform_ = transform
    
    def read_chromstem(self,fnme):
        x,y,z,rho = np.loadtxt(fnme,comments='#',unpack=True)
        tensor = torch.empty(int(x[0]),int(y[0]),int(z[0]))
        
        for i,arr in enumerate(zip(x[1:],y[1:],z[1:])):
            tensor[int(arr)] = rho[i]
        
        # Convert to sparse matrix as data will be sparse
        tensor = torch.Sparse.FloatTensor(tensor)
        return tensor
    
    def __len__(self):
        return(len(self.nucl_coords_frame_))
    
    def __getitem__(self,idx):
        if torch.is_tensor(idx):
            idx = idx.tolist()
        
        chromstem_name = os.path.join(self.root_dir_,self.nucl_coords_frame_.iloc[idx,0])
        chromstem = self.read_chromstem(chromstem_name)
        nucl_coords = self.nucl_coords_frame_.iloc[idx,1:]
        nucl_coords = np.array([nucl_coords])
        nucl_coords = nucl_coords.astype('float').reshape(-1,3)
        
        sample = {'chromstem' : chromstem, 'nucl_coords' : nucl_coords}
        
        if self.transform:
            sample = self.transform(sample)
            
        return sample
        

Here, would be useful to provide a plotting function for samples in the dataset.
The main input to this function is any sample from the dataset and it would output the density and "centromere" in a 3D plot.
To do so accurately, I am going to use the ipyvolume widget.

In [4]:
import numpy as np
import ipyvolume as ipv

In [5]:
def plot_sample(sample):
    ipv.figure()
    # Plot voxels of density
    ipv.scatter(sample['chromstem'][:,0],sample['chromstem'][:,1],sample['chromstem'][:,2], 
                color='blue',size=10,marker='box',opacity=0.5)
    
    # Plot nucleosome centers
    ipv.scatter(sample['nucl_coords'][:,0],sample['nucl_coords'][:,1],sample['nucl_coords'][:,2],
               color='red',size=5,marker='sphere')
    
    ipv.show()
    return

This is a quick test of the widget and plotting function before the data is loaded

In [6]:
sample = {'chromstem' :  np.asarray([[1,2,3]]), 'nucl_coords' : np.asarray([[0,1,2],[5,10,0.2],[10,3.3,2]])}
plot_sample(sample)

VBox(children=(Figure(camera=PerspectiveCamera(fov=46.0, position=(0.0, 0.0, 2.0), quaternion=(0.0, 0.0, 0.0, â€¦

Here is where the dataset is loaded in. The .csv file should be located in the current directory, so the root is '.'

In [42]:
chromstem_dataset = ChromstemDataset(csv_file='output_label_test.csv',
                                     root_dir='.')
print(chromstem_dataset[0])

TypeError: int() argument must be a string, a bytes-like object or a number, not 'tuple'