In [1]:
#Perform imports
import torch
import bigwig_dataset
from pyfaidx import Fasta
import locs
from pybedtools import BedTool
import pyBigWig
import math


ModuleNotFoundError: No module named 'torch'

In [2]:
#Function and class defs
class ConvolutionBlock(torch.nn.Module):
    '''
        Residual block made of two length-preserving 1-D convolutions
        (kernel_size=3, stride=1, padding=1, no bias), each followed by a ReLU.

        args:
            in_channels - number of channels of the tensor passed to forward
            out_channels - number of channels of the tensor returned by forward

        The skip connection adds the block input when in_channels == out_channels.
        Otherwise the input cannot be added directly, so the output of conv1
        (which already has out_channels channels) is used as the skip branch.
    '''
    def __init__(self, in_channels, out_channels):
        super(ConvolutionBlock,self).__init__()
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.conv1 = torch.nn.Conv1d(
            in_channels,out_channels,kernel_size=3,stride=1,padding=1, bias=False
        )
        # conv2 always consumes the activation of conv1, so its input width is
        # out_channels (using in_channels here breaks every channel-changing block).
        self.conv2 = torch.nn.Conv1d(
            out_channels,out_channels,kernel_size=3,stride=1,padding=1,bias=False
        )

    def forward(self,x):
        '''
            args:
                x - tensor whose channel axis has in_channels entries

            output:
                tensor with out_channels channels and the same length as x
        '''
        if self.in_channels == self.out_channels:
            skip = x
            y = self.conv1(x)
        else:
            # Project first, then use the projection as the residual branch.
            skip = self.conv1(x)
            y = skip
        y = torch.nn.functional.relu(y)
        y = self.conv2(y)
        return torch.nn.functional.relu(skip + y)

class Model(torch.nn.Module):
    def __init__(self,n_blocks,max_channels):
        '''
            Crude model that attempts to learn a bigWig file from input Fasta file

            args:
                n_blocks - number of convolution "blocks" that the model will use. The architecture of these blocks was copied directly
                from EC414 Hw 10
                max_channels - upper bound on the number of output channels of the convolution layers. Block i outputs
                2**(2 + floor((log2(max_channels)-1)*i/n_blocks)) channels, so widths start at 4 and grow in powers of two
                without exceeding max_channels (the last block may be narrower than max_channels when n_blocks is small).
        '''


        super().__init__()
        max_pow = math.log2(max_channels)

        # Per-block output widths; the first block maps the 4 one-hot input channels to 4 channels.
        self.layer_out_channels = [2**(2+math.floor((max_pow-1)*i/n_blocks)) for i in range(n_blocks)]
        # Each block consumes the width produced by the previous one.
        layer_in_channels = [4] + self.layer_out_channels[:-1]
        self.blocks = torch.nn.Sequential(*[
            self.make_layer(n_in,n_out)
            for n_in, n_out in zip(layer_in_channels,self.layer_out_channels)
        ])
        # Size the head from the width actually produced by the last block,
        # which is not guaranteed to equal max_channels.
        self.linear = torch.nn.Linear(self.layer_out_channels[-1],1)
    
    def forward(self,seq):
        '''
            Forward pass for our experiment model
            
            args:
                seq - (4,n) tensor of a one-hot encoded genome sequence
                (a leading batch axis, i.e. (b,4,n), is passed through unchanged)
            
            output:
                (1,n) tensor of predictions ((b,1,n) for batched input)
        '''
        x = self.blocks(seq)
        # torch.nn.Linear acts on the last axis, so move channels last for the
        # per-position projection and then restore the (channels, length) layout.
        return self.linear(x.transpose(-1,-2)).transpose(-1,-2)

    def make_layer(self, in_channels, out_channels):
        '''
            Build one stage: a block that changes the channel count from in_channels
            to out_channels followed by a block that keeps out_channels.
        '''
        layers = [
            ConvolutionBlock(in_channels,out_channels),
            ConvolutionBlock(out_channels,out_channels)
        ]
        return torch.nn.Sequential(*layers)
    
def seqence_encoder(seq):
    '''
        One-hot encode a nucleotide sequence given as integer ASCII codes.

        args:
            seq - 1-D sequence of integer character codes (anything torch.as_tensor
            accepts, e.g. a uint8 numpy array, a tensor or a list of ints)

        output:
            (4,n) float tensor whose rows are A, C, G and T/U in that order.
            Upper- and lower-case letters are treated alike; any other code
            (e.g. N) yields an all-zero column.
    '''
    codes = torch.as_tensor(seq).to(torch.long)
    out = torch.zeros((4,codes.shape[0]))
    # Row index -> letters that activate it. T is what DNA FASTA files contain;
    # U is kept so inputs using the previous 85 -> row 3 mapping still encode the same.
    row_letters = ("Aa", "Cc", "Gg", "TtUu")
    for row, letters in enumerate(row_letters):
        for letter in letters:
            out[row, codes == ord(letter)] = 1.0
    return out

In [3]:
#Initialize Data
data = bigwig_dataset.BigWigDataset(
    bigwig_files = locs.BIGWIG_FILE,
    reference_fasta_file=locs.HUMAN_FA,
    input_bed_file=locs.TILED_BED
)
# Keep the first item bound: the SGD test cell reads data_0, so it has to be
# assigned by the notebook itself for Restart Kernel -> Run All to work.
data_0 = data[0]


In [4]:
#Initialize model
# Network to be trained (15 blocks, max_channels of 64).
pred = Model(
    n_blocks=15,
    max_channels=64,
)

# Mean-squared-error objective.
loss_fn = torch.nn.MSELoss()

# SGD with momentum over all model parameters; start from cleared gradients.
optimizer = torch.optim.SGD(
    pred.parameters(),
    lr=0.1,
    momentum=0.9,
)
optimizer.zero_grad()

In [6]:
#Test pipeline by performing one iteration of SGD
# NOTE(review): `y` (the regression target) is not assigned in any cell visible
# above, and `data_0` must be bound to one dataset item by an earlier cell.
# NOTE(review): the recorded traceback shows conv1d receiving a numpy.ndarray, so
# data_0['sequence'] has to be turned into a torch tensor before it reaches pred
# (presumably via seqence_encoder — confirm against what the dataset returns).

# Forward pass on the single item.
y_hat = pred(data_0['sequence'])
# NOTE(review): loss_fn is an MSELoss built with default arguments, which
# presumably already returns the mean, making torch.mean redundant — confirm.
loss = torch.mean(loss_fn(y_hat,y))
# Backpropagate and apply one optimizer update.
# NOTE(review): optimizer.zero_grad() is only called where the optimizer is
# created, so re-running this cell would add to the existing gradients.
loss.backward()
optimizer.step()
print(f'Loss of {loss} on prediction')

TypeError: conv1d() received an invalid combination of arguments - got (numpy.ndarray, Parameter, NoneType, tuple, tuple, tuple, int), but expected one of:
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, tuple of ints padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: (!numpy.ndarray!, !Parameter!, !NoneType!, !tuple!, !tuple!, !tuple!, int)
 * (Tensor input, Tensor weight, Tensor bias, tuple of ints stride, str padding, tuple of ints dilation, int groups)
      didn't match because some of the arguments have invalid types: (!numpy.ndarray!, !Parameter!, !NoneType!, !tuple!, !tuple!, !tuple!, int)
