In [7]:
import torch as t
import torch.nn as nn
import matplotlib.pyplot as plt
from torch.utils.data import Dataset
import h5py
import numpy as np
import librosa
import copy

In [8]:
class ArtistDataset(Dataset):
    """Globally standardised clips for one artist, with optional pitch-shift augmentation.

    Args:
        data: Array-like whose first axis indexes the individual items. It is
            standardised once, over all elements, to zero mean and unit
            standard deviation.
        distribution: Sequence of target frequencies used to pick pitch-shift
            targets (presumably in Hz -- confirm against the caller). When it
            is empty (the default) no augmentation is applied.
        sr: Sample rate handed to librosa and used to scale FFT bins.
    """

    def __init__(self, data, distribution=None, sr=22050):
        data = np.asarray(data)
        # NumPy reductions do not modify their input, so no defensive copies are needed.
        self.data = (data - np.mean(data)) / np.std(data)
        self.sr = sr
        # ``None`` sentinel instead of a mutable default shared between instances.
        self.distribution = [] if distribution is None else distribution

    def __len__(self):
        """Return the number of items (size of the first axis of the data)."""
        return self.data.shape[0]

    def __getitem__(self, index):
        """Return item ``index`` as a tensor, pitch-shifted with probability 0.5.

        Augmentation is skipped when no target distribution was supplied,
        because there is nothing to sample a target frequency from.
        """
        # np.array copies, so augmentation never writes into the stored data.
        x = np.array(self.data[index])

        # np.random.random() takes only an optional size; it already samples from [0, 1).
        if len(self.distribution) > 0 and np.random.random() < 0.5:
            x = self.pitch_shift(x)

        return t.from_numpy(x)

    def get_key(self):
        """Placeholder; not implemented yet."""
        pass

    def time_stretch(self, data, factor):
        """Time-stretch ``data`` by ``factor`` and resample it back towards ``len(data)``.

        The sample counts are passed as the "rates" of the resampler so that
        the output length is approximately the original length.
        """
        # Keyword arguments: recent librosa releases made these keyword-only.
        audio_stretched = librosa.effects.time_stretch(data, rate=factor)
        audio_stretched_resampled = librosa.resample(
            audio_stretched, orig_sr=len(audio_stretched), target_sr=len(data)
        )
        return audio_stretched_resampled

    def pitch_shift(self, data):
        """Shift ``data`` from its average frequency to one drawn from the distribution."""
        frequency = self.select_from_distribution()
        avg_frequency = self.get_avg_frequency(data)
        steps = self.get_steps(avg_frequency, frequency)

        return librosa.effects.pitch_shift(data, sr=self.sr, n_steps=steps)

    def get_avg_frequency(self, data):
        """Return the magnitude-weighted mean frequency (spectral centroid) of ``data``.

        NOTE(review): the original code called this method without defining
        it. The spectral centroid is used here as the "average frequency";
        replace it with an f0 tracker if the distribution holds fundamental
        frequencies.

        Returns:
            The centroid as a float scaled by ``self.sr``, or 0.0 for an
            all-zero signal.
        """
        samples = np.atleast_1d(np.asarray(data, dtype=float))
        magnitudes = np.abs(np.fft.rfft(samples, axis=-1))
        total = magnitudes.sum()
        if total == 0:
            return 0.0
        freqs = np.fft.rfftfreq(samples.shape[-1], d=1.0 / self.sr)
        return float((magnitudes * freqs).sum() / total)

    def select_from_distribution(self):
        """Draw one frequency, weighted by how often it occurs in the distribution.

        Values are rounded to one decimal before counting.

        Raises:
            ValueError: if the distribution is empty.
        """
        distribution = np.asarray(self.distribution, dtype=float)
        if distribution.size == 0:
            raise ValueError("cannot sample from an empty distribution")

        keys, counts = np.unique(np.around(distribution, 1), return_counts=True)
        # Most frequent first, matching the original ordering of the candidates.
        order = counts.argsort()[::-1]
        keys = keys[order]
        probabilities = counts[order] / counts.sum()

        return np.random.choice(keys, p=probabilities)

    def get_steps(self, avg_freq, freq):
        """Return the number of semitones that moves ``avg_freq`` to ``freq``.

        An interval in semitones is ``12 * log2(f_target / f_source)``.
        Non-positive frequencies have no defined interval, so 0.0 (no shift)
        is returned for them.
        """
        if avg_freq <= 0 or freq <= 0:
            return 0.0
        return float(12 * np.log2(freq / avg_freq))

    def close(self):
        """Close the backing file if one was attached as ``self.file``; otherwise do nothing."""
        file = getattr(self, "file", None)
        if file is not None:
            file.close()

In [29]:
class UpSample(nn.Module):
    """Conv -> instance norm -> 2x pixel shuffle -> GLU upsampling stage.

    The 5x5 convolution (reflection-padded by 2, stride 1) keeps the spatial
    size and produces ``output_channels`` channels. ``PixelShuffle(2)`` then
    doubles height and width while dividing the channel count by 4, and the
    GLU over dim 1 halves it again, so the stage emits
    ``output_channels // 8`` channels.

    Args:
        input_channels: Channels of the incoming tensor.
        output_channels: Channels produced by the convolution; must be
            divisible by 8 for the shuffle and the GLU to apply.
    """

    def __init__(self, input_channels, output_channels) -> None:
        super(UpSample, self).__init__()
        self.conv = nn.Sequential(
            nn.ReflectionPad2d(2),
            nn.Conv2d(in_channels=input_channels, out_channels=output_channels, kernel_size=5, stride=1),
        )
        # Built once and sized for the tensor it actually normalises (the conv
        # output). The default InstanceNorm2d has no parameters or buffers, so
        # the state_dict is unchanged.
        self.norm = nn.InstanceNorm2d(output_channels)
        self.upscale = nn.PixelShuffle(2)
        self.nonlinearity = nn.GLU(dim=1)

    def forward(self, x):
        """Map ``(B, input_channels, H, W)`` to ``(B, output_channels // 8, 2H, 2W)``."""
        normalized = self.norm(self.conv(x))
        return self.nonlinearity(self.upscale(normalized))

In [11]:
class DownSample(nn.Module):
    """Strided conv -> instance norm -> GLU downsampling stage.

    The 5x5 convolution with reflection padding 2 and stride 2 maps a spatial
    extent ``n`` to ``(n + 1) // 2``. The GLU over dim 1 halves the channel
    count, so the stage emits ``output_channels // 2`` channels.

    Args:
        input_channels: Channels of the incoming tensor.
        output_channels: Channels produced by the convolution; must be even
            for the GLU to split them.
    """

    def __init__(self, input_channels, output_channels) -> None:
        super(DownSample, self).__init__()
        self.conv = nn.Sequential(
            nn.ReflectionPad2d(2),
            nn.Conv2d(in_channels=input_channels, out_channels=output_channels, kernel_size=5, stride=2),
        )
        # Built once and sized for the conv output it normalises. The default
        # InstanceNorm2d has no parameters or buffers, so the state_dict is
        # unchanged.
        self.norm = nn.InstanceNorm2d(output_channels)
        self.nonlinearity = nn.GLU(dim=1)

    def forward(self, x):
        """Map ``(B, input_channels, H, W)`` to ``(B, output_channels // 2, ceil(H/2), ceil(W/2))``."""
        return self.nonlinearity(self.norm(self.conv(x)))

In [12]:
class ResBlock(nn.Module):
    """Residual block on 256-channel maps: conv -> norm -> GLU -> conv -> norm, plus skip.

    Both convolutions use a ``(1, 3)`` kernel, i.e. height 1 and width 3, so
    they slide along the last axis only. The zero padding is therefore applied
    to the left and right of that axis; ``ZeroPad2d`` takes
    ``(left, right, top, bottom)``. This keeps the output the same shape as
    the input, which the residual addition requires.
    """

    def __init__(self) -> None:
        super(ResBlock, self).__init__()
        self.nonlinearity = nn.GLU(dim=1)
        # 256 -> 512 channels; the GLU halves this back to 256 before conv2.
        self.conv1 = nn.Sequential(
            nn.ZeroPad2d((1, 1, 0, 0)),
            nn.Conv2d(in_channels=256, out_channels=512, kernel_size=(1, 3)),
        )
        self.conv2 = nn.Sequential(
            nn.ZeroPad2d((1, 1, 0, 0)),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=(1, 3)),
        )
        # Parameter-free instance norms, created once instead of on every forward pass.
        self.norm1 = nn.InstanceNorm2d(512)
        self.norm2 = nn.InstanceNorm2d(256)

    def forward(self, x):
        """Return ``x + F(x)`` for ``x`` of shape ``(B, 256, H, W)``; the shape is preserved."""
        hidden = self.nonlinearity(self.norm1(self.conv1(x)))
        residual = self.norm2(self.conv2(hidden))
        return residual + x

         

In [16]:
class Generator(nn.Module):
    """Generator: input conv, two DownSample stages, 1x1 convs around six ResBlocks,
    two UpSample stages and two output convs.

    Args:
        features: Integer used to size conv2/conv3/conv5 input channels and
            conv4 output channels (the notebook passes ``data_shape[2]``).

    NOTE(review): several shape assumptions in this class could not be
    verified from the code alone; see the inline notes before relying on
    ``forward``.
    """
    def __init__(self, features) -> None:
        super(Generator, self).__init__()
        # GLU over dim 1 halves the channel count of whatever it is applied to.
        self.nonlinearity = nn.GLU(dim=1)
        # NOTE(review): ReflectionPad2d takes (left, right, top, bottom) while
        # kernel_size is (height, width); here 7 rows are padded for a 5-row
        # kernel and 2 columns for a 15-column kernel, so the spatial size is
        # not preserved -- confirm whether the two are meant to be swapped.
        self.conv1 = nn.Sequential(nn.ReflectionPad2d((2,2,7,7)), nn.Conv2d(in_channels=1, out_channels=128, kernel_size=(5, 15)))
        # Each DownSample ends in a GLU, so 256 -> 128 and 512 -> 256 channels.
        self.downsample1 = DownSample(input_channels=64, output_channels=256)
        self.downsample2 = DownSample(input_channels=128, output_channels=512)
        # 1x1 conv whose input channel count is derived from `features` halved
        # (rounding up) twice, times 256.
        self.conv2 = nn.Conv2d(in_channels=( ((features+1)//2 + 1)//2*256), out_channels=256, kernel_size=1)
        # Registers resblock1 .. resblock6 as submodules.
        for i in range(6):
            self.add_module(f"resblock{i+1}", ResBlock())
        # NOTE(review): conv3 is declared with the same in_channels as conv2,
        # but in forward() it receives the ResBlock output, which ResBlock
        # fixes at 256 channels -- confirm.
        self.conv3 = nn.Conv2d(in_channels=( ((features+1)//2 + 1)//2*256), out_channels=256, kernel_size=1)
        self.upsample1 = UpSample(input_channels=256, output_channels=1024)
        self.upsample2 = UpSample(input_channels=128, output_channels=512)
        self.conv4 = nn.Sequential(nn.ReflectionPad2d((2,2,7,7)), nn.Conv2d(in_channels=64, out_channels=features, kernel_size=(5, 15)))
        self.conv5 = nn.Sequential(nn.ReflectionPad2d((2,2,7,7)), nn.Conv2d(in_channels= ((features+1)//2 + 1)//2*4, kernel_size=(5,15), out_channels=1))

    def forward(self, x):
        """Run the generator on ``x``.

        conv1 is declared with ``in_channels=1``, so ``x`` is presumably
        ``(batch, 1, H, W)`` -- TODO confirm against the training loop.
        """
        # 128 conv channels -> 64 after the GLU, matching downsample1's input.
        firstConv = self.conv1(x)
        firstNonLinearity = self.nonlinearity(firstConv)
        firstDown = self.downsample1(firstNonLinearity)
        secondDown = self.downsample2(firstDown)
        # Fold everything except a shortened dim 2 into the channel axis; the
        # last axis becomes size 1.
        # NOTE(review): the halving formula is applied to an already
        # downsampled height, and ResBlock's (1, 3) kernels slide along the
        # last axis, which is size 1 here -- confirm the intended layout.
        secondDown = secondDown.reshape((secondDown.shape[0], -1,((secondDown.shape[2]+1)//2 + 1)//2, 1))

        secondConv = self.conv2(secondDown)
        # A fresh parameter-free InstanceNorm2d is built on each call.
        normFunc = nn.InstanceNorm2d(secondConv.shape[1])
        secondNorm = normFunc(secondConv)
        # Apply resblock1 .. resblock6 in order.
        for i in range(6):
            secondNorm = self.__getattr__(f"resblock{i+1}")(secondNorm)
        
        thirdConv = self.conv3(secondNorm)
        normFunc = nn.InstanceNorm2d(thirdConv.shape[1])
        thirdNorm = normFunc(thirdConv)
        # Reshape back to a 4-D map whose last two dims are each halved
        # (rounding up) twice; the remaining elements go to the channel axis.
        thirdNorm = thirdNorm.reshape(thirdNorm.shape[0], -1, ((thirdNorm.shape[2]+1)//2 + 1)//2, ((thirdNorm.shape[3]+1)//2 + 1)//2)
        
        ups1 = self.upsample1(thirdNorm)
        ups2 = self.upsample2(ups1)

        fourthConv = self.conv4(ups2)
        # NOTE(review): reshape swaps the sizes of dims 1 and 3 but only
        # reinterprets memory, it does not transpose -- confirm that permute
        # was not intended.
        fourthConv = fourthConv.reshape(fourthConv.shape[0],fourthConv.shape[3],fourthConv.shape[2],fourthConv.shape[1])

        return self.conv5(fourthConv)



In [17]:
class Discriminator(nn.Module):
    """Convolutional discriminator that scores a single-channel 2-D input in (0, 1).

    Three ``DownSample`` stages each map a spatial extent ``n`` to
    ``(n + 1) // 2``, i.e. ``(n + 7) // 8`` overall. The final linear layer is
    sized for exactly that many elements, so ``conv2`` and ``conv3`` must
    preserve the spatial size. Their kernels are ``(1, 5)`` and ``(1, 3)``,
    height 1 and width 5 or 3, so the reflection padding goes on the left and
    right of the last axis; ``ReflectionPad2d`` takes
    ``(left, right, top, bottom)``.

    Args:
        width: Size of one spatial dimension of the input.
        height: Size of the other spatial dimension. Only the product of the
            two downsampled sizes is used, so their order does not matter.

    Note:
        Reflection padding of 2 requires the downsampled last axis to be at
        least 3 wide, i.e. an input last axis of at least 17.
    """

    def __init__(self, width, height) -> None:
        super(Discriminator, self).__init__()
        self.nonlinearity = nn.GLU(dim=1)
        self.conv1 = nn.Sequential(nn.ReflectionPad2d(1), nn.Conv2d(in_channels=1, out_channels=128, kernel_size=3))
        # Every DownSample ends in a GLU, so its output has half the conv channels.
        self.downsample1 = DownSample(input_channels=64, output_channels=256)
        self.downsample2 = DownSample(input_channels=128, output_channels=512)
        self.downsample3 = DownSample(input_channels=256, output_channels=1024)
        # Pad the axis the kernel actually spans (width) so H and W are preserved.
        self.conv2 = nn.Sequential(nn.ReflectionPad2d((2, 2, 0, 0)), nn.Conv2d(in_channels=512, out_channels=1024, kernel_size=(1, 5)))
        self.conv3 = nn.Sequential(nn.ReflectionPad2d((1, 1, 0, 0)), nn.Conv2d(in_channels=512, out_channels=1, kernel_size=(1, 3)))
        # Parameter-free norm and activation, created once rather than per call.
        self.norm = nn.InstanceNorm2d(1024)
        self.sigmoid = nn.Sigmoid()
        self.fc = nn.Linear(((width+7)//8) * ((height+7)//8), 1)

    def forward(self, x):
        """Return a ``(B, 1)`` tensor of probabilities for ``x`` of shape ``(B, 1, H, W)``."""
        hidden = self.nonlinearity(self.conv1(x))            # 128 -> 64 channels
        hidden = self.downsample1(hidden)                    # -> 128 channels
        hidden = self.downsample2(hidden)                    # -> 256 channels
        hidden = self.downsample3(hidden)                    # -> 512 channels

        hidden = self.nonlinearity(self.norm(self.conv2(hidden)))  # 1024 -> 512 channels

        scores = self.conv3(hidden)                          # -> 1 channel
        flat = scores.view(scores.shape[0], -1)
        return self.sigmoid(self.fc(flat))


In [23]:
# Read both artists' "audio" datasets fully into memory. The context manager
# closes the HDF5 files even if a read fails, and the in-memory arrays remain
# valid after the files are closed.
with h5py.File("iu.h5", 'r') as iu_h5, h5py.File("bruno.h5", 'r') as bruno_h5:
    iu_dataset = np.array(iu_h5["audio"])
    bruno_dataset = np.array(bruno_h5["audio"])

# Shape of the IU array; later cells index it to size the networks.
data_shape = iu_dataset.shape

# No target-frequency distribution is passed here.
iu_data = ArtistDataset(iu_dataset)
bruno_mars_data = ArtistDataset(bruno_dataset)

In [27]:
# Instantiate a Generator sized by the third dimension of the loaded data.
test = Generator(data_shape[2])

80 130


In [28]:
# Instantiate a Discriminator with width/height taken from dims 1 and 2 of the loaded data.
test2 = Discriminator(data_shape[1], data_shape[2])

In [51]:
# Scratch check of the frequency-sampling logic: round the values to one
# decimal, count occurrences, and draw keys with probability proportional to
# their count. (numpy is already imported in the first cell.)
origArr = [440.8, 441.5, 478, 441.5, 441.5, 478, 478, 478]
origArr = np.array(origArr)
roundedArr = np.around(origArr, 1)
keys, vals = np.unique(roundedArr, return_counts=True)
# Order the candidates from most to least frequent.
valIndex = vals.argsort()
vals = vals[valIndex][::-1]
keys = keys[valIndex][::-1]
vals = np.divide(vals, sum(vals))

for i in range(20):
    choice = np.random.choice(keys, p=vals)
    # Keys come from the rounded array, so look the choice up there; comparing
    # against the un-rounded values would miss entries that changed when rounded.
    index_list = np.where(roundedArr==choice)
    print(keys, choice, index_list)

[478.  441.5 440.8] 441.5 (array([1, 3, 4], dtype=int64),)
[478.  441.5 440.8] 441.5 (array([1, 3, 4], dtype=int64),)
[478.  441.5 440.8] 478.0 (array([2, 5, 6, 7], dtype=int64),)
[478.  441.5 440.8] 441.5 (array([1, 3, 4], dtype=int64),)
[478.  441.5 440.8] 478.0 (array([2, 5, 6, 7], dtype=int64),)
[478.  441.5 440.8] 441.5 (array([1, 3, 4], dtype=int64),)
[478.  441.5 440.8] 478.0 (array([2, 5, 6, 7], dtype=int64),)
[478.  441.5 440.8] 478.0 (array([2, 5, 6, 7], dtype=int64),)
[478.  441.5 440.8] 478.0 (array([2, 5, 6, 7], dtype=int64),)
[478.  441.5 440.8] 478.0 (array([2, 5, 6, 7], dtype=int64),)
[478.  441.5 440.8] 441.5 (array([1, 3, 4], dtype=int64),)
[478.  441.5 440.8] 478.0 (array([2, 5, 6, 7], dtype=int64),)
[478.  441.5 440.8] 441.5 (array([1, 3, 4], dtype=int64),)
[478.  441.5 440.8] 478.0 (array([2, 5, 6, 7], dtype=int64),)
[478.  441.5 440.8] 441.5 (array([1, 3, 4], dtype=int64),)
[478.  441.5 440.8] 478.0 (array([2, 5, 6, 7], dtype=int64),)
[478.  441.5 440.8] 441.5 (ar