# Create datasets for Morse training
## Base functions

In [None]:
import random
import string
import numpy as np

def random_partition(k, iterable):
    """Scatter the items of *iterable* at random over *k* lists.

    Each item is appended to one bucket chosen with ``random.randrange(k)``,
    so items keep their relative input order inside a bucket and buckets may
    stay empty.

    Returns a list of *k* lists.
    """
    buckets = []
    for _ in range(k):
        buckets.append([])
    for item in iterable:
        target = buckets[random.randrange(k)]
        target.append(item)
    return buckets

def random_strings(k, rawchars):
    """Scatter the characters of *rawchars* at random over *k* strings.

    Every character is appended to one word chosen with
    ``random.randrange(k)``; characters keep their relative order inside a
    word, and a word that is never chosen stays the empty string.

    Returns a list of *k* strings.
    """
    pieces = [[] for _ in range(k)]
    for ch in rawchars:
        pieces[random.randrange(k)].append(ch)
    return [''.join(chars) for chars in pieces]

def get_morse_str(nchars=132, nwords=27):
    """Build a random phrase of upper-case letters and digits.

    ``nchars`` characters are drawn with ``random.choice`` and spread, in
    order, over ``nwords`` words by ``random_strings``; the words are joined
    with single spaces.

    NOTE(review): ``np.random.seed(0)`` resets NumPy's global generator, but
    the characters are drawn with the stdlib ``random`` module, so the seed
    does not make the phrase reproducible. It is kept because it changes what
    later ``np.random`` calls return -- confirm whether that is intended.
    NOTE(review): words that receive no character are empty strings, so the
    phrase can contain leading, trailing or consecutive spaces.
    """
    np.random.seed(0)
    alphabet = string.ascii_uppercase + string.digits
    picked = [random.choice(alphabet) for _ in range(nchars)]
    return ' '.join(random_strings(nwords, ''.join(picked)))
    

Try it ...

In [None]:
# Smoke test: build one phrase with the default sizes and show its length
# (characters plus separating spaces) followed by the phrase itself.
morsestr = get_morse_str()
print(len(morsestr), morsestr)

## Dataset for Morse keying training (dits and dahs)
### Signal and labels 

In [None]:
import MorseGen
import matplotlib.pyplot as plt 
import numpy as np

def get_new_data(SNR_dB=-23, nchars=132, nwords=27):
    """Generate one noisy Morse envelope together with its label columns.

    A random phrase from ``get_morse_str`` is encoded by ``MorseGen.Morse``
    with a decimation factor of 128, and Gaussian noise scaled for the
    requested SNR is added to the ``env`` column.

    Args:
        SNR_dB: requested signal-to-noise ratio in dB. The linear ratio is
            multiplied by 256 before use ("Apply original FFT" in the original
            code -- presumably the gain of a 256-point FFT, TODO confirm).
        nchars: number of random characters in the phrase.
        nwords: number of words the characters are spread over.

    Returns:
        ``(signal, label_df, n_prev)`` where ``signal`` is the envelope plus
        noise, ``label_df`` is the encoder output with the ``env`` column
        removed, and ``n_prev`` is the look-back length in decimated samples.

    NOTE: ``get_morse_str`` calls ``np.random.seed(0)``, so the noise drawn
    here is the same NumPy random sequence on every call.
    """
    phrase = get_morse_str(nchars=nchars, nwords=nwords)
    print(len(phrase), phrase)
    Fs = 8000
    decim = 128
    morse_gen = MorseGen.Morse()
    # second argument is presumably the keying speed in words per minute -- TODO confirm
    samples_per_dit = morse_gen.nb_samples_per_dit(Fs, 13)
    decimated_dit = samples_per_dit/decim
    # number of samples to look back is slightly more than a dit-dah and a word space (2+3+7=12)
    n_prev = int(decimated_dit*12) + 1
    print(f'Samples per dit at {Fs} Hz is {samples_per_dit}. Decimation is {decimated_dit:.2f}. Look back is {n_prev}.')
    label_df = morse_gen.encode_df_decim(phrase, samples_per_dit, decim)
    # keep the envelope as the clean signal ...
    morsecode = label_df.env.copy()
    # ... and remove it from the labels
    label_df.drop(columns=['env'], inplace=True)
    SNR_linear = 10.0**(SNR_dB/10.0)
    SNR_linear *= 256 # Apply original FFT
    print(f'Resulting SNR for original {SNR_dB} dB is {(10.0 * np.log10(SNR_linear)):.2f} dB')
    power = morsecode.var()
    noise_power = power/SNR_linear
    # NOTE: in the original audio setup the noise is also low-pass filtered;
    # that filtering is not simulated here.
    noise = np.sqrt(noise_power)*np.random.normal(0, 1, len(morsecode))
    signal = morsecode + noise
    return signal, label_df, n_prev
    

Try it ...

In [None]:
# Generate one example at -10 dB SNR
signal, label_df, n_prev = get_new_data(-10)

# Show
print(n_prev)
print(type(signal), signal.shape)
print(type(label_df), label_df.shape)

x0 = 0
x1 = 1500

plt.figure(figsize=(50,6))
plt.plot(signal[x0:x1]*0.5, label="sig")
# Each label column is scaled by 0.9 and shifted into its own lane (1.0 .. 5.0)
# so the traces stack above the signal instead of overlapping.
for lane, col in enumerate(('dit', 'dah', 'ele', 'chr', 'wrd'), start=1):
    plt.plot(label_df[x0:x1][col]*0.9 + float(lane), label=col)
plt.title("signal and labels")
plt.legend()
plt.grid()

### Define dataset

In [None]:
import torch

class MorsekeyingDataset(torch.utils.data.Dataset):
    """Sliding-window dataset over one generated Morse keying signal.

    Item ``i`` is the pair ``(X[i:i+seq_len], y[i+seq_len])``: a window of
    ``seq_len`` consecutive signal samples and the label row of the sample
    that immediately follows the window.
    """

    def __init__(self, SNR_dB=-23, nchars=132, nwords=27):
        """Generate the data with ``get_new_data`` and convert it to tensors.

        The arguments are passed through to ``get_new_data`` unchanged; the
        look-back length it returns becomes the window size ``seq_len``.
        """
        self.signal, self.label_df, self.seq_len = get_new_data(SNR_dB, nchars, nwords)
        self.X = torch.FloatTensor(self.signal.values)
        self.y = torch.FloatTensor(self.label_df.values)

    def __len__(self):
        """Return the number of full windows that still have a following label.

        Clamped at zero: ``len()`` raises ValueError on a negative result,
        which would otherwise happen when the signal is shorter than the window.
        """
        return max(0, len(self.X) - self.seq_len)

    def __getitem__(self, index):
        """Return ``(window, label)`` for the window starting at *index*."""
        return (self.X[index:index+self.seq_len], self.y[index+self.seq_len])

    def get_signal(self):
        """Return the noisy signal as produced by ``get_new_data``."""
        return self.signal

    def get_labels(self):
        """Return the label frame as produced by ``get_new_data``."""
        return self.label_df

    def get_seq_len(self):
        """Return the window length in samples."""
        # seq_len is a plain attribute; calling it raised TypeError.
        return self.seq_len

### Define data loader

In [None]:
# Training set at SNR_dB=-10 with four times the default phrase size
# (nchars=132*4, nwords=27*4).
train_dataset = MorsekeyingDataset(-10, 132*4, 27*4)
# shuffle=False serves the windows in signal order, one window per batch.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=1, shuffle=False) # Batch size must be 1

In [None]:
# Fetch the raw signal and labels back from the dataset for plotting
signal = train_dataset.get_signal()
label_df = train_dataset.get_labels()

print(type(signal), signal.shape)
print(type(label_df), label_df.shape)

x0 = 0
x1 = 1500

plt.figure(figsize=(50,6))
plt.plot(signal[x0:x1]*0.5, label="sig")
# Each label column is scaled by 0.9 and shifted into its own lane (1.0 .. 5.0)
# so the traces stack above the signal instead of overlapping.
for lane, col in enumerate(('dit', 'dah', 'ele', 'chr', 'wrd'), start=1):
    plt.plot(label_df[x0:x1][col]*0.9 + float(lane), label=col)
plt.title("signal and labels")
plt.legend()
plt.grid()

In [None]:
# Pull the first two batches from the loader and inspect them.
it = iter(train_loader)
X, y = next(it)
# The loader was built with batch_size=1, so index 0 is the only sample in the batch.
print(X[0].shape, y[0].shape)
print(X[0], y[0])
# Second batch: with shuffle=False this is the window starting one sample later.
X, y = next(it)
print(X[0], y[0])

In [None]:
# Scratch check of Tensor.view: a 1-D tensor of 10 random values ...
X_t = torch.rand(10)
print(X_t.shape)
# ... reshaped to (10, 1, 1); -1 lets view() infer the first dimension.
X_t = X_t.view(-1, 1, 1)
print(X_t.shape, X_t)

In [None]:
# NOTE(review): reindex() is called without arguments and its result is not
# assigned, so label_df itself is left as it was; as the last expression of a
# notebook cell the returned frame is only displayed. If renumbering the rows
# was intended, reset_index() is presumably the call that was meant -- confirm.
label_df.reindex()