# **Morse code translator**

Import libraries

In [None]:
import difflib

import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from IPython.display import Audio
from pydub import AudioSegment
from scipy.fft import fft, fftfreq
from scipy.fftpack import fft
from scipy.io import wavfile
from scipy.signal import butter, filtfilt
from scipy.signal import find_peaks
from scipy.signal import sosfiltfilt

Map each Morse code sequence to its corresponding character in a dictionary.

In [None]:
# Lookup table: Morse sequence (dots and dashes) -> the character it stands for.
# Contains the letters A-Z, the digits 0-9 and a set of punctuation marks.
morse_map = {
    ".-": "A",      "-...": "B",    "-.-.": "C",    "-..": "D",     ".": "E",
    "..-.": "F",    "--.": "G",     "....": "H",    "..": "I",      ".---": "J",
    "-.-": "K",     ".-..": "L",    "--": "M",      "-.": "N",      "---": "O",
    ".--.": "P",    "--.-": "Q",    ".-.": "R",     "...": "S",     "-": "T",
    "..-": "U",     "...-": "V",    ".--": "W",     "-..-": "X",    "-.--": "Y",
    "--..": "Z",    ".----": "1",   "..---": "2",   "...--": "3",   "....-": "4",
    ".....": "5",   "-....": "6",   "--...": "7",   "---..": "8",   "----.": "9",
    "-----": "0", ".-.-.-": ".", "--..--": ",", "..--..": "?", "-.-.--": "!",
    "-.--.": "(", "-.--.-": ")", ".-...": "&", "---...": ":", "-.-.-.": ";",
    "-..-.": "/", "..--.-": "_", "-...-": "=", ".-.-.": "+", "-....-": "-", "...-..-": "$",
    ".--.-.": "@", ".----.": "'", ".-..-.": '"'
    }

Load the book transcription as plain text

In [None]:
# Read the reference transcription and remove every "=" and "!" from it.
# NOTE(review): presumably done so the text lines up with the decoded audio
# (later cells also strip "=" from the decoded output) — confirm.
with open('Death_of_a_Spaceman.txt', 'r') as file:
    book = file.read().replace("=", "").replace("!", "")

# **Morse code encoder**

### Text -> Morse code -> Audio Morse code

Make a function to generate morse code of multiple characters

In [None]:
def morse_character(target):
    """Return the Morse sequence that ``morse_map`` associates with ``target``.

    The table is scanned in insertion order and the first matching key wins.
    ``None`` is returned when no entry maps to ``target``.
    """
    candidates = (code for code, symbol in morse_map.items() if symbol == target)
    return next(candidates, None)
def morse_encode(text):
    """Encode ``text`` as written Morse code.

    The text is upper-cased and split on any whitespace (spaces, tabs,
    newlines). Every character is emitted as its Morse sequence followed by
    one space, and every word is terminated by two additional spaces, so
    letters are separated by a single space and words by three.

    Characters that have no Morse representation are skipped instead of
    raising ``TypeError`` (``morse_character`` returns ``None`` for them).

    Parameters
    ----------
    text : str
        Plain text to encode.

    Returns
    -------
    str
        The Morse representation; an empty string when ``text`` holds no words.
    """
    encoded_words = []
    for word in text.strip().upper().split():
        codes = (morse_character(char) for char in word)
        # Unknown characters yield None and are dropped here.
        letters = ''.join(code + ' ' for code in codes if code is not None)
        encoded_words.append(letters + '  ')
    return ''.join(encoded_words)

print(morse_encode('Hello, my name is Vika'))

We will use a sine wave to create the sound patterns.
By using the sine function, we can generate audio signals with varying frequencies, which produce different pitches when heard. 
This makes it suitable for creating tones and melodies in audio synthesis applications.

The formula represents a sound wave given by $y(t) = \sin(2\pi ft)$, where:

- $y(t)$ represents the value of the signal at time $t$,
- $t$ stands for time,
- $f$ represents the frequency of the signal (in hertz).

In [None]:
def generate_sound(duration, sample_rate=44100, frequency=750):
    """Generate a sine tone.

    Parameters
    ----------
    duration : float
        Length of the tone in seconds.
    sample_rate : int
        Number of samples per second.
    frequency : float
        Tone frequency in hertz.

    Returns
    -------
    numpy.ndarray
        ``int(duration * sample_rate)`` samples of ``sin(2*pi*frequency*t)``.
    """
    n_samples = int(duration * sample_rate)
    # Sample k is taken at exactly k / sample_rate seconds. An endpoint-inclusive
    # linspace would stretch the spacing to duration / (n - 1) and therefore
    # play back at a slightly higher pitch than requested.
    t = np.arange(n_samples) / sample_rate
    return np.sin(2 * np.pi * frequency * t)

# Preview the first 500 samples of a 50 ms tone.
fig, ax = plt.subplots()
ax.plot(generate_sound(0.05))
ax.set_xlim(0, 500)
ax.set(title='Generated sine tone', xlabel='Sample index', ylabel='Amplitude')
plt.show()

In [None]:
def generate_morse_audio(encoded_text, sample_rate=44100, frequency=750):
    """Synthesise audio samples for a written Morse string.

    Parameters
    ----------
    encoded_text : str
        Morse text made of ``'.'``, ``'-'`` and spaces. Any other character
        is ignored.
    sample_rate : int
        Samples per second of the produced audio.
    frequency : float
        Tone frequency in hertz. It is now forwarded to ``generate_sound``
        together with ``sample_rate``; previously the tones always used the
        defaults, so non-default arguments produced tones and silences with
        inconsistent lengths.

    Returns
    -------
    tuple
        ``(samples, sample_rate)`` where ``samples`` is a 1-D float array.

    Notes
    -----
    Durations below are divided by ``frequency`` to obtain seconds, so a dot
    lasts ``100 / frequency`` seconds (about 0.133 s at the default 750 Hz).
    Every tone is followed by one inter-symbol gap. A single space adds an
    inter-character gap; the second and every further consecutive space adds
    an inter-word gap.
    """
    # define the durations for each symbol
    dot_duration = 100
    dash_duration = 3 * dot_duration
    inter_symbol_gap_duration = dot_duration
    inter_character_gap_duration = 3 * dot_duration
    inter_word_gap_duration = 7 * dot_duration

    def silence(length):
        return np.zeros(int(length / frequency * sample_rate))

    def tone(length):
        return generate_sound(length / frequency, sample_rate=sample_rate, frequency=frequency)

    # Collect the pieces and join them once; concatenating inside the loop
    # copies the whole signal on every symbol.
    chunks = [silence(inter_word_gap_duration)]
    space_count = 0
    for symbol in encoded_text:
        if symbol == '.':
            space_count = 0
            chunks.append(tone(dot_duration))
            chunks.append(silence(inter_symbol_gap_duration))
        elif symbol == '-':
            space_count = 0
            chunks.append(tone(dash_duration))
            chunks.append(silence(inter_symbol_gap_duration))
        elif symbol == ' ':
            space_count += 1
            if space_count >= 2:
                chunks.append(silence(inter_word_gap_duration))
            else:
                chunks.append(silence(inter_character_gap_duration))
        # any other character produces no audio
    return np.concatenate(chunks), sample_rate

## Let's put it all together

In [None]:
def text_to_audio(text):
    """Encode ``text`` to Morse, synthesise its audio and show everything.

    Prints the input text and its Morse representation, then displays an
    audio player for the generated samples. Nothing is returned.
    """
    code = morse_encode(text)
    waveform, rate = generate_morse_audio(code)
    for label, value in (('Text: ', text), ('Morse code: ', code)):
        print(label, value)
    print('Morse audio: ')
    player = Audio(waveform, rate=rate)
    display(player)

Let's try this on a sentence such as "Hello, we are students of APPS UCU!"

In [None]:
text = "Hello, we are students of APPS UCU!"
text_to_audio(text)

Also, let's download this sound from the audio player above and save it as a file called "download.wav" so that we can check how the decoder works

# **Morse code decoder**

### Audio Morse code -> Morse code -> Text

Make a function for filtering audio

In [None]:
def bandpass_filter(data, lowcut, highcut, fs, order=5):
    """
    Zero-phase Butterworth band-pass filter.

    Keeps the frequencies between ``lowcut`` and ``highcut`` and attenuates
    everything else. The filter is applied forwards and backwards, so the
    output has no phase shift relative to the input.

    The filter is designed and applied as second-order sections: the
    transfer-function (b, a) form loses precision for higher-order band-pass
    designs whose band lies far below the Nyquist frequency, which is how
    this notebook uses it.

    Parameters
    ----------
    data : array_like
        Signal to filter.
    lowcut, highcut : float
        Band edges in hertz.
    fs : float
        Sampling rate of ``data`` in hertz.
    order : int
        Order passed to the Butterworth design.

    Returns
    -------
    numpy.ndarray
        Filtered signal with the same shape as ``data``.
    """
    # Cut-offs are given to butter() as fractions of the Nyquist frequency
    nyq = 0.5 * fs
    low = lowcut / nyq
    high = highcut / nyq
    sos = butter(order, [low, high], btype='band', output='sos')
    return sosfiltfilt(sos, data)

Define a function to find Signal times and Silence times and to plot the STFT result

In [None]:
def _frames_to_intervals(active, hop_length, fs):
    """Convert a per-frame tone/no-tone mask into (start, end) times in seconds."""
    n_frames = len(active)
    total_time = n_frames * hop_length / fs
    signal_times = []

    # A tone already present in the very first frame starts at t = 0.
    # Without this the start time would be undefined when that tone ends.
    start = 0.0 if n_frames > 0 and active[0] else None
    for i in range(1, n_frames):
        frame_time = i * hop_length / fs
        if active[i] and not active[i - 1]:
            # silence -> tone
            start = frame_time
        elif active[i - 1] and not active[i]:
            # tone -> silence
            signal_times.append((start, frame_time))
        elif active[i] and i == n_frames - 1:
            # tone still running in the last frame: close it there
            signal_times.append((start, frame_time))

    if not signal_times:
        # No tone at all: the whole segment is one silence.
        return [], [(0, total_time)]

    # The leading silence is always emitted (it may be zero-length) so that
    # silence_times[i + 1] is always the gap that follows signal_times[i].
    silence_times = [(0, signal_times[0][0])]
    for k in range(len(signal_times) - 1):
        silence_times.append((signal_times[k][1], signal_times[k + 1][0]))

    # Trailing silence, if the last tone ends before the end of the segment
    if signal_times[-1][1] < total_time:
        silence_times.append((signal_times[-1][1], total_time))
    return signal_times, silence_times


def analyze_morse_code_segment(filename, start_sec, end_sec, lowcut, highcut, order=6, n_fft=2048, hop_length=512, db_threshold=-50):
    """
    Load an audio file, band-pass filter a segment of it, plot its STFT and
    return when tones and silences occur.

    Parameters
    ----------
    filename : str
        Path of the audio file.
    start_sec, end_sec : float
        Start and end of the analysed segment, in seconds.
    lowcut, highcut : float
        Band-pass edges in hertz.
    order : int
        Order passed to ``bandpass_filter``.
    n_fft : int
        Number of points used for each FFT of the STFT.
    hop_length : int
        Number of samples the STFT window advances between frames.
    db_threshold : float
        A frame counts as "tone" when any frequency bin is above this level,
        measured in dB relative to the loudest bin of the segment.

    Returns
    -------
    tuple of lists
        ``(signal_times, silence_times)``, each a list of ``(start, end)``
        tuples in seconds relative to the start of the segment. When at least
        one tone is found, ``silence_times[0]`` is the silence before the
        first tone. When no tone is found, ``signal_times`` is empty and
        ``silence_times`` holds one interval covering the whole segment.
    """
    # Load at the native sampling rate, downmixed to a single channel
    y, fs = librosa.load(filename, sr=None, mono=True)

    # Extract the desired segment
    segment_data = y[int(start_sec * fs):int(end_sec * fs)]

    # Apply a bandpass filter
    filtered_data = bandpass_filter(segment_data, lowcut, highcut, fs, order)

    # Perform STFT and convert the magnitude to decibels (0 dB = loudest bin)
    D = librosa.stft(filtered_data, n_fft=n_fft, hop_length=hop_length)
    D_magnitude, _ = librosa.magphase(D)
    D_db = librosa.amplitude_to_db(D_magnitude, ref=np.max)

    # Plot the STFT result
    plt.figure(figsize=(12, 6))
    librosa.display.specshow(D_db, sr=fs, hop_length=hop_length, x_axis='time', y_axis='linear')
    plt.colorbar(format='%+2.0f dB')
    plt.title('STFT Magnitude (dB)')
    plt.show()

    # One flag per time frame: is any frequency bin above the threshold?
    active = (D_db > db_threshold).any(axis=0)
    return _frames_to_intervals(active, hop_length, fs)

Let's try it on the first audio file of our dataset ('000_Death_of_a_Spaceman.wav')

In [None]:
filename = 'Death_of_a_Spaceman/000_Death_of_a_Spaceman.wav'
start_sec = 0  # Start time in seconds
end_sec = 662   # End time in seconds
lowcut = 500   # Low frequency threshold (Hz)
highcut = 1500 # High frequency threshold (Hz)

signal_times, silence_times = analyze_morse_code_segment(filename, start_sec, end_sec, lowcut, highcut)
print('Signal times: ', signal_times)
print('Silence times: ', silence_times)

Make the function to find signal duration and silence duration

In [None]:
def calculate_durations(time_tuples):
    """
    Convert a list of (start, end) pairs into a list of lengths (end - start).
    """
    durations = []
    for begin, finish in time_tuples:
        durations.append(finish - begin)
    return durations

Let's apply it to the audio from our dataset ('000_Death_of_a_Spaceman.wav')

In [None]:
signal_duration = calculate_durations(signal_times)
print(signal_duration)
silence_duration = calculate_durations(silence_times)
print(silence_duration)

Using durations obtain written Morse code

In [None]:
def morse_text(signal_duration, silence_duration, dash_threshold=0.3, char_gap=0.4, word_gap=0.6):
    """
    Turn tone and silence durations into written Morse code.

    Parameters
    ----------
    signal_duration : list of float
        Length of every tone, in seconds.
    silence_duration : list of float
        Length of every silence, in seconds. Element 0 is the silence before
        the first tone, so ``silence_duration[i + 1]`` is the gap that follows
        ``signal_duration[i]``.
    dash_threshold : float
        Tones at least this long are dashes; shorter ones are dots.
    char_gap : float
        Gaps shorter than this stay inside a character.
    word_gap : float
        Gaps from ``char_gap`` up to (not including) this value separate
        characters (``' '``); gaps at least this long separate words
        (``' / '``).

    Returns
    -------
    str
        The Morse text, or an empty string when there are no tones.
    """
    pieces = []
    last = len(signal_duration) - 1
    for i, tone in enumerate(signal_duration):
        pieces.append('-' if tone >= dash_threshold else '.')
        if i == last:
            # the silence after the final tone does not produce a separator
            break
        gap = silence_duration[i + 1]
        # The ranges cover every value, so a gap exactly on a threshold is
        # still classified instead of being dropped.
        if gap >= word_gap:
            pieces.append(' / ')  # division between words
        elif gap >= char_gap:
            pieces.append(' ')
    return ''.join(pieces)

Let's apply it to the audio from our dataset ('000_Death_of_a_Spaceman.wav')

In [None]:
res = morse_text(signal_duration, silence_duration)
print(res)

Let's define a function that translates Morse code into English text

In [None]:
def morse_to_english(morse_code):
    """
    Morse code translator.

    Words are separated by '/', and the symbols inside a word by whitespace.
    Each symbol is looked up in ``morse_map``; symbols missing from the table
    contribute nothing. The decoded words are joined with single spaces.
    """
    def decode_word(word):
        letters = []
        for symbol in word.split():
            letters.append(morse_map.get(symbol, ''))
        return ''.join(letters)

    return ' '.join(decode_word(word) for word in morse_code.split('/'))

Let's apply it to the audio from our dataset ('000_Death_of_a_Spaceman.wav')

In [None]:
# Decode the Morse text, drop the last two decoded characters and remove every "=".
# NOTE(review): the fixed [:-2] slice presumably trims trailing characters that are
# specific to this recording — confirm before reusing it on other files.
decoded_message = morse_to_english(res)[:-2]
decoded_message = decoded_message.replace("=", "")
print("Decoded Message:", decoded_message)

Let's also check whether this translation is correct. To do so, we compare it with the book transcription (file: "Death_of_a_Spaceman.txt")

In [None]:
first_part = book[:503]
print(first_part)

Let's write a function to measure the accuracy

In [None]:
def calculate_accuracy(original_text, decoded_text):
    """Return the case-insensitive ``difflib`` similarity ratio of two texts (0.0 to 1.0)."""
    reference = original_text.lower()
    candidate = decoded_text.lower()
    matcher = difflib.SequenceMatcher(None, reference, candidate)
    return matcher.ratio()

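This function uses `difflib.SequenceMatcher`, which compares two strings by finding their matching blocks (an approach closely related to the Ratcliff/Obershelp "gestalt pattern matching" algorithm) and derives a similarity score from them.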

1. **Defining Equal Blocks**:
   The algorithm first finds the longest contiguous block of characters shared by the two strings, then repeats the search recursively on the parts to the left and to the right of that block. The resulting blocks can be whole words, substrings, or even individual characters.

2. **Evaluating Similarity**:
   After the matching blocks are found, the similarity is computed as $2M / T$, where $M$ is the total number of matched characters and $T$ is the combined length of both strings. The more characters match, the higher the score: $1$ means the strings are identical and $0$ means they have nothing in common.

3. **Returning the Similarity Score**:
   Finally, after computing the relative number of matches, the algorithm returns this similarity score as the function's output.


## Find the accuracy

In [None]:
accuracy = calculate_accuracy(first_part, decoded_message)

print("Accuracy:", accuracy)

## Let's put it all together

In [None]:
def sound_to_text(filename, duration):
    """Decode a Morse audio file into text.

    Analyses the first ``duration`` seconds of ``filename`` within the
    500-1500 Hz band, shows an audio player for the file, prints the
    recovered Morse code and the decoded message, and returns the message.
    """
    band_hz = (500, 1500)  # low / high frequency thresholds (Hz)
    tones, gaps = analyze_morse_code_segment(filename, 0, duration, band_hz[0], band_hz[1])
    morse = morse_text(calculate_durations(tones), calculate_durations(gaps))
    display(Audio(filename))
    print('Morse code: ', morse)
    message = morse_to_english(morse)
    print("Decoded Message:", message)
    return message

## Let's check if our decoder and encoder work correctly.  

In the Encoder part we downloaded the Morse code audio for the text "Hello, we are students of APPS UCU!" and saved it as a file called "download.wav".  
Let's try to decode this sound.

In [None]:
filename = 'download.wav'
sound_to_text(filename, 57)

In [None]:
print("Accuracy:", calculate_accuracy(text, sound_to_text(filename, 57)))

### The texts match (accuracy = 1), so the decoder and encoder work correctly

# Let's translate the first 8 parts of the book and measure the accuracy

Text from book

In [None]:
eight_part = book[:4401]

Let's decode these 8 parts (audio)

In [None]:
# Length in seconds of every audio part of the book, indexed by part number
list_of_durations = [662, 661, 689, 664, 700, 703, 665, 706, 694, 668, 688, 701, 689, 672, 667, 698, 695, 689, 690, 698, 666, 681, 669, 727, 694, 707, 651, 684, 686, 679, 665, 684, 696, 708, 729, 687, 684, 697, 702, 663, 658, 682, 677, 698, 661, 686, 665, 707, 692, 674, 182]

# Decode the first eight parts; file names carry a zero-padded three-digit index
decoded_parts = []
for i in range(8):
    filename = 'Death_of_a_Spaceman/{:03d}_Death_of_a_Spaceman.wav'.format(i)
    part = sound_to_text(filename, list_of_durations[i])
    decoded_parts.append(part)
decoded_eight_part = ''.join(decoded_parts).replace("=", "")

Let's compare these texts

In [None]:
print("Original book:\n", eight_part)
print("Book decoded from Morse code audio:\n", decoded_eight_part)

## Let's check the accuracy

In [None]:
print("Accuracy: ", calculate_accuracy(eight_part, decoded_eight_part))

We can also find the words where our translator made mistakes

In [None]:
decoded_words = decoded_eight_part.lower().split()
print(decoded_words)
book_words = eight_part.lower().split()
print(book_words)

In [None]:
# Words that appear on only one side; sets are used purely for the membership test
book_vocabulary = set(book_words)
decoded_vocabulary = set(decoded_words)
unique_elements1 = [word for word in decoded_words if word not in book_vocabulary]
unique_elements2 = [word for word in book_words if word not in decoded_vocabulary]
print(unique_elements1)
print(unique_elements2)

The incorrectly decoded words (decoded = original):  
'd', 'new' = 'knew'  
'h', 'er' = 'her'  
'a', 'hite' = 'white'  
'a' = 'and'  
'swallong' = 'swallowing'  
'f5h-bone.' = 'fish-bone.'  
'5' = 'he'  
nothing = 'to'  

# Let's try it together

Make a Morse code audio

In [None]:
text = "A table is in the room"
text_to_audio(text)

Download the sound and try to decode

In [None]:
duration = 33
filename_presentation = "download1.wav"
sound_to_text(filename_presentation, duration)