In [None]:
#mount Google Drive at /content/drive; the dataset and output paths used in this notebook live under that mount point
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import wfdb
from scipy.io.wavfile import write

In [None]:
#set the path to the dataset: the folder on the mounted Drive that holds the record files (ex. 101.hea)
#records are later opened as f'{data_path}/{record_name}', so do not add a trailing slash
data_path = '/content/drive/MyDrive/s23/deep_learning/DL4M_Final/Coding/ecg-to-wav/mit-bih-arrhythmia-database'


In [None]:
#convert every record numbered 100-234 into peak-normalised 16-bit WAV clips of 15 seconds each
#note, this only works if data files are saved as numbers in the name (ex. 101.hea, 101.xyz).
#for other file names, rename the files so they start with numbers and update the .hea files so that they reference each other by the new names

#folder that receives the generated WAV files
#NOTE(review): this sits under a different Drive folder ('dl4m') than data_path ('s23/deep_learning') - confirm it is the intended destination
output_dir = '/content/drive/MyDrive/dl4m/DL4M_Final/Coding/midi-converter/ecg-to-wav/wav-output/'

#create the folder up front so a missing folder cannot make every write fail
os.makedirs(output_dir, exist_ok=True)

#length of each clip in seconds
chunk_seconds = 15

#gain applied before normalisation; the peak normalisation below cancels it, so it does not change the output level
#it is kept so that the sample values and the '_amplified_3000' file names match earlier runs
gain = 3000

for i in range(100, 235):
    record_name = str(i)

    #only the read is guarded: not every number in the range has a record, and a failure
    #while writing must not be reported as a missing record
    try:
        #load the signal data and its metadata from the record file
        signal, fields = wfdb.rdsamp(f'{data_path}/{record_name}')
    except FileNotFoundError:
        print(f'File {record_name} not found. Skipping...')
        continue

    #get the sampling frequency from the metadata returned with the signal (no second header read needed)
    fs = fields['fs']

    #WAV headers and slice steps need whole numbers, so coerce in case fs is stored as a float
    sample_rate = int(round(fs))
    chunk_size = int(round(chunk_seconds * fs))

    #split the signal into 15-second chunks; the final chunk holds whatever is left and may be shorter
    chunks = [signal[j:j+chunk_size] for j in range(0, len(signal), chunk_size)]

    #iterate over the chunks, numbering the output files from 1
    for chunk_idx, chunk in enumerate(chunks, start=1):

        #amplify the signal; NaN samples (if the reader produced any) become 0 so they cannot poison the peak
        amplified_signal = np.nan_to_num(chunk * gain, nan=0.0)

        #scale the amplified signal to a 16-bit integer array
        peak = np.max(np.abs(amplified_signal))
        if peak > 0:
            scaled_data = np.int16(amplified_signal / peak * 32767)
        else:
            #an all-zero chunk would divide by zero; write silence instead
            scaled_data = np.zeros(amplified_signal.shape, dtype=np.int16)

        #save the scaled signal as a WAV file
        write(os.path.join(output_dir, f'{record_name}_chunk{chunk_idx}_amplified_{gain}.wav'), sample_rate, scaled_data)