In [248]:
import pandas as pd
import numpy as np
import glob
import os
import librosa
from pydub import AudioSegment
import pydub
import matplotlib.pyplot as plt
import random
from typing import List
import joblib

import torch.nn as nn
import torch
import torch.nn.functional as F


import lightning as L

from torch.utils.data import TensorDataset,DataLoader
import torchmetrics
from lightning.pytorch.callbacks import ModelCheckpoint,EarlyStopping,Callback


from lightning.pytorch.loggers import CSVLogger


from sklearn.preprocessing import StandardScaler,LabelEncoder
from sklearn.preprocessing import StandardScaler

In [9]:
print(torch.__version__)
print("Is CUDA available: ", torch.cuda.is_available())
print("Is Metal available: ", torch.backends.mps.is_available())

2.4.0.dev20240420
Is CUDA available:  False
Is Metal available:  True


In [10]:
parent_directory='../../data/train'

sub_folders=glob.glob(os.path.join(parent_directory,'*'))

files=[]

for folder_path in sub_folders:
    first_file=glob.glob(os.path.join(folder_path,'*.ogg'))
    files.extend(first_file)

In [12]:
# split train and val paths
# i decide to randomly choose one single file from each category folder as val path, and the left part as train paths

parent_directory='../../data/train'

sub_folders=glob.glob(os.path.join(parent_directory,'*'))


# Used to store randomly selected file paths
random_files = []
# Path to store the remaining files
files=[] 

# iterate each subfolder
for folder in sub_folders:
    # Get all file paths in subfolders
    all_files = glob.glob(os.path.join(folder, '*'))
    if all_files:  # Make sure the folder is not empty
        # Random select a file from the file list
        chosen_file = random.choice(all_files)
        # Add to random file list
        random_files.append(chosen_file)
        # Add the remaining files to another list
        files.extend([file for file in all_files if file != chosen_file])


print("Randomly selected files:", random_files)
print("Remaining files:", files)

Randomly selected files: ['../../data/train/ashpri1/XC116338.ogg', '../../data/train/asikoe2/XC115086.ogg', '../../data/train/ashwoo2/XC169810.ogg', '../../data/train/ashdro1/XC114598.ogg', '../../data/train/asiope1/XC194954.ogg', '../../data/train/asbfly/XC164848.ogg']
Remaining files: ['../../data/train/ashpri1/XC116339.ogg', '../../data/train/asikoe2/XC138196.ogg', '../../data/train/ashwoo2/XC125152.ogg', '../../data/train/ashdro1/XC114599.ogg', '../../data/train/asiope1/XC397761.ogg', '../../data/train/asbfly/XC134896.ogg']


## data preprocessing

In [13]:
def read_audio(path:str)->pydub.audio_segment.AudioSegment:
    """
    read ogg file as pydub.audio_segment.AudioSegment for the following steps

    parametere:
        path: *.ogg file path

    return
        audio: the readed audio data
    """
    audio = AudioSegment.from_file(path, format="ogg")

    return audio    

In [14]:
# Regarding the data of a single audio, some audio information needs to be paid attention to, such as audio duration, sampling rate, bit rate and number of channels.

def audio_info(audio:pydub.audio_segment.AudioSegment):
    """
    Grab all information of the input audio

    Parameters:
        Audio: the readed audio data

    Return:
        the information of the audio
    """
    # the audio duration time (seconds)
    duration_seconds=len(audio)/1000.0

    # the audio sampling rate
    sr=audio.frame_rate

    # the num of channels
    num_channels=audio.channels

    #bit rate
    bit_rate=audio.sample_width * 8

    return duration_seconds, sr, num_channels, bit_rate



In [15]:
# Convert audio data into array

def audio2array(audio_slices:list)->np.array:
    """
    transform audio segments to arrays
    """
    audio_arrays=np.array([np.array(audio_slice.get_array_of_samples()) for audio_slice in audio_slices])

    return audio_arrays

    

In [16]:
def slice_audio_5_align(audio:pydub.audio_segment.AudioSegment)->List[pydub.audio_segment.AudioSegment]:
    """
    Slice the complete audio into multiple 5 seconds length,
    keep all slice have the same length, especially for the last slice

    Return the list of all audio segments
    """
    # set up the segment duration
    # Set up the segment duration
    segment_duration = 5 * 1000  # 5 seconds in milliseconds

    # Check if the audio is less than 5 seconds
    if len(audio) < segment_duration:
        # Calculate the required padding length
        padding_length = segment_duration - len(audio)
        # Create a silent audio segment for padding
        silence = AudioSegment.silent(duration=padding_length)
        # Pad the audio with silence
        padded_audio = audio + silence
        return [padded_audio]  # Return the padded audio as a single segment

    # If the audio is 5 seconds or longer, proceed as normal
    segments = [audio[i:i + segment_duration] for i in range(0, len(audio), segment_duration)]

    # Ensure the last segment is exactly 5 seconds long
    if len(segments[-1]) != segment_duration:
        last_segment = audio[-segment_duration:]  # Get the last 5 seconds of the audio
        segments[-1] = last_segment  # Replace the last segment with a full 5-second segment

    return segments

In [17]:
def audio_bitrate_norm(bit_rate:float,audio_array:np.array):
    """
    because the .ogg file readed through pydub would based off the audio original bit rate,
    we want the value of the audio keep small, 
    so do normalization based off the bit rate.

    Parameters:
        bit_rate: the bit rate of the audio
        audio_array: the data in array form for each single slice
    """
    audio_array_norm = audio_array / float(2**(bit_rate-1))

    return audio_array_norm
    

In [18]:
def audio_random_sampling_2(total_samples:int,audio_segment:pydub.audio_segment.AudioSegment)-> List[pydub.audio_segment.AudioSegment]:
    """
    Randomly extract audio clips and combine them into 2 seconds of audio

    Parameters:
        total_samples: the number of randomly synthesized audio clips
        audio_segment: the single audio segment in form `pydub.audio_segment.AudioSegment`

    Return:
        The list of all random extract audio clips in form `pydub.audio_segment.AudioSegment`

    """
    random_clips=[]
    clip_num=0

    # Our goal is to randomly extract a total of 2 seconds of audio
    total_duration_ms = 2*1000

    while clip_num<total_samples:
        #Store the extracted fragments
        extracted_segments = AudioSegment.silent(duration=0)  # Create a silent segment for subsequent splicing

        # Continue looping when the total length of the extracted segments is less than 2 seconds
        while extracted_segments.duration_seconds < 2:
            # Random choose a starting point
            start_ms = random.randint(0, len(audio_segment) - 1)
            # Calculate the maximum duration that can be extracted
            max_extract_ms = total_duration_ms - int(extracted_segments.duration_seconds * 1000)
            # Random determine the duration of this draw
            extract_duration_ms = random.randint(1, max_extract_ms)
            # Random extracted fragments
            extract = audio_segment[start_ms:start_ms+extract_duration_ms]
            # concat the extracted fragments
            extracted_segments += extract

        # Final discontinuous random 2 seconds of audio data
        random_two_seconds = extracted_segments

        random_clips.append(random_two_seconds)

        clip_num+=1

    return random_clips

In [19]:
def timeD2preqD(normalized_clip:np.array,sr:int):
    """
    transform audio from time domain to frequency domain

    Parameters:
        normalized_clip: the single clip in array format (data)
        sr: sampling rate

    Return:
        Due to symmetry, only half the spectrum is needed.
        And becuase the range of frequencies depends on the sampling rate of the audio signal.
        we do not need the frequency info, all of them are the same.
    """
    fft = np.fft.fft(normalized_clip)
    magnitude = np.abs(fft)
    frequency = np.linspace(0, sr, len(magnitude))

    half_len = len(magnitude) // 2
    frequency=frequency[:half_len]
    magnitude=magnitude[:half_len]

    return magnitude,frequency

In [20]:
labels_list=[]
all_audios_magnitude=[]
for path in files:
    print(path)
    labels=[]
    # extract label
    label=path.split('/')[-2]

    # read audio
    audio=read_audio(path)

    # grab audio information
    duration_secconds,sr,num_channels,bit_rate=audio_info(audio)

    # slice audio into multi 5 seconds
    slice_5_all=slice_audio_5_align(audio)

    for single_slice in slice_5_all:

        # slice audio on each 5 sec long audio and generate multi 2 sec clips
        random_clips=audio_random_sampling_2(total_samples=5,audio_segment=single_slice)

        # convert all 2 sec clips to array format
        audio_arrays_2sec=audio2array(random_clips)

        # normalize each 2 sec audio array 
        arrays_2sec_norm=[]
        for i in audio_arrays_2sec:
            array_2sec_norm=audio_bitrate_norm(bit_rate=bit_rate,audio_array=i)
            arrays_2sec_norm.append(array_2sec_norm)
        
        # arrays_2sec_norm=np.array(arrays_2sec_norm)

        # conver audio from time domain to frequency domain
        audios_magnitude=[]
        for i in arrays_2sec_norm:
            magnitude,frequency=timeD2preqD(normalized_clip=i,sr=sr)
            audios_magnitude.append(magnitude)

        # audios_magnitude=np.array(audios_magnitude)

        labels.append(label)
        all_audios_magnitude.append(audios_magnitude)


    # all_audios_magnitude=np.array(all_audios_magnitude)

    
    labels_list.append(labels)



../../data/train/ashpri1/XC116339.ogg
../../data/train/asikoe2/XC138196.ogg
../../data/train/ashwoo2/XC125152.ogg
../../data/train/ashdro1/XC114599.ogg
../../data/train/asiope1/XC397761.ogg
../../data/train/asbfly/XC134896.ogg


In [21]:
# same step for val-set prepare

val_labels_list=[]
val_all_audios_magnitude=[]
for path in random_files:
    print(path)
    labels=[]
    # extract label
    label=path.split('/')[-2]

    # read audio
    audio=read_audio(path)

    # grab audio information
    duration_secconds,sr,num_channels,bit_rate=audio_info(audio)

    # slice audio into multi 5 seconds
    slice_5_all=slice_audio_5_align(audio)

    for single_slice in slice_5_all:

        # slice audio on each 5 sec long audio and generate multi 2 sec clips
        random_clips=audio_random_sampling_2(total_samples=5,audio_segment=single_slice)

        # convert all 2 sec clips to array format
        audio_arrays_2sec=audio2array(random_clips)

        # normalize each 2 sec audio array 
        arrays_2sec_norm=[]
        for i in audio_arrays_2sec:
            array_2sec_norm=audio_bitrate_norm(bit_rate=bit_rate,audio_array=i)
            arrays_2sec_norm.append(array_2sec_norm)
        
        # arrays_2sec_norm=np.array(arrays_2sec_norm)

        # conver audio from time domain to frequency domain
        audios_magnitude=[]
        for i in arrays_2sec_norm:
            magnitude,frequency=timeD2preqD(normalized_clip=i,sr=sr)
            audios_magnitude.append(magnitude)

        # audios_magnitude=np.array(audios_magnitude)

        labels.append(label)
        val_all_audios_magnitude.append(audios_magnitude)


    # all_audios_magnitude=np.array(all_audios_magnitude)

    
    val_labels_list.append(labels)

../../data/train/ashpri1/XC116338.ogg
../../data/train/asikoe2/XC115086.ogg
../../data/train/ashwoo2/XC169810.ogg
../../data/train/ashdro1/XC114598.ogg
../../data/train/asiope1/XC194954.ogg
../../data/train/asbfly/XC164848.ogg


In [22]:
labels_list

[['ashpri1', 'ashpri1'],
 ['asikoe2', 'asikoe2', 'asikoe2', 'asikoe2', 'asikoe2'],
 ['ashwoo2',
  'ashwoo2',
  'ashwoo2',
  'ashwoo2',
  'ashwoo2',
  'ashwoo2',
  'ashwoo2',
  'ashwoo2',
  'ashwoo2',
  'ashwoo2',
  'ashwoo2'],
 ['ashdro1',
  'ashdro1',
  'ashdro1',
  'ashdro1',
  'ashdro1',
  'ashdro1',
  'ashdro1',
  'ashdro1'],
 ['asiope1'],
 ['asbfly', 'asbfly', 'asbfly', 'asbfly', 'asbfly', 'asbfly']]

In [23]:
labels_flatten_list = [element for sublist in labels_list for element in sublist]
print(labels_flatten_list)
print(len(labels_flatten_list))

['ashpri1', 'ashpri1', 'asikoe2', 'asikoe2', 'asikoe2', 'asikoe2', 'asikoe2', 'ashwoo2', 'ashwoo2', 'ashwoo2', 'ashwoo2', 'ashwoo2', 'ashwoo2', 'ashwoo2', 'ashwoo2', 'ashwoo2', 'ashwoo2', 'ashwoo2', 'ashdro1', 'ashdro1', 'ashdro1', 'ashdro1', 'ashdro1', 'ashdro1', 'ashdro1', 'ashdro1', 'asiope1', 'asbfly', 'asbfly', 'asbfly', 'asbfly', 'asbfly', 'asbfly']
33


In [24]:
all_audios_magnitude=np.array(all_audios_magnitude)

In [25]:
print(all_audios_magnitude.shape)

(33, 5, 32000)


In [44]:
print(all_audios_magnitude)

[[[1.81686464e-02 4.81870678e+00 4.33423088e+00 ... 5.71471008e-02
   7.89477954e-02 8.27030752e-02]
  [2.41087334e+00 1.01080087e+01 3.89218539e+00 ... 3.94646991e-02
   3.66282951e-02 2.98537665e-02]
  [4.65034673e+00 6.72856101e+00 1.37018789e+01 ... 1.30813245e-01
   8.74947668e-02 1.27252597e-01]
  [2.42534146e+00 5.45813414e+00 6.52445159e+00 ... 6.41737195e-02
   7.28165364e-02 7.87558942e-02]
  [5.29594434e+00 4.89940171e+00 4.45367478e+00 ... 8.42250725e-02
   1.07353951e-01 8.76623899e-02]]

 [[1.24898088e+00 6.13831839e+00 4.70692573e+00 ... 1.39811849e-01
   1.01204803e-01 6.18743821e-02]
  [6.33152206e+00 2.73318003e+00 1.72171257e+00 ... 1.92442911e-01
   5.04104599e-02 1.88744907e-01]
  [5.15870596e+00 4.54021180e+00 2.64509008e+00 ... 4.54308252e-02
   8.92498460e-02 1.37793069e-01]
  [7.49060958e-01 2.76398303e+00 6.64030817e+00 ... 6.61474730e-02
   7.56880047e-02 6.89109598e-02]
  [8.13311935e+00 3.95740002e+00 7.31788068e+00 ... 1.81223302e-01
   1.39337953e-01 2.33

In [27]:
# same step for val set

val_labels_flatten_list = [element for sublist in val_labels_list for element in sublist]
print(val_labels_flatten_list)
print(len(val_labels_flatten_list))

['ashpri1', 'ashpri1', 'ashpri1', 'ashpri1', 'ashpri1', 'ashpri1', 'asikoe2', 'asikoe2', 'asikoe2', 'asikoe2', 'asikoe2', 'ashwoo2', 'ashwoo2', 'ashwoo2', 'ashwoo2', 'ashdro1', 'ashdro1', 'ashdro1', 'ashdro1', 'ashdro1', 'ashdro1', 'ashdro1', 'ashdro1', 'ashdro1', 'ashdro1', 'ashdro1', 'ashdro1', 'asiope1', 'asiope1', 'asiope1', 'asbfly', 'asbfly', 'asbfly', 'asbfly']
34


In [28]:
val_all_audios_magnitude=np.array(val_all_audios_magnitude)

print(val_all_audios_magnitude.shape)

(34, 5, 32000)


## label encoder

In [29]:
# initialize label encoder

encoder=LabelEncoder()

# use Labelencoder to transform labels
encoded_labels=encoder.fit_transform(labels_flatten_list)

In [30]:
print('encoded labels:', encoded_labels)
print(len(encoded_labels))

encoded labels: [2 2 4 4 4 4 4 3 3 3 3 3 3 3 3 3 3 3 1 1 1 1 1 1 1 1 5 0 0 0 0 0 0]
33


In [31]:
# If needed, you can view the mapping of original labels to encodings
label_mapping = dict(zip(encoder.classes_, encoder.transform(encoder.classes_)))
print("Label to integer mapping:", label_mapping)

Label to integer mapping: {'asbfly': 0, 'ashdro1': 1, 'ashpri1': 2, 'ashwoo2': 3, 'asikoe2': 4, 'asiope1': 5}


In [57]:
# # decoding
# decoded_labels = encoder.inverse_transform(encoded_labels)  # 解码前10个标签作为示例
# print("Decoded Labels:", decoded_labels)

Decoded Labels: ['ashpri1' 'ashpri1' 'ashpri1' 'ashpri1' 'ashpri1' 'ashpri1' 'ashpri1'
 'ashpri1' 'asikoe2' 'asikoe2' 'asikoe2' 'asikoe2' 'asikoe2' 'asikoe2'
 'asikoe2' 'asikoe2' 'asikoe2' 'asikoe2' 'ashwoo2' 'ashwoo2' 'ashwoo2'
 'ashwoo2' 'ashwoo2' 'ashwoo2' 'ashwoo2' 'ashwoo2' 'ashwoo2' 'ashwoo2'
 'ashwoo2' 'ashwoo2' 'ashwoo2' 'ashwoo2' 'ashwoo2' 'ashdro1' 'ashdro1'
 'ashdro1' 'ashdro1' 'ashdro1' 'ashdro1' 'ashdro1' 'ashdro1' 'ashdro1'
 'ashdro1' 'ashdro1' 'ashdro1' 'ashdro1' 'ashdro1' 'ashdro1' 'ashdro1'
 'ashdro1' 'ashdro1' 'ashdro1' 'ashdro1' 'asiope1' 'asiope1' 'asiope1'
 'asiope1' 'asbfly' 'asbfly' 'asbfly' 'asbfly' 'asbfly' 'asbfly' 'asbfly'
 'asbfly' 'asbfly' 'asbfly']


In [45]:
! mkdir ./pickles/

In [33]:
# Save the label encoder to a file
joblib.dump(encoder, './pickles/label_encoder.joblib')

['./pickles/label_encoder.joblib']

In [34]:
# Load label encoder from file
loaded_label_encoder = joblib.load('./pickles/label_encoder.joblib')

loaded_label_encoder.inverse_transform(encoded_labels)

array(['ashpri1', 'ashpri1', 'asikoe2', 'asikoe2', 'asikoe2', 'asikoe2',
       'asikoe2', 'ashwoo2', 'ashwoo2', 'ashwoo2', 'ashwoo2', 'ashwoo2',
       'ashwoo2', 'ashwoo2', 'ashwoo2', 'ashwoo2', 'ashwoo2', 'ashwoo2',
       'ashdro1', 'ashdro1', 'ashdro1', 'ashdro1', 'ashdro1', 'ashdro1',
       'ashdro1', 'ashdro1', 'asiope1', 'asbfly', 'asbfly', 'asbfly',
       'asbfly', 'asbfly', 'asbfly'], dtype='<U7')

In [35]:
# then use the loaded encoder to encode val set labels

val_encoded_labels=loaded_label_encoder.fit_transform(val_labels_flatten_list)

In [36]:
print('encoded labels:', val_encoded_labels)
print(len(val_encoded_labels))

encoded labels: [2 2 2 2 2 2 4 4 4 4 4 3 3 3 3 1 1 1 1 1 1 1 1 1 1 1 1 5 5 5 0 0 0 0]
34


## global normalization

In [42]:
scaler=StandardScaler()
all_audios_magnitude_norm=scaler.fit_transform(all_audios_magnitude.reshape(-1,all_audios_magnitude.shape[-1])).reshape(all_audios_magnitude.shape)

In [43]:
print(all_audios_magnitude_norm.shape)
print(all_audios_magnitude_norm)

(33, 5, 32000)
[[[-0.60150175  1.75064311  1.38202133 ... -0.18650158  0.2927888
    0.2458972 ]
  [ 0.48014446  4.45496844  1.17155445 ... -0.44697244 -0.45597294
   -0.58597602]
  [ 1.49252092  2.72711736  5.84214968 ...  0.89863856  0.44401094
    0.94712784]
  [ 0.48668492  2.07757086  2.4248301  ... -0.08299588  0.18430803
    0.1837667 ]
  [ 1.7843698   1.79190098  1.43889101 ...  0.21237085  0.79538081
    0.32395917]]

 [[-0.04510071  2.42533688  1.55946895 ...  1.03119262  0.68658353
   -0.08195632]
  [ 2.25251312  0.68435064  0.13814806 ...  1.80647519 -0.21212418
    1.91504601]
  [ 1.72232981  1.60825361  0.57778693 ... -0.35908834  0.47506367
    1.11303988]
  [-0.27109453  0.70009966  2.47999178 ... -0.05392148  0.23511311
    0.02880275]
  [ 3.06694328  1.31027239  2.80259788 ...  1.64120459  1.3612759
    2.61658844]]

 [[-0.53842718 -0.50581829 -0.49141359 ... -0.38002635 -0.42623826
    0.89489129]
  [-0.51230913 -0.62498472 -0.64391015 ...  1.51183225  2.2253221
    

In [46]:
# save scaler

joblib.dump(scaler, './pickles/scaler_model.pkl')

['./pickles/scaler_model.pkl']

In [47]:
# Later, load the StandardScaler instance when deploying the model
loaded_scaler = joblib.load('./pickles/scaler_model.pkl')

val_all_audios_magnitude_norm = loaded_scaler.transform(val_all_audios_magnitude.reshape(-1, val_all_audios_magnitude.shape[-1])).reshape(val_all_audios_magnitude.shape)

In [48]:
print(val_all_audios_magnitude_norm.shape)
print(val_all_audios_magnitude_norm)


(34, 5, 32000)
[[[-2.96661618e-01 -6.21576370e-01  1.35383039e-01 ...  4.46073585e+00
    5.01726441e+00  2.89779311e+00]
  [-3.21112425e-01 -1.77822865e-02 -2.88892756e-01 ... -1.42808933e-01
   -3.35280570e-01 -6.65739535e-01]
  [-3.58124485e-01 -4.89836755e-01  3.89714426e-01 ...  2.76594705e-01
   -6.83490813e-02 -9.66597405e-02]
  [ 5.22984045e-01  2.27902500e-01  2.30623069e-01 ... -2.40654137e-02
   -3.07987943e-01 -4.64070870e-01]
  [-3.80511734e-01 -3.21570032e-01  2.21361356e-01 ...  1.95712279e+00
    1.83632179e+00  3.29184347e+00]]

 [[ 3.01381659e-01  3.35694978e-01 -1.43715363e-01 ...  1.07712162e+00
    7.79471764e-01  3.03887798e-01]
  [-4.05915720e-01 -2.13267223e-01 -1.13360247e-01 ...  2.32126216e+00
    1.77975424e+00 -7.59200042e-01]
  [-1.88137944e-01 -2.74027318e-01 -3.58961878e-01 ...  1.89930823e+00
    1.72475294e+00 -1.76090712e-01]
  [ 2.88734198e-02  4.88467463e-03  1.85810327e-01 ...  2.44931403e-01
    1.78712298e+00  1.41414016e+00]
  [ 8.01468570e-02  

In [49]:
! rm ./pickles/*
! rmdir ./pickles

## build up neural network

In [52]:
train_features = torch.Tensor(all_audios_magnitude_norm)
val_features = torch.Tensor(val_all_audios_magnitude_norm)
train_labels = torch.Tensor(encoded_labels)
val_labels = torch.Tensor(val_encoded_labels)

In [66]:
print(train_features.shape)
print(val_features.shape)
print(train_labels.shape)
print(val_labels.shape)

torch.Size([33, 5, 32000])
torch.Size([34, 5, 32000])
torch.Size([33])
torch.Size([34])


In [79]:
class ConvBlock1(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1=nn.Conv1d(in_channels=5,out_channels=32,kernel_size=2,stride=2,padding=0)
        self.conv2=nn.Conv1d(in_channels=5,out_channels=32,kernel_size=4,stride=2,padding=1)
        self.conv3=nn.Conv1d(in_channels=5,out_channels=32,kernel_size=8,stride=2,padding=3)
    def forward(self,x):
        x1=self.conv1(x)
        x2=self.conv2(x)
        x3=self.conv3(x)
        # The length of the input data shape is 32000, and the stride is 2, so after conv1d, the length becomes 16000
        # The number of output channels of each conv1d layer is 32, so for the shape of the entire output, regardless of batchsize, it is 32*16000
        # Because of the chrononet architecture, we need to connect the outputs of the three layers to become 96*16000 output data.
        x=torch.cat((x1,x2,x3),dim=1)

        return x


In [87]:
## check output shape

model =ConvBlock1()

output_test_1=model(train_features)

print(output_test_1.shape)
print(output_test_1)

torch.Size([33, 96, 16000])
tensor([[[ 3.2082e-01, -5.7716e-01, -6.0971e-01,  ..., -8.2652e-01,
          -5.5966e-01, -7.3094e-01],
         [ 2.3690e+00,  2.6382e+00,  1.8766e+00,  ...,  5.4351e-01,
           5.5883e-01,  3.7526e-01],
         [-1.0461e+00, -2.5734e+00, -1.7035e+00,  ..., -8.7640e-01,
          -7.3219e-01, -4.8908e-01],
         ...,
         [-8.2224e-01,  7.9491e-01, -4.8606e-01,  ..., -2.6135e-01,
           8.7546e-04, -4.2487e-02],
         [ 1.1410e+00,  1.2414e+00,  8.9403e-01,  ..., -2.9899e-01,
          -3.1044e-01, -2.2978e-01],
         [ 3.7328e-02,  7.5953e-02, -1.2442e-01,  ..., -2.8588e-01,
           3.2260e-01,  1.3019e-01]],

        [[ 4.3485e-01, -8.5247e-01,  1.1724e+00,  ...,  3.1057e-01,
           4.0219e-01, -1.0443e-01],
         [ 1.1806e+00,  4.9628e-01,  9.9513e-01,  ...,  1.0207e+00,
           2.7607e-01,  5.4837e-01],
         [-7.0068e-01, -1.7329e+00, -1.4784e+00,  ..., -6.4664e-01,
          -9.8242e-01, -1.5394e+00],
         ..

In [93]:
class ConvBlock2(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1=nn.Conv1d(in_channels=96,out_channels=32,kernel_size=2,stride=2,padding=0)
        self.conv2=nn.Conv1d(in_channels=96,out_channels=32,kernel_size=4,stride=2,padding=1)
        self.conv3=nn.Conv1d(in_channels=96,out_channels=32,kernel_size=8,stride=2,padding=3)

    def forward(self,x):
        x1=self.conv1(x)
        x2=self.conv2(x)
        x3=self.conv3(x)
        # From the output of ConvBlock1, we know that the input shape of convBlock2 is 96*16000
        # After the calculation of this block, the output will become 96*8000
        x=torch.cat((x1,x2,x3),dim=1)

        return x

In [95]:
## check output shape

model =ConvBlock2()

output_test_2=model(output_test_1)

print(output_test_2.shape)
print(output_test_2)

torch.Size([33, 96, 8000])
tensor([[[-1.8299e-02, -3.7645e-01, -1.4992e-01,  ..., -3.0809e-01,
          -2.3014e-01, -1.5019e-01],
         [ 3.7378e-01,  2.6526e-01,  3.0702e-01,  ...,  1.0680e-01,
          -5.2213e-02,  1.6615e-02],
         [-3.0561e-01, -2.7692e-01, -3.4077e-01,  ...,  9.5736e-02,
           5.5747e-02,  7.0089e-02],
         ...,
         [ 2.9844e-01, -1.6223e-01,  2.1274e-01,  ...,  4.3536e-01,
           3.8831e-01,  3.1669e-01],
         [-6.6846e-01, -1.2972e+00, -1.0175e+00,  ..., -6.7008e-01,
          -4.7979e-01, -3.5018e-01],
         [-7.3458e-02,  7.3898e-01,  2.8393e-01,  ..., -1.1900e-01,
          -4.9675e-03, -1.8927e-02]],

        [[-6.0680e-02,  1.0096e-02,  2.9825e-01,  ...,  5.2853e-01,
          -2.3376e-01, -2.9571e-01],
         [-8.7680e-02,  2.2218e-01, -5.9301e-02,  ...,  1.5120e-02,
          -2.6125e-01, -2.3533e-01],
         [-3.4706e-01, -5.6057e-01, -5.4021e-01,  ...,  1.0120e-01,
          -3.8656e-01, -4.3388e-01],
         ...

In [99]:
class ConvBlock3(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1=nn.Conv1d(in_channels=96,out_channels=32,kernel_size=2,stride=2,padding=0)
        self.conv2=nn.Conv1d(in_channels=96,out_channels=32,kernel_size=4,stride=2,padding=1)
        self.conv3=nn.Conv1d(in_channels=96,out_channels=32,kernel_size=8,stride=2,padding=3)

    def forward(self,x):
        x1=self.conv1(x)
        x2=self.conv2(x)
        x3=self.conv3(x)
        # From the output of ConvBlock1, we know that the input shape of convBlock2 is 96*8000
        # After the calculation of this block, the output will become 96*4000
        x=torch.cat((x1,x2,x3),dim=1)

        return x

In [100]:
## check output shape

model =ConvBlock3()

output_test_3=model(output_test_2)

print(output_test_3.shape)
print(output_test_3)

torch.Size([33, 96, 4000])
tensor([[[ 2.1763e-01,  1.6819e-03, -2.9034e-01,  ...,  2.4235e-01,
           2.4384e-01,  1.3891e-01],
         [-1.9164e-01, -3.7283e-01, -2.2973e-01,  ...,  1.0915e-01,
           3.3382e-02,  2.8995e-02],
         [-1.2335e-01, -1.3528e-01,  3.1715e-01,  ..., -1.6945e-01,
          -1.9558e-01, -9.9069e-02],
         ...,
         [-2.3842e-01, -4.7206e-01, -4.7228e-01,  ...,  5.5285e-02,
           1.6305e-02,  9.8824e-02],
         [-5.2599e-01, -9.0719e-02,  1.1435e-01,  ..., -1.3734e-01,
          -1.2618e-01,  4.1087e-02],
         [ 3.0367e-01, -3.1348e-01,  5.4315e-02,  ..., -3.1884e-02,
           3.1888e-02,  8.4352e-02]],

        [[-3.3234e-01, -7.4199e-03, -2.3013e-01,  ..., -1.0078e-01,
          -1.8817e-01,  4.0805e-04],
         [-4.8083e-01, -1.3919e-01, -3.0645e-01,  ..., -1.4822e-01,
          -6.0931e-02, -1.7554e-01],
         [ 3.1605e-01,  1.7554e-01, -1.8008e-02,  ...,  3.0727e-01,
           9.0435e-02,  8.0738e-02],
         ...

In [107]:
class ChronoNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.block1=ConvBlock1()
        self.block2=ConvBlock2()
        self.block3=ConvBlock3()
        self.gru1=nn.GRU(input_size=96,hidden_size=32,num_layers=1,batch_first=True)

    def forward(self,x):
        x=self.block1(x)
        x=self.block2(x)
        x=self.block3(x)
        # Because the input shape required by gru is (batch_size, sequence length, feature_size)
        # But the result of the previous conversion calculation is (batchsize, feature_size, sequence length)
        # I need to change the shape
        x=x.permute(0,2,1)
        gru_out1,_=self.gru1(x)

        return gru_out1,_
        
        
# The above is used to view the output after adding a gru
# Next I need to add more layers

In [108]:
model=ChronoNet()
output_test_4,_=model(train_features)

print(output_test_4.shape)

print(_.shape)

torch.Size([33, 4000, 32])
torch.Size([1, 33, 32])


In [111]:
class ChronoNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.block1=ConvBlock1()
        self.block2=ConvBlock2()
        self.block3=ConvBlock3()
        self.gru1=nn.GRU(input_size=96,hidden_size=32,num_layers=1,batch_first=True)
        self.gru2=nn.GRU(input_size=32,hidden_size=32,num_layers=1,batch_first=True)


    def forward(self,x):
        x=self.block1(x)
        x=self.block2(x)
        x=self.block3(x)
        # Because the input shape required by gru is (batch_size, sequence length, feature_size)
        # But the result of the previous conversion calculation is (batchsize, feature_size, sequence length)
        # I need to change the shape
        x=x.permute(0,2,1)
        gru_out1,_=self.gru1(x)
        gru_out2,_=self.gru2(gru_out1)
        # According to the chrononet architecture, we need to connect the calculations of the two layers of GRU according to the feature-size dimension
        x=torch.cat((gru_out1,gru_out2),dim=2)

        return x
    

# The above is used to view the output after adding a gru
# Next I need to add more layers

In [113]:
model=ChronoNet()
output_test_5=model(train_features)

print(output_test_5.shape)

torch.Size([33, 4000, 64])


In [114]:
class ChronoNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.block1=ConvBlock1()
        self.block2=ConvBlock2()
        self.block3=ConvBlock3()
        self.gru1=nn.GRU(input_size=96,hidden_size=32,num_layers=1,batch_first=True)
        self.gru2=nn.GRU(input_size=32,hidden_size=32,num_layers=1,batch_first=True)
        self.gru3=nn.GRU(input_size=64,hidden_size=32,num_layers=1,batch_first=True)


    def forward(self,x):
        x=self.block1(x)
        x=self.block2(x)
        x=self.block3(x)
        # Because the input shape required by gru is (batch_size, sequence length, feature_size)
        # But the result of the previous conversion calculation is (batchsize, feature_size, sequence length)
        # I need to change the shape
        x=x.permute(0,2,1)
        gru_out1,_=self.gru1(x)
        gru_out2,_=self.gru2(gru_out1)
        # According to the chrononet architecture, we need to connect the calculations of the two layers of GRU according to the feature-size dimension
        x=torch.cat((gru_out1,gru_out2),dim=2)
        gru_out3,_=self.gru3(x)
        x=torch.cat((gru_out1,gru_out2,gru_out3),dim=2)

        return x
    

# The above is used to view the output after adding a gru
# Next I need to add more layers

In [115]:
model=ChronoNet()
output_test_6=model(train_features)

print(output_test_6.shape)

torch.Size([33, 4000, 96])


In [116]:
class ChronoNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.block1=ConvBlock1()
        self.block2=ConvBlock2()
        self.block3=ConvBlock3()
        self.gru1=nn.GRU(input_size=96,hidden_size=32,num_layers=1,batch_first=True)
        self.gru2=nn.GRU(input_size=32,hidden_size=32,num_layers=1,batch_first=True)
        self.gru3=nn.GRU(input_size=64,hidden_size=32,num_layers=1,batch_first=True)
        self.gru4=nn.GRU(input_size=96,hidden_size=32,num_layers=1,batch_first=True)


    def forward(self,x):
        x=self.block1(x)
        x=self.block2(x)
        x=self.block3(x)
        # Because the input shape required by gru is (batch_size, sequence length, feature_size)
        # But the result of the previous conversion calculation is (batchsize, feature_size, sequence length)
        # I need to change the shape
        x=x.permute(0,2,1)
        gru_out1,_=self.gru1(x)
        gru_out2,_=self.gru2(gru_out1)
        # According to the chrononet architecture, we need to connect the calculations of the two layers of GRU according to the feature-size dimension
        x=torch.cat((gru_out1,gru_out2),dim=2)
        gru_out3,_=self.gru3(x)
        x=torch.cat((gru_out1,gru_out2,gru_out3),dim=2)
        gru_out4,_=self.gru4(x)

        return gru_out4

In [117]:
model=ChronoNet()
output_test_7=model(train_features)

print(output_test_7.shape)

torch.Size([33, 4000, 32])


In [127]:
# Next, add a fully connected layer

class ChronoNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.block1=ConvBlock1()
        self.block2=ConvBlock2()
        self.block3=ConvBlock3()
        self.gru1=nn.GRU(input_size=96,hidden_size=32,num_layers=1,batch_first=True)
        self.gru2=nn.GRU(input_size=32,hidden_size=32,num_layers=1,batch_first=True)
        self.gru3=nn.GRU(input_size=64,hidden_size=32,num_layers=1,batch_first=True)
        self.gru4=nn.GRU(input_size=96,hidden_size=32,num_layers=1,batch_first=True)
        self.fc1=nn.Linear(in_features=32,out_features=64)



    def forward(self,x):
        x=self.block1(x)
        x=self.block2(x)
        x=self.block3(x)
        # Because the input shape required by gru is (batch_size, sequence length, feature_size)
        # But the result of the previous conversion calculation is (batchsize, feature_size, sequence length)
        # I need to change the shape
        x=x.permute(0,2,1)
        gru_out1,_=self.gru1(x)
        gru_out2,_=self.gru2(gru_out1)
        # According to the chrononet architecture, we need to connect the calculations of the two layers of GRU according to the feature-size dimension
        x=torch.cat((gru_out1,gru_out2),dim=2)
        gru_out3,_=self.gru3(x)
        x=torch.cat((gru_out1,gru_out2,gru_out3),dim=2)
        gru_out4,_=self.gru4(x)
        x = self.fc1(gru_out4[:, -1, :])  #Usually take the final output of GRU

        return x

In [128]:
model=ChronoNet()
output_test_7=model(train_features)

print(output_test_7.shape)
print(output_test_7)

torch.Size([33, 64])
tensor([[-0.0006,  0.1203,  0.0623,  ...,  0.1123,  0.0983,  0.0780],
        [ 0.1523,  0.0730,  0.3505,  ...,  0.0644,  0.1266,  0.2473],
        [ 0.0691,  0.1145,  0.2082,  ...,  0.1363,  0.1202,  0.2101],
        ...,
        [-0.0378,  0.0938, -0.0893,  ...,  0.1510,  0.0767, -0.0551],
        [-0.0487,  0.0690, -0.0497,  ...,  0.1291,  0.1071, -0.0163],
        [-0.0473,  0.0794, -0.0766,  ...,  0.1403,  0.0967, -0.0395]],
       grad_fn=<AddmmBackward0>)


In [129]:
# add dropout

class ChronoNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.block1=ConvBlock1()
        self.block2=ConvBlock2()
        self.block3=ConvBlock3()
        self.gru1=nn.GRU(input_size=96,hidden_size=32,num_layers=1,batch_first=True)
        self.gru2=nn.GRU(input_size=32,hidden_size=32,num_layers=1,batch_first=True)
        self.gru3=nn.GRU(input_size=64,hidden_size=32,num_layers=1,batch_first=True)
        self.gru4=nn.GRU(input_size=96,hidden_size=32,num_layers=1,batch_first=True)
        self.fc1=nn.Linear(in_features=32,out_features=64)
        self.dropout = nn.Dropout(0.5)



    def forward(self,x):
        x=self.block1(x)
        x=self.block2(x)
        x=self.block3(x)
        # Because the input shape required by gru is (batch_size, sequence length, feature_size)
        # But the result of the previous conversion calculation is (batchsize, feature_size, sequence length)
        # I need to change the shape
        x=x.permute(0,2,1)
        gru_out1,_=self.gru1(x)
        gru_out2,_=self.gru2(gru_out1)
        # According to the chrononet architecture, we need to connect the calculations of the two layers of GRU according to the feature-size dimension
        x=torch.cat((gru_out1,gru_out2),dim=2)
        gru_out3,_=self.gru3(x)
        x=torch.cat((gru_out1,gru_out2,gru_out3),dim=2)
        gru_out4,_=self.gru4(x)
        x = self.fc1(gru_out4[:, -1, :])  #Usually take the final output of GRU
        x = self.dropout(x)

        return x

In [130]:
model=ChronoNet()
output_test_8=model(train_features)

print(output_test_8.shape)
print(output_test_8)

torch.Size([33, 64])
tensor([[-3.7648e-01, -0.0000e+00,  3.7256e-01,  ..., -3.2188e-01,
          1.4680e-02, -7.5296e-02],
        [-5.5064e-01, -0.0000e+00,  3.8999e-01,  ..., -5.1643e-01,
          0.0000e+00, -0.0000e+00],
        [-5.3573e-01, -0.0000e+00,  0.0000e+00,  ..., -4.0909e-01,
          0.0000e+00, -2.9524e-04],
        ...,
        [-0.0000e+00, -3.0714e-01,  1.3834e-01,  ..., -2.1615e-01,
         -0.0000e+00, -2.0412e-01],
        [-2.5025e-01, -3.4895e-01,  0.0000e+00,  ..., -1.8650e-01,
         -4.0053e-02, -1.8042e-01],
        [-0.0000e+00, -3.4395e-01,  1.5104e-01,  ..., -1.6353e-01,
         -0.0000e+00, -2.0822e-01]], grad_fn=<MulBackward0>)


In [132]:
## Add the last fully connected layer
# Add the regularization layer dropput

class ChronoNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.block1=ConvBlock1()
        self.block2=ConvBlock2()
        self.block3=ConvBlock3()
        self.gru1=nn.GRU(input_size=96,hidden_size=32,num_layers=1,batch_first=True)
        self.gru2=nn.GRU(input_size=32,hidden_size=32,num_layers=1,batch_first=True)
        self.gru3=nn.GRU(input_size=64,hidden_size=32,num_layers=1,batch_first=True)
        self.gru4=nn.GRU(input_size=96,hidden_size=32,num_layers=1,batch_first=True)
        self.fc1=nn.Linear(in_features=32,out_features=64)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(64, 6)  # num_classes is the number of categories



    def forward(self,x):
        x=self.block1(x)
        x=self.block2(x)
        x=self.block3(x)
        # Because the input shape required by gru is (batch_size, sequence length, feature_size)
        # But the result of the previous conversion calculation is (batchsize, feature_size, sequence length)
        # I need to change the shape
        x=x.permute(0,2,1)
        gru_out1,_=self.gru1(x)
        gru_out2,_=self.gru2(gru_out1)
        # According to the chrononet architecture, we need to connect the calculations of the two layers of GRU according to the feature-size dimension
        x=torch.cat((gru_out1,gru_out2),dim=2)
        gru_out3,_=self.gru3(x)
        x=torch.cat((gru_out1,gru_out2,gru_out3),dim=2)
        gru_out4,_=self.gru4(x)
        x = self.fc1(gru_out4[:, -1, :])  #Usually take the final output of GRU
        x = self.dropout(x)
        x = self.fc2(x)
        x=F.softmax(x, dim=1)


        return x

In [133]:
model=ChronoNet()
output_test_9=model(train_features)

print(output_test_9.shape)
print(output_test_9)

torch.Size([33, 6])
tensor([[0.1862, 0.1498, 0.1662, 0.1691, 0.1825, 0.1462],
        [0.1911, 0.1891, 0.1450, 0.1723, 0.1360, 0.1665],
        [0.2297, 0.1708, 0.1398, 0.1566, 0.1499, 0.1532],
        [0.1923, 0.1709, 0.1590, 0.1591, 0.1710, 0.1477],
        [0.1692, 0.1335, 0.1750, 0.1552, 0.1695, 0.1976],
        [0.1951, 0.1838, 0.1524, 0.1526, 0.1534, 0.1628],
        [0.1731, 0.1320, 0.1542, 0.1698, 0.1773, 0.1936],
        [0.1623, 0.1599, 0.1777, 0.1471, 0.1757, 0.1772],
        [0.1559, 0.1662, 0.1422, 0.1757, 0.2008, 0.1592],
        [0.1855, 0.1543, 0.1747, 0.1551, 0.1675, 0.1628],
        [0.1689, 0.1798, 0.1451, 0.1769, 0.1628, 0.1665],
        [0.1653, 0.1640, 0.1516, 0.1478, 0.1903, 0.1810],
        [0.1671, 0.1755, 0.1746, 0.1641, 0.1665, 0.1521],
        [0.1746, 0.1474, 0.1773, 0.1644, 0.1582, 0.1781],
        [0.1639, 0.1833, 0.1390, 0.1947, 0.1774, 0.1417],
        [0.1890, 0.1785, 0.1561, 0.1610, 0.1402, 0.1753],
        [0.1662, 0.1704, 0.1778, 0.1740, 0.1393, 0.1

In [136]:
# Use torch.argmax to get the category index with the highest probability
predicted_classes = torch.argmax(output_test_9, dim=1)

print(predicted_classes.shape)
print(predicted_classes)
print()

torch.Size([33])
tensor([0, 0, 0, 0, 5, 0, 5, 2, 4, 0, 1, 4, 1, 5, 3, 0, 2, 0, 3, 4, 5, 1, 3, 3,
        0, 0, 0, 1, 5, 0, 0, 1, 0])


In [137]:
# Use torch.max to get the probability value and the corresponding category index at the same time
max_probs, predicted_classes = torch.max(output_test_9, dim=1)

print(max_probs)       # Print the highest probability value
print(predicted_classes)  # Print the corresponding category index

tensor([0.1862, 0.1911, 0.2297, 0.1923, 0.1976, 0.1951, 0.1936, 0.1777, 0.2008,
        0.1855, 0.1798, 0.1903, 0.1755, 0.1781, 0.1947, 0.1890, 0.1778, 0.1946,
        0.2069, 0.1861, 0.1964, 0.1980, 0.1890, 0.1882, 0.1991, 0.1893, 0.2019,
        0.1960, 0.1888, 0.1923, 0.1890, 0.1811, 0.1978],
       grad_fn=<MaxBackward0>)
tensor([0, 0, 0, 0, 5, 0, 5, 2, 4, 0, 1, 4, 1, 5, 3, 0, 2, 0, 3, 4, 5, 1, 3, 3,
        0, 0, 0, 1, 5, 0, 0, 1, 0])


In [330]:
# Because we need to train the model first, we need to calculate the loss
# For multi-classification problems, if you choose to use nn.crossentropylss, you need to remove F.softmax(),
# Because this loss function combines Log-Softmax and NLL Loss (Negative Log Likelihood Loss).

class ChronoNet(nn.Module):
    def __init__(self):
        super().__init__()
        self.block1=ConvBlock1()
        self.block2=ConvBlock2()
        self.block3=ConvBlock3()
        self.gru1=nn.GRU(input_size=96,hidden_size=32,num_layers=1,batch_first=True)
        self.gru2=nn.GRU(input_size=32,hidden_size=32,num_layers=1,batch_first=True)
        self.gru3=nn.GRU(input_size=64,hidden_size=32,num_layers=1,batch_first=True)
        self.gru4=nn.GRU(input_size=96,hidden_size=32,num_layers=1,batch_first=True)
        self.fc1=nn.Linear(in_features=32,out_features=64)
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(64, 6)



    def forward(self,x):
        x=self.block1(x)
        x=self.block2(x)
        x=self.block3(x)
        # Because the input shape required by gru is (batch_size, sequence length, feature_size)
        # But the result of the previous conversion calculation is (batchsize, feature_size, sequence length)
        # I need to change the shape
        x=x.permute(0,2,1)
        gru_out1,_=self.gru1(x)
        gru_out2,_=self.gru2(gru_out1)
        # According to the chrononet architecture, we need to connect the calculations of the two layers of GRU according to the feature-size dimension
        x=torch.cat((gru_out1,gru_out2),dim=2)
        gru_out3,_=self.gru3(x)
        x=torch.cat((gru_out1,gru_out2,gru_out3),dim=2)
        gru_out4,_=self.gru4(x)
        x = self.fc1(gru_out4[:, -1, :])  #Usually take the final output of GRU
        x = self.dropout(x)
        x = self.fc2(x)

        return x

In [296]:
model=ChronoNet()
output_test_10=model(train_features)

print(output_test_10.shape)
print(output_test_10)

torch.Size([33, 6])
tensor([[-1.2378e-01,  2.0501e-01, -2.0715e-01, -2.4703e-02, -1.1928e-01,
          1.7309e-02],
        [-2.3920e-02,  3.7598e-01,  2.4398e-01, -5.1875e-02, -2.4172e-01,
         -1.3735e-02],
        [-2.7396e-02,  2.0630e-01, -8.7438e-02,  3.5428e-02, -2.1047e-01,
          2.2017e-01],
        [-4.1249e-02,  6.7034e-02,  3.2156e-02, -1.6259e-02, -2.5943e-01,
         -2.0576e-02],
        [ 7.8136e-02,  1.3621e-01, -1.2375e-01, -9.5075e-02, -9.3135e-02,
          2.4104e-01],
        [-1.7018e-01,  2.0280e-01, -2.4220e-01,  3.2697e-04, -1.2233e-01,
         -1.3607e-01],
        [ 4.3858e-03,  1.9670e-01, -1.6418e-01,  2.5328e-02, -2.3579e-01,
          9.0969e-02],
        [ 2.0663e-02,  1.9062e-01, -7.9694e-02,  7.0192e-02, -2.3042e-01,
          1.1258e-01],
        [-2.9393e-01,  1.7228e-01, -1.0757e-01,  1.3902e-01, -7.7695e-02,
         -6.2754e-02],
        [-2.0392e-02, -9.3418e-02, -2.5637e-01,  4.9833e-02, -2.5574e-01,
          2.6642e-02],
        [-

## use lightningmodule to organize my model

In [203]:
# Because in this part I want to verify whether organize model code into LightningModule is successful, I need to prepare the data first
# The data here is only used temporarily, and the data processing flow will be re-integrated later
feature_dataset=TensorDataset(train_features,train_labels)

# This is for batch shuffuling, so that the same batch can be obtained each time
torch.manual_seed(123)
train_loader= DataLoader(feature_dataset, batch_size=32,num_workers=12,persistent_workers=True, shuffle=True)

torch.manual_seed(123)
val_dataset=TensorDataset(val_features,val_labels)
val_loader= DataLoader(val_dataset, batch_size=32, num_workers=12,persistent_workers=True,shuffle=False)

In [331]:
class ChronoNetModule(L.LightningModule):
    def __init__(self,model,learning_rate):
        super().__init__()
        self.model=model
        self.lr=learning_rate
        self.train_acc=torchmetrics.Accuracy(task='multiclass',num_classes=6)
        self.val_acc=torchmetrics.Accuracy(task='multiclass',num_classes=6)



    def forward(self,x):
        '''
        x: feature data for training 

        This is the part of the neural model that is used to read or build
        define the computation performed at every call define the computation performed at every call

        return:
            model's output
        '''
        return self.model(x)
    
    def training_step(self,batch,batch_idx):
        '''
        we need to train the model right here
        including provide the loss step, acc calculation step

        This function will perform the following operations:
        1. Calculate the loss value for each training batch
        2. Perform optimization and gradient descent (automatically performed by lightningModule)
        3. Update parameters (automatically performed by lightningModule)
        https://lightning.ai/docs/pytorch/stable/common/lightning_module.html#training
        '''
        # read batch data
        features,labels=batch

        # Send data to GPU for training
        features=features.to(self.device)
        labels=labels.to(self.device)
        
        # feeding feature to the model
        # Only self() is used here because the forward() function is called automatically
        # forward propagation
        out=self(features)

        # After getting the output of the model, you need to calculate the loss function
        loss=F.cross_entropy(out, labels)

        # logs metrics for each training_step,
        # and the average across the epoch, to the progress bar and logger
        self.log("train_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)

        # After adding self.train_acc=torchmetrics.Accuracy(task='multiclass',num_classes=6)
        # You can add the steps to calculate accuracy below
        # Because we use cross_entropy() as the loss function
        # So we need to use argmax to convert to normal values ​​for accuracy calculation
        # predicted_labels=torch.argmax(out)
        # But torchmetrics.Accuracy is already configured to handle logits for multi-class classification problems. 
        # It will apply softmax (or log_softmax) and calculate argmax internally to determine the most likely category.
        acc=self.train_acc(out,labels)
        self.log("train_acc", acc, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        

        # In training_step(), we only calculate and return the loss. 
        # The optimization part does not belong to this part, 
        # and the optimization method will be defined in configure_optimizers.
        return loss # this is passed to the optimizer for training
    
    def validation_step(self,batch,batch_idx):
        '''
        The val step is not used in training, but only in validation.
        '''
        features,labels=batch

        # Send data to GPU for training
        features=features.to(self.device)
        labels=labels.to(self.device)
        
        out=self(features)
        loss=F.cross_entropy(out, labels)

        self.log("val_loss", loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)

        acc=self.val_acc(out,labels)
        self.log("val_acc", acc, on_step=True, on_epoch=True, prog_bar=True, logger=True)


    def configure_optimizers(self):
        '''
        Choose what optimizers and learning-rate schedulers to use in your optimization.

        The optimizer defined here will be automatically called by lightningModule
        Used in the training step
        '''
        optimizer=torch.optim.Adam(self.parameters(), lr=self.lr)

        return optimizer


    def on_train_epoch_end(self):
        pass


    def on_validation_epoch_end(self):
        pass


    def predict_step(self, batch, batch_idx, dataloader_idx=0):
        # If you have only one tensor (feature) in your TensorDataset, batch will be a tuple containing a tensor and an empty tuple (since there are no labels)
        features= batch[0]
        features=features.to(self.device)
        predictions = self(features)
        # Because what our model ultimately wants is the probability of an object corresponding to all categories, so add the softmax function here
        probabilities = torch.softmax(predictions, dim=1)

        return probabilities




In [226]:
model=ChronoNet()
ChronoNetModule=ChronoNetModule(model=model,learning_rate=0.1)

trainer=L.Trainer(
    max_epochs=3,
    accelerator="gpu", # set to 'auto' or 'gpu' to use gpu if possible
    devices=1, # use all gpus if applicable like value=1 or "auto"
)

# train the model
trainer.fit(
    model=ChronoNetModule,
    train_dataloaders=train_loader,
    val_dataloaders=val_loader,
)

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs



  | Name      | Type               | Params
-------------------------------------------------
0 | model     | ChronoNet          | 131 K 
1 | train_acc | MulticlassAccuracy | 0     
2 | val_acc   | MulticlassAccuracy | 0     
-------------------------------------------------
131 K     Trainable params
0         Non-trainable params
131 K     Total params
0.527     Total estimated model params size (MB)


Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]

/opt/homebrew/Caskroom/miniforge/base/envs/birdclef/lib/python3.10/site-packages/lightning/pytorch/trainer/call.py:54: Detected KeyboardInterrupt, attempting graceful shutdown...


## setup lightning data loader module for model training

In [298]:
# Here I want to verify the role of tensordataset and dataloader

dataset=TensorDataset(train_features,train_labels)

print(dataset)

# Access and print the first sample
first_data, first_label = dataset[0]
print("First sample features:", first_data)
print("First sample label:", first_label)

loader= DataLoader(dataset, batch_size=32, shuffle=True)

print(loader)

# In PyTorch, the DataLoader object itself is not a structure for direct visualization or direct printing of contents, but an iterable object,
# used to generate data in batches in each iteration. If you want to see the contents of the data loaded from the DataLoader, 
# you can print out the contents of one or more batches of data by iterating it.

# View data contents by iterating DataLoader
for i, (data, label) in enumerate(loader):
    print(f"Batch {i + 1}:")
    print("Features:", data)  # print feature
    print("Labels:", label)   # print label
    # If you only want to see the first batch of data, you can add a break statement here
    break  # Uncomment this line and only print the first batch of data

dataset=TensorDataset(train_features)

print(dataset)

# Access and print the first sample
first_data= dataset[0]
print("First sample features:", first_data)

loader= DataLoader(dataset, batch_size=32, shuffle=True)

loader= DataLoader(dataset, batch_size=32, shuffle=True)

print(loader)

# View data contents by iterating DataLoader
for i, data in enumerate(loader):
    print(f"Batch {i + 1}:")
    print("Features:", data) 
    break 




<torch.utils.data.dataset.TensorDataset object at 0x161c588e0>
First sample features: tensor([[-0.6015,  1.7506,  1.3820,  ..., -0.1865,  0.2928,  0.2459],
        [ 0.4801,  4.4550,  1.1716,  ..., -0.4470, -0.4560, -0.5860],
        [ 1.4925,  2.7271,  5.8421,  ...,  0.8986,  0.4440,  0.9471],
        [ 0.4867,  2.0776,  2.4248,  ..., -0.0830,  0.1843,  0.1838],
        [ 1.7844,  1.7919,  1.4389,  ...,  0.2124,  0.7954,  0.3240]])
First sample label: tensor(2.)
<torch.utils.data.dataloader.DataLoader object at 0x1620a2740>
Batch 1:
Features: tensor([[[-0.5475, -0.3557, -0.3218,  ...,  1.2892,  1.7215,  1.4805],
         [-0.1687, -0.3946, -0.3593,  ..., -0.7355, -0.7527, -0.9198],
         [-0.4361, -0.6789, -0.5990,  ...,  0.1586,  0.2221,  0.1589],
         [-0.4438, -0.5252, -0.5615,  ..., -0.3533, -0.6874,  0.1831],
         [-0.5679, -0.2651, -0.3601,  ..., -0.2300, -0.0471, -0.8231]],

        [[-0.6069, -0.7100, -0.6757,  ..., -0.8249, -0.8882, -0.9098],
         [-0.5898, -0.

In [332]:
class ChronoNetDataModule(L.LightningDataModule):
    def __init__(self,train=None,train_label=None,val=None,val_label=None,pred=None,batch_size:int=32,num_workers:int=12):
        super().__init__()
        self.batch_size=batch_size
        self.num_workers=num_workers
        self.train=train
        self.train_label=train_label
        self.val=val
        self.val_label=val_label
        self.pred=pred


    def train_dataloader(self):
        # You need to create a tensor to include the data and labels together
        # The length of the first dimension of the two must be equal
        dataset=TensorDataset(self.train,self.train_label)
        loader= DataLoader(dataset, batch_size=self.batch_size, num_workers=self.num_workers, persistent_workers=True, shuffle=True)

        return loader
    
    def val_dataloader(self):
        dataset=TensorDataset(self.val,self.val_label)
        loader= DataLoader(dataset, batch_size=self.batch_size, num_workers=self.num_workers, persistent_workers=True,shuffle=False)

        return loader

    def predict_dataloader(self):
        dataset=TensorDataset(self.pred)
        loader=DataLoader(dataset,batch_size=self.batch_size,shuffle=False)

        return loader


In [333]:
# Previously we used a separate dataloader to feed the model
# Here we encapsulate the dataloader and use this class to read data for training

dm=ChronoNetDataModule(train=train_features,train_label=train_labels,val=val_features,val_label=val_labels,batch_size=33)
print(dm)




model=ChronoNet()
ChronoNetModule=ChronoNetModule(model=model,learning_rate=0.1)

trainer=L.Trainer(
    max_epochs=1,
    accelerator="gpu", # set to 'auto' or 'gpu' to use gpu if possible
    devices=1, # use all gpus if applicable like value=1 or "auto"
    default_root_dir='/Users/yiding/personal_projects/ML/github_repo/birdcief/code/model-training/',
    logger=CSVLogger(save_dir='/Users/yiding/personal_projects/ML/github_repo/birdcief/code/model-training/log/',name='chrononet')
)

# train the model
trainer.fit(
    model=ChronoNetModule,
    datamodule=dm
)


GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs



  | Name      | Type               | Params
-------------------------------------------------
0 | model     | ChronoNet          | 131 K 
1 | train_acc | MulticlassAccuracy | 0     
2 | val_acc   | MulticlassAccuracy | 0     
-------------------------------------------------
131 K     Trainable params
0         Non-trainable params
131 K     Total params
0.527     Total estimated model params size (MB)


<__main__.ChronoNetDataModule object at 0x1623807c0>
                                                                           

/opt/homebrew/Caskroom/miniforge/base/envs/birdclef/lib/python3.10/site-packages/lightning/pytorch/loops/fit_loop.py:298: The number of training batches (1) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 0: 100%|██████████| 1/1 [00:48<00:00,  0.02it/s, v_num=9, train_loss_step=1.800, train_acc_step=0.182, val_loss_step=2.870, val_acc_step=0.000, val_loss_epoch=3.340, val_acc_epoch=0.147, train_loss_epoch=1.800, train_acc_epoch=0.182]

`Trainer.fit` stopped: `max_epochs=1` reached.


Epoch 0: 100%|██████████| 1/1 [00:48<00:00,  0.02it/s, v_num=9, train_loss_step=1.800, train_acc_step=0.182, val_loss_step=2.870, val_acc_step=0.000, val_loss_epoch=3.340, val_acc_epoch=0.147, train_loss_epoch=1.800, train_acc_epoch=0.182]


In [334]:
dm=ChronoNetDataModule(pred=val_features,batch_size=33)


predictions=trainer.predict(model=ChronoNetModule,datamodule=dm)

/opt/homebrew/Caskroom/miniforge/base/envs/birdclef/lib/python3.10/site-packages/lightning/pytorch/trainer/connectors/data_connector.py:441: The 'predict_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Predicting: |          | 0/? [00:00<?, ?it/s]

Predicting DataLoader 0: 100%|██████████| 2/2 [00:18<00:00,  0.11it/s]


In [335]:
print(len(predictions[0]))
print(len(predictions))
print(len(predictions[1]))
print(predictions)

33
2
1
[tensor([[5.6575e-01, 1.0018e-03, 4.6197e-03, 3.2325e-01, 9.1726e-02, 1.3648e-02],
        [5.4818e-01, 4.8104e-04, 5.5269e-03, 2.7513e-01, 1.5051e-01, 2.0169e-02],
        [5.4210e-01, 5.1319e-04, 5.4765e-03, 2.8120e-01, 1.5046e-01, 2.0249e-02],
        [3.0845e-01, 1.9877e-01, 3.2014e-02, 4.1173e-01, 2.9122e-02, 1.9915e-02],
        [5.3649e-01, 2.9894e-03, 8.7351e-03, 3.3508e-01, 9.7630e-02, 1.9080e-02],
        [4.6169e-01, 5.7414e-03, 9.5079e-03, 3.9689e-01, 1.0934e-01, 1.6827e-02],
        [1.6857e-01, 8.0688e-02, 5.3463e-02, 2.4675e-01, 3.3104e-01, 1.1950e-01],
        [3.5587e-03, 9.4635e-01, 2.1553e-02, 2.3794e-02, 1.4574e-03, 3.2845e-03],
        [1.0027e-01, 1.6685e-01, 1.0053e-01, 2.0234e-01, 2.9135e-01, 1.3866e-01],
        [9.3389e-02, 1.6281e-01, 1.2310e-01, 1.1841e-01, 2.8799e-01, 2.1430e-01],
        [3.9036e-01, 6.5489e-03, 1.0268e-02, 2.4627e-01, 2.9684e-01, 4.9722e-02],
        [4.8367e-03, 9.1764e-01, 3.9673e-02, 2.5409e-02, 3.2278e-03, 9.2131e-03],
        

In [336]:
## Because the dataloader to be predicted is divided into two batches, we need to stack all batch outputs together here

result = torch.cat((predictions[0], predictions[1]), dim=0)
print(result.shape)
print(result)

torch.Size([34, 6])
tensor([[5.6575e-01, 1.0018e-03, 4.6197e-03, 3.2325e-01, 9.1726e-02, 1.3648e-02],
        [5.4818e-01, 4.8104e-04, 5.5269e-03, 2.7513e-01, 1.5051e-01, 2.0169e-02],
        [5.4210e-01, 5.1319e-04, 5.4765e-03, 2.8120e-01, 1.5046e-01, 2.0249e-02],
        [3.0845e-01, 1.9877e-01, 3.2014e-02, 4.1173e-01, 2.9122e-02, 1.9915e-02],
        [5.3649e-01, 2.9894e-03, 8.7351e-03, 3.3508e-01, 9.7630e-02, 1.9080e-02],
        [4.6169e-01, 5.7414e-03, 9.5079e-03, 3.9689e-01, 1.0934e-01, 1.6827e-02],
        [1.6857e-01, 8.0688e-02, 5.3463e-02, 2.4675e-01, 3.3104e-01, 1.1950e-01],
        [3.5587e-03, 9.4635e-01, 2.1553e-02, 2.3794e-02, 1.4574e-03, 3.2845e-03],
        [1.0027e-01, 1.6685e-01, 1.0053e-01, 2.0234e-01, 2.9135e-01, 1.3866e-01],
        [9.3389e-02, 1.6281e-01, 1.2310e-01, 1.1841e-01, 2.8799e-01, 2.1430e-01],
        [3.9036e-01, 6.5489e-03, 1.0268e-02, 2.4627e-01, 2.9684e-01, 4.9722e-02],
        [4.8367e-03, 9.1764e-01, 3.9673e-02, 2.5409e-02, 3.2278e-03, 9.2131e-0

## setup callbacks for saving checkpoint and earlystopping

In [337]:
dm=ChronoNetDataModule(train=train_features,train_label=train_labels,val=val_features,val_label=val_labels,batch_size=33)

# Set model checkpoint
checkpoint_callback = ModelCheckpoint(
    monitor='val_loss', # Monitor validation loss
    dirpath='/Users/yiding/personal_projects/ML/github_repo/birdcief/code/model-training/checkpoints/',
    filename='chrononet-{epoch:02d}-{val_loss:.2f}',
    save_top_k=1, # Save the model with the lowest validation loss
    mode='min',  # Minimize monitoring metrics
    auto_insert_metric_name=False  # Prevent automatic addition of metric names to paths
)

## Set early stopping point
early_stop_callback = EarlyStopping(
   monitor='val_acc',
   min_delta=0.0,
   patience=3,
   verbose=True,
   mode='max'
)


trainer=L.Trainer(
    callbacks=[early_stop_callback, checkpoint_callback],
    max_epochs=3,
    accelerator="gpu", # set to 'auto' or 'gpu' to use gpu if possible
    devices=1, # use all gpus if applicable like value=1 or "auto"
    default_root_dir='/Users/yiding/personal_projects/ML/github_repo/birdcief/code/model-training/',
    logger=CSVLogger(save_dir='/Users/yiding/personal_projects/ML/github_repo/birdcief/code/model-training/log/',name='chrononet')
)

# train the model
trainer.fit(
    model=ChronoNetModule,
    datamodule=dm
)

GPU available: True (mps), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


/opt/homebrew/Caskroom/miniforge/base/envs/birdclef/lib/python3.10/site-packages/lightning/pytorch/callbacks/model_checkpoint.py:653: Checkpoint directory /Users/yiding/personal_projects/ML/github_repo/birdcief/code/model-training/checkpoints exists and is not empty.

  | Name      | Type               | Params
-------------------------------------------------
0 | model     | ChronoNet          | 131 K 
1 | train_acc | MulticlassAccuracy | 0     
2 | val_acc   | MulticlassAccuracy | 0     
-------------------------------------------------
131 K     Trainable params
0         Non-trainable params
131 K     Total params
0.527     Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

AttributeError: 'NoneType' object has no attribute 'size'