In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Gather the full path of every file beneath the Kaggle input directory,
# then print the paths one per line.
input_file_paths = [
    os.path.join(dirname, filename)
    for dirname, _, filenames in os.walk('/kaggle/input')
    for filename in filenames
]
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

Steps added:
**Exploratory Data Analysis (EDA):**

Waveform Plot: Shows the raw audio waveform.
Spectrogram: Visualizes frequencies over time in dB scale.
Zero-crossing rate: How often the signal crosses zero, i.e. changes sign in either direction (positive to negative or negative to positive).
Root Mean Square Energy (RMSE): Measures the average energy (loudness) of the signal within each frame.
**Feature Extraction:**

MFCC (Mel Frequency Cepstral Coefficients): Essential feature for audio classification.
Chroma feature: Used to represent 12 different pitch classes.
Spectral Contrast: Measures the difference between peaks and valleys in a spectrum.
**Data Augmentation:**

Adding Noise: Introduces random noise to simulate different environments.
Time Stretching: Speeds up or slows down the audio without altering its pitch.
Pitch Shifting: Changes the pitch of the audio without affecting speed.
Time Shifting: Shifts the audio data to the right/left to simulate offset recordings.

In [None]:
# System libraries
import os
from IPython.display import Markdown, Audio, display

# Data manipulation and visualization libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

# Audio processing libraries
import librosa
import librosa.display

# Play audio sample.
# A bare `Audio(...)` expression is only rendered when it is the last
# expression of a cell; here more code follows it, so the player must be
# shown explicitly with display().
audio_file_name = "data/kids_are_talking_by_the_door.wav"
display(Audio(audio_file_name))

# Load the audio file as an array of samples plus its sample rate.
# NOTE(review): no `sr` argument is passed, so librosa's default resampling
# applies — pass sr=None if the file's native sample rate is wanted; confirm.
data, sample_rate = librosa.load(audio_file_name)
print(f'Sample rate: {sample_rate}')
print(f'Audio data type: {type(data)}')
print(f'Audio data shape: {data.shape}')

# Wrap the raw samples in a DataFrame so pandas can summarise them
df = pd.DataFrame(data)

# Summary statistics of the raw samples
print(df.describe())

# Draw the raw waveform, one point per sample, on an explicit Axes
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(data, lw=1)
ax.set_title('Kids are talking by the door - Audio waveform')
ax.set_xlabel('Sample index')
ax.set_ylabel('Amplitude')
plt.show()

# Strip leading/trailing silence (anything more than 20 dB below the peak);
# the second return value (the kept interval) is not needed here
data_trimmed, _ = librosa.effects.trim(data, top_db=20)

# Wrap the trimmed samples in a DataFrame
df_trimmed = pd.DataFrame(data_trimmed)

# Summary statistics of the trimmed samples
print(df_trimmed.describe())

# Draw the trimmed waveform on an explicit Axes
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(data_trimmed, lw=1)
ax.set_title('Kids are talking by the door - Trimmed audio waveform')
ax.set_xlabel('Sample index')
ax.set_ylabel('Amplitude')
plt.show()

### Additional EDA ###

# Spectrogram: STFT magnitude expressed in dB relative to its maximum
stft_magnitude = np.abs(librosa.stft(data))
D = librosa.amplitude_to_db(stft_magnitude, ref=np.max)

# Render on an explicit Axes with a log-frequency y axis and a dB colorbar
fig, ax = plt.subplots(figsize=(10, 5))
img = librosa.display.specshow(D, sr=sample_rate, x_axis='time', y_axis='log', ax=ax)
fig.colorbar(img, ax=ax, format='%+2.0f dB')
ax.set_title('Spectrogram (Log scale)')
plt.show()

# Zero crossings: boolean mask over the samples marking sign changes
zero_crossings = librosa.zero_crossings(data, pad=False)
# Summing the mask yields a total count, not a rate, so label it as such.
# np.count_nonzero counts natively instead of iterating element by element
# in Python as the built-in sum() does.
print(f'Total zero crossings: {np.count_nonzero(zero_crossings)}')

# Plot the frame-wise zero-crossing rate over time
zcr = librosa.feature.zero_crossing_rate(data)
plt.figure(figsize=(10, 5))
plt.plot(zcr[0])
plt.title('Zero Crossing Rate')
plt.xlabel('Frames')
plt.ylabel('Rate')
plt.show()

# Root Mean Square Energy (RMSE), computed per frame.
# The signal is passed as `y=`: librosa >= 0.10 makes this argument
# keyword-only, so the positional form raises TypeError there, while the
# keyword form also works on older releases.
rms = librosa.feature.rms(y=data)
plt.figure(figsize=(10, 5))
plt.plot(rms[0])
plt.title('Root Mean Square Energy (RMSE)')
plt.xlabel('Frames')
plt.ylabel('Energy')
plt.show()

### Feature Extraction ###

# Mel Frequency Cepstral Coefficients (MFCC): 13 coefficients per frame.
# The signal is passed as `y=`: librosa >= 0.10 makes this argument
# keyword-only, so the positional form raises TypeError there, while the
# keyword form also works on older releases.
mfccs = librosa.feature.mfcc(y=data, sr=sample_rate, n_mfcc=13)
plt.figure(figsize=(10, 5))
librosa.display.specshow(mfccs, sr=sample_rate, x_axis='time')
plt.colorbar()
plt.ylabel('MFCC coefficient')
plt.title('MFCC')
plt.show()

# Chroma Feature: energy per pitch class, derived from the STFT.
# The signal is passed as `y=`: librosa >= 0.10 makes this argument
# keyword-only, so the positional form raises TypeError there, while the
# keyword form also works on older releases.
chroma_stft = librosa.feature.chroma_stft(y=data, sr=sample_rate)
plt.figure(figsize=(10, 5))
librosa.display.specshow(chroma_stft, sr=sample_rate, x_axis='time', y_axis='chroma')
plt.colorbar()
plt.title('Chroma Feature')
plt.show()

# Spectral Contrast: peak-to-valley difference per frequency band and frame.
# The signal is passed as `y=`: librosa >= 0.10 makes this argument
# keyword-only, so the positional form raises TypeError there, while the
# keyword form also works on older releases.
spectral_contrast = librosa.feature.spectral_contrast(y=data, sr=sample_rate)
plt.figure(figsize=(10, 5))
librosa.display.specshow(spectral_contrast, sr=sample_rate, x_axis='time')
plt.colorbar()
plt.ylabel('Frequency band')
plt.title('Spectral Contrast')
plt.show()

### Data Augmentation ###

# 1. Adding noise
# Draw Gaussian noise from a seeded generator so the augmented signal is
# identical on every run (Restart & Run All reproduces the same plot).
rng = np.random.default_rng(42)
# Cast the noise to the signal's dtype so the sum keeps that dtype instead of
# being silently upcast to float64 by the generator's default output.
noise = rng.standard_normal(len(data)).astype(data.dtype)
# 0.005 scales the unit-variance noise down relative to the signal
data_noisy = data + 0.005 * noise

# Plot noisy data
plt.figure(figsize=(10, 5))
plt.plot(data_noisy)
plt.title('Noisy Audio')
plt.xlabel('Sample index')
plt.ylabel('Amplitude')
plt.show()

# 2. Time Stretching
# Change the playback speed by a factor of 1.2
data_stretch = librosa.effects.time_stretch(data, rate=1.2)

# Draw the time-stretched waveform on an explicit Axes
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(data_stretch)
ax.set_title('Time-Stretched Audio (Rate = 1.2)')
plt.show()

# 3. Pitch Shifting
# Shift the pitch by n_steps=2 (two semitones up, as the plot title states)
data_pitch_shifted = librosa.effects.pitch_shift(
    data,
    sr=sample_rate,
    n_steps=2,
)

# Draw the pitch-shifted waveform on an explicit Axes
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(data_pitch_shifted)
ax.set_title('Pitch Shifted Audio (2 semitones up)')
plt.show()

# 4. Time Shifting
# Rotate the samples by a tenth of the sample rate; np.roll is circular, so
# samples pushed past the end wrap around to the start
shift_in_samples = int(sample_rate / 10)
shifted_data = np.roll(data, shift_in_samples)

# Draw the time-shifted waveform on an explicit Axes
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(shifted_data)
ax.set_title('Time Shifted Audio')
plt.show()
