<a href="https://colab.research.google.com/github/VimalRency/Spectogram/blob/main/classification_of_spectrogram_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
pip install librosa




In [4]:
pip install soundfile



In [5]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [28]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Function to load and preprocess spectrogram images
def load_spectrogram_images(data_dir, img_size=(128, 128)):
    features = []
    labels = []

    for root, _, files in os.walk(data_dir):
        for file in files:
            if file.endswith('.png'):
                img_path = os.path.join(root, file)
                try:
                    img = load_img(img_path, target_size=img_size)
                    img = img_to_array(img)
                    img = np.mean(img, axis=2)  # Convert to grayscale by averaging channels
                    features.append(img)
                    labels.append(os.path.basename(root))  # Use directory name as label
                    print(f"Loaded image: {img_path}")  # Debugging print
                except Exception as e:
                    print(f"Error loading image {img_path}: {e}")

    features = np.array(features)
    labels = np.array(labels)

    print(f"Total images loaded: {len(features)}")  # Debugging print
    print(f"Features shape: {features.shape}")  # Debugging print
    print(f"Labels shape: {labels.shape}")  # Debugging print
    return features, labels

# Load and preprocess spectrogram images
data_dir = '/content/drive/MyDrive/spec1'  # Update this path to your actual data directory
img_size = (128, 128)
X, y = load_spectrogram_images(data_dir, img_size=img_size)


Loaded image: /content/drive/MyDrive/spec1/yes/gayathri.png
Loaded image: /content/drive/MyDrive/spec1/yes/vimal.png
Loaded image: /content/drive/MyDrive/spec1/yes/kathy.png
Loaded image: /content/drive/MyDrive/spec1/yes/aish.png
Loaded image: /content/drive/MyDrive/spec1/yes/carmel.png
Loaded image: /content/drive/MyDrive/spec1/no/cno.png
Loaded image: /content/drive/MyDrive/spec1/no/kno.png
Loaded image: /content/drive/MyDrive/spec1/no/ano.png
Loaded image: /content/drive/MyDrive/spec1/no/vno.png
Loaded image: /content/drive/MyDrive/spec1/no/gno.png
Total images loaded: 10
Features shape: (10, 128, 128)
Labels shape: (10,)


In [29]:
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC

# Flatten the spectrogram images
X_flattened = X.reshape(len(X), -1)

# Encode class labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_flattened, y_encoded, test_size=0.2, random_state=42)

# Train SVM
svm_model = SVC(kernel='linear', C=1.0, random_state=42)
svm_model.fit(X_train, y_train)

# Debugging prints
print(f"Training data shape: {X_train.shape}")
print(f"Testing data shape: {X_test.shape}")
print(f"Number of classes: {len(label_encoder.classes_)}")


Training data shape: (8, 16384)
Testing data shape: (2, 16384)
Number of classes: 2


In [30]:
import librosa
import soundfile as sf

# Function to load and preprocess a single spectrogram image
def load_and_preprocess_image(img_path, img_size=(128, 128)):
    img = load_img(img_path, target_size=img_size)
    img = img_to_array(img)
    img = np.mean(img, axis=2)  # Convert to grayscale by averaging channels
    return img

# Function to predict class and reconstruct audio from spectrogram
def predict_and_reconstruct(model, img_path, label_encoder, img_size=(128, 128), sr=22050):
    # Load and preprocess the image
    img = load_and_preprocess_image(img_path, img_size)

    # Extract features (log power spectrogram)
    log_power_spectrogram = np.log1p(np.abs(img))
    log_power_spectrogram = log_power_spectrogram.flatten().reshape(1, -1)

    # Debugging print
    print(f"Input features shape for prediction: {log_power_spectrogram.shape}")

    # Predict the class
    predicted_class_index = model.predict(log_power_spectrogram)
    predicted_class = label_encoder.inverse_transform(predicted_class_index)[0]

    # Reconstruct the audio
    power_spectrogram = np.expm1(log_power_spectrogram.reshape(img_size))

    # Debugging: Check for non-finite values in the power spectrogram
    if not np.isfinite(power_spectrogram).all():
        print("Non-finite values found in power spectrogram!")
        power_spectrogram = np.nan_to_num(power_spectrogram)  # Replace non-finite values with zero
        print("Non-finite values replaced with zero.")

    # Convert power spectrogram back to amplitude spectrogram
    amplitude_spectrogram = np.sqrt(power_spectrogram)

    # Inverse Short-Time Fourier Transform to get the time-domain signal
    reconstructed_audio = librosa.istft(amplitude_spectrogram)

    # Save the reconstructed audio to a file
    output_audio_path = os.path.splitext(img_path)[0] + "_reconstructed.wav"
    sf.write(output_audio_path, reconstructed_audio, sr)

    return predicted_class, output_audio_path, reconstructed_audio

# Example usage
new_image_path = '/content/drive/MyDrive/spec2.png'  # Replace with your new image path
if os.path.exists(new_image_path):
    predicted_class, output_audio_path, reconstructed_audio = predict_and_reconstruct(
        svm_model, new_image_path, label_encoder, img_size=img_size, sr=22050
    )
    print(f'Predicted Class: {predicted_class}')
    print(f'Reconstructed Audio Path: {output_audio_path}')
else:
    print(f'File does not exist: {new_image_path}')


Input features shape for prediction: (1, 16384)
Predicted Class: yes
Reconstructed Audio Path: /content/drive/MyDrive/spec2_reconstructed.wav
