# Audio → Spectrogram

In [2]:
import os
import numpy as np
import tensorflow as tf
from PIL import Image

# -------------------------------
# 1. Synthetic audio (uniform random noise standing in for a recording)
# -------------------------------
sr = 8000
duration_seconds = 2
num_samples = sr * duration_seconds
# Seeded generator so that Restart & Run All reproduces the same signal
# (and therefore the same PNG) on every run.
rng = np.random.default_rng(42)
y = rng.random(num_samples, dtype=np.float32)  # samples in [0, 1)
print("Audio array created!")
print(f"Shape: {y.shape}, Sample rate: {sr}")

# -------------------------------
# 2. Compute Spectrogram via TensorFlow STFT
# -------------------------------
frame_length = 256
frame_step = 128

y_tensor = tf.convert_to_tensor(y, dtype=tf.float32)
stft = tf.signal.stft(y_tensor, frame_length=frame_length, frame_step=frame_step)
spectrogram = tf.abs(stft)  # magnitude; axes are (time frames, frequency bins)
# Square-root (power-law) compression to reduce the dynamic range.
# This is NOT a log or Mel transform: the frequency axis stays linear.
spectrogram = tf.math.pow(spectrogram, 0.5)
print("Spectrogram computed via TensorFlow.")
print("Spectrogram shape:", spectrogram.shape)

# -------------------------------
# 3. Normalize and convert to uint8
# -------------------------------
spec_min = tf.reduce_min(spectrogram)
spec_max = tf.reduce_max(spectrogram)
# divide_no_nan returns 0 where the denominator is 0, so a constant
# spectrogram (max == min) becomes an all-black image instead of NaNs.
spectrogram_norm = tf.math.divide_no_nan(spectrogram - spec_min, spec_max - spec_min)  # 0-1
spectrogram_uint8 = tf.image.convert_image_dtype(spectrogram_norm[..., tf.newaxis], tf.uint8)
# Drop only the channel axis added above; a bare squeeze would also drop a
# time axis of length 1.
spectrogram_uint8 = tf.squeeze(spectrogram_uint8, axis=-1)  # rows = frames, cols = freq bins

# -------------------------------
# 4. Save as PNG to a subfolder
# -------------------------------
output_dir = "spectrogram_outputs"
os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, "spectrogram1.png")

# Convert to an 8-bit grayscale PIL image and save
img = Image.fromarray(spectrogram_uint8.numpy(), mode='L')
img.save(output_path)
print(f"Spectrogram saved to {output_path}")

# -------------------------------
# 5. Optional: Convert to Tensor for ML
# -------------------------------
# Uses the un-normalized float spectrogram, with a trailing channel axis.
spectrogram_tensor = tf.expand_dims(spectrogram, -1)  # frames x freq bins x 1
print("Tensor shape ready for ML:", spectrogram_tensor.shape)



Audio array created!
Shape: (16000,), Sample rate: 8000
Spectrogram computed via TensorFlow.
Spectrogram shape: (124, 129)
Spectrogram saved to spectrogram_outputs\spectrogram1.png
Tensor shape ready for ML: (124, 129, 1)


In [3]:
import os
import numpy as np
import tensorflow as tf
import soundfile as sf
from PIL import Image

# -------------------------------
# 1. Load trumpet.wav
# -------------------------------
audio_path = "trumpet.wav"
# sf.read returns shape (num_samples,) for mono files and
# (num_samples, num_channels) for multi-channel files.
y, sr = sf.read(audio_path, dtype='float32')
print(f"Loaded audio: {audio_path}")
print(f"Audio shape: {y.shape}, Sample rate: {sr}")

# tf.signal.stft frames the LAST axis. For a (num_samples, num_channels)
# array that would be the channel axis, producing zero frames, so average
# the channels down to a single mono signal first.
if y.ndim > 1:
    y = y.mean(axis=1)
    print(f"Downmixed to mono, shape: {y.shape}")

# -------------------------------
# 2. Compute Spectrogram via TensorFlow STFT
# -------------------------------
frame_length = 256
frame_step = 128

# With fewer samples than one frame the STFT yields no frames at all, and
# the normalization / image steps below would fail with an obscure error.
if y.shape[0] < frame_length:
    raise ValueError(
        f"{audio_path} has {y.shape[0]} samples; at least {frame_length} are required"
    )

y_tensor = tf.convert_to_tensor(y, dtype=tf.float32)
stft = tf.signal.stft(y_tensor, frame_length=frame_length, frame_step=frame_step)
spectrogram = tf.abs(stft)  # magnitude; axes are (time frames, frequency bins)
# Square-root (power-law) compression to reduce the dynamic range.
# This is NOT a log or Mel transform: the frequency axis stays linear.
spectrogram = tf.math.pow(spectrogram, 0.5)
print("Spectrogram computed via TensorFlow.")
print("Spectrogram shape:", spectrogram.shape)

# -------------------------------
# 3. Normalize and convert to uint8 for saving
# -------------------------------
spec_min = tf.reduce_min(spectrogram)
spec_max = tf.reduce_max(spectrogram)
# divide_no_nan returns 0 where the denominator is 0, so a constant
# spectrogram (e.g. digital silence) becomes an all-black image, not NaNs.
spectrogram_norm = tf.math.divide_no_nan(spectrogram - spec_min, spec_max - spec_min)  # 0-1
spectrogram_uint8 = tf.image.convert_image_dtype(spectrogram_norm[..., tf.newaxis], tf.uint8)
# Drop only the channel axis added above; a bare squeeze would also drop a
# time axis of length 1.
spectrogram_uint8 = tf.squeeze(spectrogram_uint8, axis=-1)  # rows = frames, cols = freq bins

# -------------------------------
# 4. Save as PNG to a subfolder
# -------------------------------
output_dir = "spectrogram_outputs"
os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, "trumpet_spectrogram.png")

# 8-bit grayscale image of the normalized spectrogram
img = Image.fromarray(spectrogram_uint8.numpy(), mode='L')
img.save(output_path)
print(f"Spectrogram saved to {output_path}")

# -------------------------------
# 5. Optional: Convert to Tensor for ML
# -------------------------------
# Uses the un-normalized float spectrogram, with a trailing channel axis.
spectrogram_tensor = tf.expand_dims(spectrogram, -1)  # frames x freq bins x 1
print("Tensor shape ready for ML:", spectrogram_tensor.shape)


Loaded audio: trumpet.wav
Audio shape: (143775,), Sample rate: 44100
Spectrogram computed via TensorFlow.
Spectrogram shape: (1122, 129)
Spectrogram saved to spectrogram_outputs\trumpet_spectrogram.png
Tensor shape ready for ML: (1122, 129, 1)


# Text → Embeddings → “Image”

In [4]:
import os
import numpy as np
from sentence_transformers import SentenceTransformer
from PIL import Image

# -------------------------------
# 1. Create output folder
# -------------------------------
output_dir = "sentence_embeddings_outputs"
os.makedirs(output_dir, exist_ok=True)

# -------------------------------
# 2. Load SentenceTransformer model
# -------------------------------
model = SentenceTransformer('all-MiniLM-L6-v2')

# -------------------------------
# 3. Prepare paragraph / sentences
# -------------------------------
paragraph = """
Computer vision enables machines to interpret visual information.
Deep learning has revolutionized image recognition.
Datasets are critical to model performance.
"""

# One sentence per non-blank line. Blank or whitespace-only lines are
# skipped so they are never encoded as empty "sentences".
sentences = [line.strip() for line in paragraph.splitlines() if line.strip()]

# -------------------------------
# 4. Encode sentences
# -------------------------------
embeddings = model.encode(sentences)  # one row per sentence
print("Embeddings shape:", embeddings.shape)

# -------------------------------
# 5. Save each embedding as a .npy file
# -------------------------------
for i, embedding in enumerate(embeddings):
    npy_path = os.path.join(output_dir, f"sentence_{i+1}_embedding.npy")
    np.save(npy_path, embedding)
    print(f"Saved embedding for sentence {i+1} to {npy_path}")

# -------------------------------
# 6. Save embeddings as a heatmap PNG
# -------------------------------
# Min-max normalize over ALL embeddings (one shared scale) to 0-255.
emb_min = embeddings.min()
emb_max = embeddings.max()
emb_range = emb_max - emb_min
if emb_range > 0:
    emb_norm = ((embeddings - emb_min) / emb_range * 255).astype(np.uint8)
else:
    # Every value is identical: 0/0 would give NaN and an undefined uint8
    # cast, so emit a uniform black image instead.
    emb_norm = np.zeros(embeddings.shape, dtype=np.uint8)

# Convert to PIL image (height = sentences, width = embedding dimension)
heatmap_img = Image.fromarray(emb_norm)
heatmap_path = os.path.join(output_dir, "embeddings_heatmap.png")
heatmap_img.save(heatmap_path)
print(f"Saved embeddings heatmap to {heatmap_path}")



Embeddings shape: (3, 384)
Saved embedding for sentence 1 to sentence_embeddings_outputs\sentence_1_embedding.npy
Saved embedding for sentence 2 to sentence_embeddings_outputs\sentence_2_embedding.npy
Saved embedding for sentence 3 to sentence_embeddings_outputs\sentence_3_embedding.npy
Saved embeddings heatmap to sentence_embeddings_outputs\embeddings_heatmap.png


# Discussion:
- Neural networks operate on structured tensors.
- Images, audio, and text can all be represented as numeric grids.
