In [None]:
!pip install pandas
!pip install numpy
!pip install scikit-learn==1.6.0
!pip install tensorflow

In [None]:
import pandas as pd
import numpy as np
import os
import cv2
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix


In [None]:
path ='/Users/jathin/Desktop/sentiment analysis/archive/TRAIN.csv'

df=pd.read_csv(path)
print(df.head())

In [49]:
print((df['Class']== 'Positive').sum())

82


In [None]:
class_counts = df['Class'].value_counts()
print(class_counts)
print("Number of unique classes:", len(class_counts))

In [None]:
train_df = pd.read_csv("TRAIN.csv")  # Replace with your actual file name
print(train_df.head())  # Show first few rows

To convert audio to Spectrogram use libraries such as librosa, matplotlib

In [None]:

# Function to convert a single audio file to a spectrogram image
def audio_to_spectrogram(audio_path, save_path, img_size=(128, 128)):
    y, sr = librosa.load(audio_path, sr=None)  # Load the audio file
    # Generate mel spectrogram
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
    S_dB = librosa.power_to_db(S, ref=np.max)  # Convert to dB scale
    
    # Create and save the plot (no axes for clean image)
    plt.figure(figsize=(img_size[0]/100, img_size[1]/100), dpi=100)
    plt.axis('off')
    librosa.display.specshow(S_dB, sr=sr, cmap='viridis')
    plt.tight_layout(pad=0)
    plt.savefig(save_path, bbox_inches='tight', pad_inches=0)
    plt.close()

# Function to convert all audio files in a folder to spectrograms
def convert_folder_to_spectrograms(audio_folder, output_folder, image_extension='.png', img_size=(128, 128)):
    if not os.path.exists(output_folder):
        os.makedirs(output_folder)
    
    audio_files = [f for f in os.listdir(audio_folder) if f.lower().endswith('.wav')]
    if not audio_files:
        print("No WAV files found in the folder.")
        return
    
    for audio_file in audio_files:
        audio_path = os.path.join(audio_folder, audio_file)
        # Create output filename (e.g., 'audio.wav' -> 'audio.png')
        save_filename = os.path.splitext(audio_file)[0] + image_extension
        save_path = os.path.join(output_folder, save_filename)
        
        audio_to_spectrogram(audio_path, save_path, img_size)
        print(f"Converted {audio_file} to spectrogram: {save_filename}")

# Example usage - replace with your actual folders
# convert_folder_to_spectrograms('path/to/your/audio_folder', 'path/to/output_spectrograms_folder')


In [57]:

# Paths (adjust if needed)
csv_path = 'TRAIN.csv'  # Your training CSV with Filename and Class
images_folder = 'train_images'  # Folder with training spectrogram images
image_extension = '.png'  # Change to '.jpg' if necessary
img_size = (128, 128)  # Resize images to this size

# Step 1: Load CSV and prepare labels
df = pd.read_csv(csv_path)
labels = df['Class'].values
filenames = df['Filename'].values

# Map filenames to image paths (assuming images like '346.png' for '346.wav')
image_paths = [os.path.join(images_folder, f.replace('.wav', image_extension)) for f in filenames]

# Load images
images = []
valid_labels = []
for i, path in enumerate(image_paths):
    if os.path.exists(path):
        img = cv2.imread(path)
        img = cv2.resize(img, img_size)
        img = img / 255.0  # Normalize
        images.append(img)
        valid_labels.append(labels[i])
    else:
        print(f"Warning: Image not found: {path}")

images = np.array(images)
valid_labels = np.array(valid_labels)

# Encode labels (Positive=0, Negative=1, Neutral=2)
le = LabelEncoder()
encoded_labels = le.fit_transform(valid_labels)
encoded_labels = to_categorical(encoded_labels, num_classes=3)

# Use full data for training (no split, as per your request)
X_train = images
y_train = encoded_labels

# Step 2: Build CNN model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(img_size[0], img_size[1], 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(3, activation='softmax')  # 3 classes
])

model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

# Step 3: Train the model
history = model.fit(X_train, y_train, epochs=20, batch_size=32)

# Step 4: Save the model
model.save('sentiment_model.h5')
print("Model training complete and saved as 'sentiment_model.h5'")


Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 390ms/step - accuracy: 0.3624 - loss: 1.2207
Epoch 2/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 318ms/step - accuracy: 0.7170 - loss: 0.7907
Epoch 3/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 278ms/step - accuracy: 0.8069 - loss: 0.5116
Epoch 4/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 271ms/step - accuracy: 0.8345 - loss: 0.4642
Epoch 5/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 289ms/step - accuracy: 0.9248 - loss: 0.2627
Epoch 6/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 273ms/step - accuracy: 0.9417 - loss: 0.1599
Epoch 7/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 292ms/step - accuracy: 0.9790 - loss: 0.0868
Epoch 8/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 294ms/step - accuracy: 0.9905 - loss: 0.0487
Epoch 9/20
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2



Model training complete and saved as 'sentiment_model.h5'


In [None]:


# Distinct testing variables
test_images_folder = 'test_images'
test_image_extension = '.png'  # Change if needed
test_img_size = (128, 128)
test_model_path = 'sentiment_model.h5'
test_output_csv = 'test_predictions.csv'

# Load model
test_model = load_model(test_model_path)
print("Model loaded successfully.")

# Load test images directly from folder
test_filenames = [f for f in os.listdir(test_images_folder) if f.endswith(test_image_extension)]
test_image_paths = [os.path.join(test_images_folder, f) for f in test_filenames]

test_images = []
test_valid_filenames = []
for path in test_image_paths:
    img = cv2.imread(path)
    if img is not None:
        img = cv2.resize(img, test_img_size)
        img = img / 255.0
        test_images.append(img)
        test_valid_filenames.append(os.path.basename(path))
    else:
        print(f"Warning: Could not read image: {path}")

if not test_images:
    print("No valid images found. Check folder and extension.")
    exit()

test_images = np.array(test_images)

# Make predictions
test_predictions = test_model.predict(test_images)
test_predicted_classes = np.argmax(test_predictions, axis=1)

# Decode to string labels (match training classes)
test_le = LabelEncoder()
test_le.fit(['Positive', 'Negative', 'Neutral'])  # Assumes these classes from training
test_predicted_labels = test_le.inverse_transform(test_predicted_classes)

# Create DataFrame and save to CSV
test_results_df = pd.DataFrame({
    'Filename': test_valid_filenames,
    'PredictedClass': test_predicted_labels
})
test_results_df.to_csv(test_output_csv, index=False)
print(f"Predictions saved to '{test_output_csv}'")


In [None]:
# Paths (adjust if needed)
test_csv_path = '/Users/jathin/Desktop/sentiment analysis/archive/test_predictions.csv'  # Your test CSV with Filename and Class
test_images_folder = 'test_images'  # Folder with test spectrogram images
test_image_extension = '.png'  # Change to '.jpg' if necessary
test_img_size = (128, 128)  # Must match training size
test_model_path = 'sentiment_model.h5'  # Trained model
output_predictions_csv = 'test_predictions.csv'  # Optional output

# Step 1: Load the trained model
model = load_model(test_model_path)
print("Model loaded successfully.")

# Step 2: Load test CSV for actual labels
test_df = pd.read_csv(test_csv_path)
# Standardize labels (e.g., handle case/whitespace)
test_df['PredictedClass'] = test_df['PredictedClass'].str.strip().str.title()

# Step 3: Load and preprocess test images, aligning with CSV
test_filenames_csv = test_df['Filename'].values  # From CSV (e.g., '346.wav')
test_image_paths = [os.path.join(test_images_folder, f.replace('.wav', test_image_extension)) for f in test_filenames_csv]

test_images = []
test_valid_filenames = []
test_actual_labels = []
for i, path in enumerate(test_image_paths):
    if os.path.exists(path):
        img = cv2.imread(path)
        img = cv2.resize(img, test_img_size)
        img = img / 255.0  # Normalize
        test_images.append(img)
        test_valid_filenames.append(test_filenames_csv[i])
        test_actual_labels.append(test_df['PredictedClass'].iloc[i])
    else:
        print(f"Warning: Image not found: {path}")

if not test_images:
    print("No valid images found. Check paths and filenames.")
    exit()

test_images = np.array(test_images)
test_actual_labels = np.array(test_actual_labels)

# Encode actual labels (match training order)
le = LabelEncoder()
le.fit(['Positive', 'Negative', 'Neutral'])
encoded_actual_labels = le.transform(test_actual_labels)

# Step 4: Make predictions
predictions = model.predict(test_images)
predicted_classes = np.argmax(predictions, axis=1)
predicted_labels = le.inverse_transform(predicted_classes)

# Step 5: Compare and compute metrics
accuracy = accuracy_score(encoded_actual_labels, predicted_classes)
print(f"Test Accuracy: {accuracy * 100:.2f}%")

print("\nClassification Report:")
print(classification_report(encoded_actual_labels, predicted_classes, target_names=le.classes_))

print("\nConfusion Matrix:")
print(confusion_matrix(encoded_actual_labels, predicted_classes))

# Optional: Save predictions to CSV for reference
results_df = pd.DataFrame({
    'Filename': test_valid_filenames,
    'ActualClass': test_actual_labels,
    'PredictedClass': predicted_labels
})
results_df.to_csv(output_predictions_csv, index=False)
print(f"\nPredictions and actuals saved to '{output_predictions_csv}'")




Model loaded successfully.
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 50ms/step
Test Accuracy: 100.00%

Classification Report:
              precision    recall  f1-score   support

    Negative       1.00      1.00      1.00        36
     Neutral       1.00      1.00      1.00        39
    Positive       1.00      1.00      1.00        35

    accuracy                           1.00       110
   macro avg       1.00      1.00      1.00       110
weighted avg       1.00      1.00      1.00       110


Confusion Matrix:
[[36  0  0]
 [ 0 39  0]
 [ 0  0 35]]

Predictions and actuals saved to 'test_predictions.csv'
