In [11]:
import os #for using operating system-dependent functionality
import numpy as np # for numerical computations
import pandas as pd # for data manipulation and analysis
import matplotlib.pyplot as plt #for creating visualizations 
import seaborn as sns #for high-level interfaces, built on-top plt
from PIL import Image #downsample the images
import warnings
warnings.filterwarnings(action="ignore")

from tensorflow.keras.preprocessing.image import ImageDataGenerator, load_img, img_to_array #for processing images + not overwhelm memory

import librosa #for audio analysis
import librosa.display #for displaying audio data

from sklearn.preprocessing import MinMaxScaler #for feature scaling
from tqdm import tqdm, notebook,trange #functions/classes for displaying progress bars during iterations
from sklearn.linear_model import LogisticRegression # for logistic regression modeling

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, confusion_matrix

from keras.preprocessing import image
from keras import backend as K #pre-trained deep learning models for computer vision tasks
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential #to build sequential neural network models
from keras.layers import Flatten,BatchNormalization #DL layers
from keras.layers import Dense,Dropout
from keras.optimizers import Adam # optimizer used in training
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

import plotly as px #for creating interactive visualizations
import plotly.graph_objects as go #for creating graph objects

from pylab import rcParams #to customize graph figure
rcParams['figure.figsize'] = 15,6
plt.style.use('fivethirtyeight')

In [16]:
df = pd.read_csv("D:/AMINA/PFE24/application/data.csv")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 126100 entries, 0 to 126099
Data columns (total 4 columns):
 #   Column    Non-Null Count   Dtype 
---  ------    --------------   ----- 
 0   filename  126100 non-null  object
 1   age       126100 non-null  int64 
 2   gender    126100 non-null  object
 3   accent    126100 non-null  object
dtypes: int64(1), object(3)
memory usage: 3.8+ MB


In [14]:
import re

# Image preprocessing
output_folder = "D:/AMINA/PFE24/application/audio_imgs"
#sorting images by their number
def numerical_sort(value):
    # Extract the numerical part of the file name
    numbers = re.findall(r'^(\d+)', value)
    return int(numbers[0]) if numbers else -1

image_paths = sorted([os.path.join(output_folder, filename) for filename in os.listdir(output_folder) if filename.endswith('_mfcc.png')], key=numerical_sort)
# Initialize ImageDataGenerator for preprocessing
datagen = ImageDataGenerator(rescale=1./255)  # rescale pixel values to [0,1]

# Parameters for loading images
batch_size = 10
target_size = (128, 128)  # target size for resizing images

# Create generator from image paths
def image_generator(image_paths, batch_size, target_size):
    num_samples = len(image_paths)
    while True:
        for i in range(0, num_samples, batch_size):
            batch_paths = image_paths[i:i+batch_size]
            batch_images = []
            for path in batch_paths:
                img = load_img(path, target_size=target_size)
                img = img_to_array(img) / 255.0  # normalize pixel values
                batch_images.append(img)
            yield np.array(batch_images)

# Create generator
generator = image_generator(image_paths, batch_size, target_size)

# Calculate number of steps per epoch
steps_per_epoch = len(image_paths) // batch_size

# Loop over batches to extract images
images = []
for i in range(steps_per_epoch):
    batch = next(generator)
    print(f"Processing batch {i+1}/{steps_per_epoch}")
    images.extend(batch)
    for j in range(len(batch)):
        print(f"Processing image {i*batch_size + j + 1}/{len(image_paths)}", end="\r")

images = np.array(images)

Processing batch 1/12610
Processing batch 2/1261008
Processing batch 3/1261008
Processing batch 4/1261008
Processing batch 5/1261008
Processing batch 6/1261008
Processing batch 7/1261008
Processing batch 8/1261008
Processing batch 9/1261008
Processing batch 10/126108
Processing batch 11/1261008
Processing batch 12/1261008
Processing batch 13/1261008
Processing batch 14/1261008
Processing batch 15/1261008
Processing batch 16/1261008
Processing batch 17/1261008
Processing batch 18/1261008
Processing batch 19/1261008
Processing batch 20/1261008
Processing batch 21/1261008
Processing batch 22/1261008
Processing batch 23/1261008
Processing batch 24/1261008
Processing batch 25/1261008
Processing batch 26/1261008
Processing batch 27/1261008
Processing batch 28/1261008
Processing batch 29/1261008
Processing batch 30/1261008
Processing batch 31/1261008
Processing batch 32/1261008
Processing batch 33/1261008
Processing batch 34/1261008
Processing batch 35/1261008
Processing batch 36/1261008
Proc

In [17]:
len(images)

126100

In [18]:
# Combine image and audio data
X = images
# Encode target labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(df['age'])

#bc StratifiedKFold doesn't split into 3 sets: 
#split into temp train set (split into train 60% + val 20% sets) + test set 20%
X_train, X_temp, y_train, y_temp = train_test_split(X, y_encoded, test_size=0.2, stratify=y_encoded, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.25, stratify=y_temp, random_state=42)

In [22]:

model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)))  # All imgs are 200x200 pixels
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(128, activation='relu'))
model.add(Dense(8, activation='softmax')) #8 = number of age classes

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Set the desired number of training samples
num_train_samples = 1000  # Adjust this based on your memory constraints

# Randomly sample the training data
indices = np.random.choice(len(X_train), num_train_samples, replace=False)
X_train_subset = X_train[indices]
y_train_subset = y_train[indices]

# Train the model with the subset of data
model.fit(X_train_subset, y_train_subset, epochs=10, batch_size=16)
# Train the model
#model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_val, y_val))

# Evaluate the model on the test data
test_loss, test_accuracy = model.evaluate(X_test, y_test)

# Predict classes for the test data
y_pred = model.predict_classes(X_test)

# Calculate evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
recall = recall_score(y_test, y_pred, average='weighted')
precision = precision_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')
conf_matrix = confusion_matrix(y_test, y_pred)

# Print out the evaluation metrics results
print("Test Loss:", test_loss)
print("Test Accuracy:", test_accuracy)
print("Accuracy:", accuracy)
print("Recall:", recall)
print("Precision:", precision)
print("F1 Score:", f1)

Epoch 1/10


In [None]:
print("Confusion Matrix:")
print(conf_matrix)