In [1]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm

import os
os.chdir('../Python')
import TrainingsDataInterface
import TrainingsInterface
import Train
import DatasetAugmentation
import Constants

print('installed version of Tensorflow: ', tf.__version__)

installed version of Tensorflow:  2.14.0


In [2]:
fashion_mnist = tf.keras.datasets.fashion_mnist

(train_images, train_labels), (test_images, test_labels) = fashion_mnist.load_data()

print(train_images.shape)
print(train_labels.shape)
print(train_labels[0])

(60000, 28, 28)
(60000,)
9


In [4]:
AudioDataLengthInMilliseconds = Constants.theConstants.getWordLengthInMilliseconds()
NumberOfTestSamples = 20
NumberOfValidationSamples = 20

ATrainingsDataInterface = TrainingsDataInterface.CTrainingsDataInterface()
def GetNumberOfTrainingsData():
    res = 0
    for CommandIndex in range(ATrainingsDataInterface.GetNumberOfCommands()):
        command = ATrainingsDataInterface.GetCommandString(CommandIndex)
        if command in Train.VOCABULARY:
            NewSamples = ATrainingsDataInterface.GetNumberOfCommandInstances(CommandIndex)
            NewSamples -= NumberOfTestSamples
            NewSamples -= NumberOfValidationSamples
            assert NewSamples > 0, str('not enough training samples for command ' + command)
            res += NewSamples
    return res

def GetAudioWithConstantLength(x, Fs):
    LengthInSamples = int(AudioDataLengthInMilliseconds * Fs / 1000)
    if x.shape[0] < LengthInSamples:
        y = np.concatenate((x, np.zeros((LengthInSamples - x.shape[0]))), axis = 0)
    else:
        E_cumsum = np.cumsum(x**2)
        tmp = E_cumsum[LengthInSamples:]
        tmp -= E_cumsum[:tmp.shape[0]]
        MaxIndex = np.argmax(tmp)
        y = x[MaxIndex:MaxIndex + LengthInSamples]
    assert np.abs(y.shape[0] - LengthInSamples) < 1e-1, 'wrong output length'
    return y

def IsTraining(InstanceIndex):
    return not (IsTest(InstanceIndex) or IsValidation(InstanceIndex))

def IsValidation(InstanceIndex):
    return (not IsTest(InstanceIndex)) and (InstanceIndex < (NumberOfTestSamples + NumberOfValidationSamples))

def IsTest(InstanceIndex):
    return InstanceIndex < NumberOfTestSamples

def EvaluateAllData():
    Constants.theConstants.SetUseVAD(False)
    train_images = None
    train_counter = 0
    test_counter = 0
    validation_counter = 0
    for CommandIndex in tqdm(range(ATrainingsDataInterface.GetNumberOfCommands())):
        command = ATrainingsDataInterface.GetCommandString(CommandIndex)
        if command in Train.VOCABULARY:
            for n in range(len(Train.VOCABULARY)):
                if Train.VOCABULARY[n] == command:
                    commandlabel = n
            for InstanceIndex in range(ATrainingsDataInterface.GetNumberOfCommandInstances(CommandIndex)):
                x, Fs, bits = ATrainingsDataInterface.GetWaveOfCommandInstance(CommandIndex, InstanceIndex)
                assert np.abs(Constants.theConstants.getSamplingFrequencyMicrofone() - Fs) < 1e-3, 'wrong sampling rate'
                ADatasetAugmentation = DatasetAugmentation.CAudioDatasetAugmentation(x, Fs)
                NumberOfDistortions = 1#ADatasetAugmentation.GetNumberOfResults()
                if IsTraining(InstanceIndex):
                    MaxDistortionIndex = NumberOfDistortions
                else:
                    MaxDistortionIndex = 1
                for DistortionIndex in range(MaxDistortionIndex):
                    y, Timestretchfactor = ADatasetAugmentation.GenerateSingleDistortion(DistortionIndex)
                    z = GetAudioWithConstantLength(y, Fs)
                    Feature = TrainingsInterface.SamplesToFeature(z, Fs)
                    if train_images is None:
                        train_images = np.zeros((GetNumberOfTrainingsData()*NumberOfDistortions, Feature.shape[0], Feature.shape[1]))
                        test_images = np.zeros((NumberOfTestSamples*len(Train.VOCABULARY), Feature.shape[0], Feature.shape[1]))
                        validation_images = np.zeros((NumberOfValidationSamples*len(Train.VOCABULARY), Feature.shape[0], Feature.shape[1]))
                        train_labels = np.zeros((train_images.shape[0]))
                        test_labels = np.zeros((test_images.shape[0]))
                        validation_labels = np.zeros((validation_images.shape[0]))
                    if IsTraining(InstanceIndex):
                        train_images[train_counter, :, :] = Feature
                        train_labels[train_counter] = commandlabel
                        train_counter += 1   
                    elif IsTest(InstanceIndex):
                        test_images[test_counter, :, :] = Feature
                        test_labels[test_counter] = commandlabel
                        test_counter += 1   
                    else:
                        validation_images[validation_counter, :, :] = Feature
                        validation_labels[validation_counter] = commandlabel
                        validation_counter += 1                         
    return train_images, train_labels, test_images, test_labels, validation_images, validation_labels

def GetAllData():
    Filename = 'tmp.npz'
    try:
        x = 1/0
        data = np.load(Filename)
        train_images = data['x0']
        train_labels = data['x1']
        test_images = data['x2']
        test_labels = data['x3']
        validation_images = data['x4']
        validation_labels = data['x5']
    except:
        train_images, train_labels, test_images, test_labels, validation_images, validation_labels = EvaluateAllData()
        np.savez(Filename, x0 = train_images, x1 = train_labels, x2 = test_images, x3 = test_labels, x4 = validation_images, x5 = validation_labels)
    return train_images, train_labels, test_images, test_labels, validation_images, validation_labels

train_images, train_labels, test_images, test_labels, validation_images, validation_labels = GetAllData()
print(train_images.shape)
print(train_labels.shape)

100%|██████████████████████████████████████████████████████████████████████████████████| 47/47 [01:21<00:00,  1.74s/it]


(1565, 38, 69)
(1565,)


In [11]:
model = tf.keras.Sequential([
    tf.keras.layers.Flatten(input_shape=(train_images.shape[1], train_images.shape[2])),
    tf.keras.layers.Dense(128, activation='relu'),
    tf.keras.layers.Dense(len(Train.VOCABULARY))
])

For classification tasks, the cross entropy is typically chosen as loss function for training. The cross entropy is defined by:

$L=-\sum_j o_j\cdot\log z_j$

with $o_j$ corresponding to the correct output and $z_j$ corresponding to the current output of the neural network during training.

In [17]:
callback = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy', patience=5)
model.compile(optimizer='adam',
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
              metrics=['accuracy'])

history = model.fit(train_images, train_labels, epochs=1000, validation_data=(validation_images, validation_labels), callbacks=[callback], verbose = 0)
print('training finished after ', len(history.history['loss']), ' epochs')

test_loss, test_acc = model.evaluate(test_images,  test_labels, verbose=2)

print('\nTest accuracy:', test_acc)

training finished after  8  epochs
6/6 - 0s - loss: 0.9335 - accuracy: 0.6556 - 40ms/epoch - 7ms/step

Test accuracy: 0.6555555462837219


Trainingsbeispiele für Tensorflow:
1 notebook: Klassifikation der Wörter mit Datasetaugmentation, regularisierung, ...
1 notebook: funktion transformiert Spektrogramm in MFCC, DMFCC und DDMFCC, kann Tensorflow diese Funktion invertieren?
1 notebook: Tiefpass ohne Latenz mit Tensorflow basteln?
1 notebook: Nachbildung der webrtcvad?

Exam Preparation:
Was war die Problemstellung?
Warum benötigt man ein nicht-lineares System?
Was für Probleme traten generell auf?
War es schwierig die Trainingsdaten zu erzeugen?
