# Projet E4

## Introduction

Modèle IA pour la reconnaissance d'anomalie chez le patient.
* Input : fichier .wav d'un cardiogramme
* Process : reconnaissance de l'anomalie
* Output : le type d'anomalie 

## Import

In [82]:
import wave
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sn
from scipy import signal
from scipy.io import wavfile
import splitfolders
from keras import layers
from keras.layers import Activation, Dense, Dropout, Conv2D, Flatten, MaxPooling2D, GlobalMaxPooling2D, GlobalAveragePooling1D, AveragePooling2D, Input, add
from keras.models import Sequential
from tensorflow.keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
import pandas as pd

import cv2

In [24]:
path = "Data/MLDataset/Training/Normal/101_1305030823364_B.wav"
path_tokens = path.split("/")
print(path_tokens)

['Data', 'MLDataset', 'Training', 'Normal', '101_1305030823364_B.wav']


In [37]:
def wave_to_spect(path):
    path_tokens = path.split("/")
    sample_rate, samples = wavfile.read(path)
    os.makedirs("Data/Spectrograms/"+path_tokens[2]+"/"+path_tokens[3],exist_ok = True)
    plt.subplot(212)
    plt.specgram(samples, Fs=sample_rate,NFFT=2048,Fc = 0, sides='default',mode='default', scale='dB')
    plt.axis('off')
    plt.savefig("Data/Spectrograms/"+path_tokens[2]+"/"+path_tokens[3]+"/"+path_tokens[4].replace(".wav",".png"))
    plt.clf()




In [48]:

data_path = "Data/MLDataset"
for t in os.listdir(data_path):
    complete_data_path = os.path.join(data_path,t)
    for a in os.listdir(complete_data_path ):
        new_data_path = os.path.join(complete_data_path,a)
        for filename in os.listdir(new_data_path):
            wave_to_spect(os.path.join(new_data_path,filename).replace("\\","/"))

<Figure size 432x288 with 0 Axes>

In [53]:

splitfolders.ratio('./Data/Spectrograms/Known_datas',output="./Data/Spectrograms/Training",seed=1337,ratio=(.8,.2))

Copying files: 517 files [00:01, 375.56 files/s]


In [85]:
train_datagen = ImageDataGenerator(
    rescale=1./255,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

test_datagen = ImageDataGenerator(rescale=1./255)

training_set = train_datagen.flow_from_directory(
    './Data/Spectrograms/Training/train',
    target_size = (288, 432),
    batch_size= 19,
    class_mode = 'categorical',
    shuffle=False
)

test_set = test_datagen.flow_from_directory(
    './Data/Spectrograms/Training/val',
    target_size = (288, 432),
    batch_size= 19,
    class_mode = 'categorical',
    shuffle=False
)

Found 412 images belonging to 5 classes.
Found 105 images belonging to 5 classes.


In [71]:
model = Sequential()
input_shape = (288, 432, 3)#first hidden layer
model.add(Conv2D(32, (3,3), strides= (2,2), input_shape=input_shape))
model.add(AveragePooling2D((2,2),strides = (2,2)))
model.add(Activation('relu'))#2nd hidden layer
model.add(Conv2D(64, (3, 3), padding="same"))
model.add(AveragePooling2D((2, 2), strides=(2,2)))
model.add(Activation('relu'))#3rd hidden layer
model.add(Conv2D(64, (3, 3), padding="same"))
model.add(AveragePooling2D((2, 2), strides=(2,2)))
model.add(Activation('relu'))#Flatten
model.add(Flatten())
model.add(Dropout(rate=0.5))#Add fully connected layer.
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(rate=0.5))#Output layer
model.add(Dense(5))
model.add(Activation('softmax'))
model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_3 (Conv2D)           (None, 143, 215, 32)      896       
                                                                 
 average_pooling2d_3 (Averag  (None, 71, 107, 32)      0         
 ePooling2D)                                                     
                                                                 
 activation_5 (Activation)   (None, 71, 107, 32)       0         
                                                                 
 conv2d_4 (Conv2D)           (None, 71, 107, 64)       18496     
                                                                 
 average_pooling2d_4 (Averag  (None, 35, 53, 64)       0         
 ePooling2D)                                                     
                                                                 
 activation_6 (Activation)   (None, 35, 53, 64)       

In [72]:
epochs = 200
batch_size = 8
learning_rate = 0.01
decay_rate = learning_rate / epochs
momentum = 0.9
sgd = SGD(learning_rate=learning_rate, momentum=momentum, decay=decay_rate, nesterov=False)
model.compile(optimizer="sgd", loss="categorical_crossentropy", metrics=['accuracy'])

In [74]:
model.fit_generator(
        training_set,
        steps_per_epoch=4,
        epochs=50,
        validation_data=test_set,
        validation_steps=200)

  model.fit_generator(


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x21ed399e850>

In [76]:
model.evaluate_generator(generator = test_set, steps=4)

  model.evaluate_generator(generator = test_set, steps=4)


[1.7070904970169067, 0.46052631735801697]

In [104]:
test_set.reset()
pred = model.predict_generator(test_set, steps=13, verbose=1)

  pred = model.predict_generator(test_set, steps=13, verbose=1)




In [105]:
predicted_class_indices = np.argmax(pred,axis=1)

labels = (training_set.class_indices)
labels = dict((v,k) for k,v in labels.items())
predictions = [labels[k] for k in predicted_class_indices]
predictions = predictions[:200]
filenames = test_set.filenames
print(filenames)

['Artifact\\201106021541.png', 'Artifact\\201106101314.png', 'Artifact\\201106110909.png', 'Artifact\\201106111119.png', 'Artifact\\201106190520.png', 'Artifact\\201106211430.png', 'Artifact\\201106212112.png', 'Artifact\\201106221254.png', 'Extrahs\\201101161027.png', 'Extrahs\\201102241217.png', 'Extrahs\\201103150114.png', 'Extrahs\\201104270458.png', 'Extrasystole\\153_1306848820671_C.png', 'Extrasystole\\198_1308141739338_B1.png', 'Extrasystole\\202_1308145175747_C2.png', 'Extrasystole\\207_1308159792607_B1.png', 'Extrasystole\\209_1308162216750_D.png', 'Extrasystole\\235_1308749032454_B.png', 'Extrasystole\\237_1308750231222_C.png', 'Extrasystole\\249_1309202052376_C.png', 'Extrasystole\\261_1309353556003_C.png', 'Extrasystole\\286_1311170606028_D.png', 'Murmur\\135_1306428972976_A.png', 'Murmur\\156_1306936373241_B1.png', 'Murmur\\162_1307101835989_B.png', 'Murmur\\164_1307106095995_C1.png', 'Murmur\\193_1308078104592_B.png', 'Murmur\\195_1308140095331_A.png', 'Murmur\\196_13081

In [106]:
print(len(filenames),len(predictions))

105 105


In [107]:
results = pd.DataFrame({"Filename":filenames,
"Predictions":predictions})
results.to_csv('prediction_results.csv',index=False)