In [None]:

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import mglearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
#Read the voice dataset
mydata = pd.read_csv("voice/voice.csv")
#Preview voice dataset
mydata.head()
print(mydata.shape)
#Plot the histograms
male = mydata.loc[mydata['label']=='male']
female = mydata.loc[mydata['label']=='female']
fig, axes = plt.subplots(10, 2, figsize=(10,20))
ax = axes.ravel()
for i in range(20):
    ax[i].hist(male.ix[:,i], bins=20, color=mglearn.cm3(0), alpha=.5)
    ax[i].hist(female.ix[:, i], bins=20, color=mglearn.cm3(2), alpha=.5)
    ax[i].set_title(list(male)[i])
    ax[i].set_yticks(())
    
ax[0].set_xlabel("Feature magnitude")
ax[0].set_ylabel("Frequency")
ax[0].legend(["male", "female"], loc="best")
fig.tight_layout()
#Prepare data for modeling
mydata.loc[:,'label'][mydata['label']=="male"] = 0
mydata.loc[:,'label'][mydata['label']=="female"] = 1
mydata_train, mydata_test = train_test_split(mydata, random_state=0, test_size=.2)
scaler = StandardScaler()
scaler.fit(mydata_train.ix[:,0:20])
X_train = scaler.transform(mydata_train.ix[:,0:20])
X_test = scaler.transform(mydata_test.ix[:,0:20])
y_train = list(mydata_train['label'].values)
y_test = list(mydata_test['label'].values)
#Train decision tree model
tree = DecisionTreeClassifier(random_state=0).fit(X_train, y_train)
print("Decision Tree")
print("Accuracy on training set: {:.3f}".format(tree.score(X_train, y_train)))
print("Accuracy on test set: {:.3f}".format(tree.score(X_test, y_test)))
#Train random forest model
forest = RandomForestClassifier(n_estimators=5, random_state=0).fit(X_train, y_train)
print("Random Forests")
print("Accuracy on training set: {:.3f}".format(forest.score(X_train, y_train)))
print("Accuracy on test set: {:.3f}".format(forest.score(X_test, y_test)))
#Train gradient boosting model
gbrt = GradientBoostingClassifier(random_state=0).fit(X_train, y_train)
print("Gradient Boosting")
print("Accuracy on training set: {:.3f}".format(gbrt.score(X_train, y_train)))
print("Accuracy on test set: {:.3f}".format(gbrt.score(X_test, y_test)))
#Train support vector machine model
svm = SVC().fit(X_train, y_train)
print("Support Vector Machine")
print("Accuracy on training set: {:.3f}".format(svm.score(X_train, y_train)))
print("Accuracy on test set: {:.3f}".format(svm.score(X_test, y_test)))
#Train neural network model
mlp = MLPClassifier(random_state=0).fit(X_train, y_train)
print("Multilayer Perceptron")
print("Accuracy on training set: {:.3f}".format(mlp.score(X_train, y_train)))
print("Accuracy on test set: {:.3f}".format(mlp.score(X_test, y_test)))
#Plot the variable importance
def plot_feature_importances_mydata(model):
    n_features = X_train.shape[1]
    plt.barh(range(n_features), model.feature_importances_, align='center')
    plt.yticks(np.arange(n_features), list(mydata))
    plt.xlabel("Variable importance")
    plt.ylabel("Independent Variable")
plot_feature_importances_mydata(tree)
plot_feature_importances_mydata(forest)
plot_feature_importances_mydata(gbrt)
#Plot the heatmap on first layer weights for neural network
plt.figure(figsize=(100, 20))
plt.imshow(mlp.coefs_[0], interpolation='none', cmap='viridis')
plt.yticks(range(20), list(mydata),fontsize = 50)
plt.xlabel("Columns in weight matrix", fontsize = 50)
plt.ylabel("Input feature", fontsize = 50)
plt.colorbar().set_label('Importance',size=50)
    
plt.show()

In [1]:
#подключаем библиотеки
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
!pip install mglearn
!pip install sklearn
!pip install -U scikit-learn
import mglearn
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier

Collecting mglearn
  Downloading mglearn-0.1.9.tar.gz (540 kB)
[?25l[K     |▋                               | 10 kB 26.8 MB/s eta 0:00:01[K     |█▏                              | 20 kB 32.0 MB/s eta 0:00:01[K     |█▉                              | 30 kB 37.6 MB/s eta 0:00:01[K     |██▍                             | 40 kB 41.4 MB/s eta 0:00:01[K     |███                             | 51 kB 31.0 MB/s eta 0:00:01[K     |███▋                            | 61 kB 28.3 MB/s eta 0:00:01[K     |████▎                           | 71 kB 25.0 MB/s eta 0:00:01[K     |████▉                           | 81 kB 26.7 MB/s eta 0:00:01[K     |█████▌                          | 92 kB 28.2 MB/s eta 0:00:01[K     |██████                          | 102 kB 29.0 MB/s eta 0:00:01[K     |██████▊                         | 112 kB 29.0 MB/s eta 0:00:01[K     |███████▎                        | 122 kB 29.0 MB/s eta 0:00:01[K     |███████▉                        | 133 kB 29.0 MB/s eta 0:00:01[K

In [16]:
#функция превращает аудиозаписи в картинки
import numpy as np
import librosa.display
import matplotlib.pyplot as plt
import librosa
import librosa.display
import pylab
def generate_spectogram(voice):
    print(voice)
    #voice = f'female (1).flac'

    y, sr = librosa.load(voice, mono=True, duration=5)

    fig, ax = plt.subplots()

    # trim silent edges

    whale_song, _ = librosa.effects.trim(y)

    librosa.display.waveplot(whale_song, sr=sr);

    n_fft = 2048

    D = np.abs(librosa.stft(whale_song[:n_fft], n_fft=n_fft, hop_length=n_fft + 1))

    hop_length = 256

    D = np.abs(librosa.stft(whale_song, n_fft=n_fft, hop_length=hop_length))

    librosa.display.specshow(D, sr=sr, y_axis='linear');

    DB = librosa.amplitude_to_db(D, ref=np.max)

    librosa.display.specshow(DB, sr=sr, hop_length=hop_length, y_axis='log');

    pylab.axis('off')

    plt.gca().set_axis_off()

    plt.subplots_adjust(top=1, bottom=0, right=1, left=0, hspace=0, wspace=0)

    plt.margins(0, 0)

    plt.gca().xaxis.set_major_locator(plt.NullLocator())

    plt.gca().yaxis.set_major_locator(plt.NullLocator())

    voice = voice.split('/')[-1]
  
    fig.savefig(f'/content/drive/MyDrive/ПРоект АнДан/Спектограммы/val/male/{voice}.png') 

    plt.clf()
 

In [None]:
generate_spectogram(file)

NameError: ignored

In [None]:
#превращение 37 женских аудиозаписей в картинку
for i in range(393,466,1):
  generate_spectogram('/content/drive/MyDrive/ПРоект АнДан/Аудиозаписи/Female voice/female (' + str(i) + ').flac') #это voice

In [None]:
import re
x = '/content/drive/MyDrive/ПРоект АнДан/Аудиозаписи/Female voice/female (1).flac'
y = 'Bye, World!'
xx = x.split('/')
xx[-1]

'female (1).flac'

In [None]:
#превращение 37 мужских аудиозаписей в картинку
for i in range(465,466,1):
  generate_spectogram('/content/drive/MyDrive/ПРоект АнДан/Аудиозаписи/Male voice/male (' + str(i)+ ').flac') #/content/drive/MyDrive/ПРоект АнДан/Аудиозаписи/Male voice/male (142).flac

/content/drive/MyDrive/ПРоект АнДан/Аудиозаписи/Male voice/male (465).flac


<Figure size 432x288 with 0 Axes>

In [17]:
#подключаем библиотеки для обработки изображений
from tensorflow.python.keras.preprocessing.image import ImageDataGenerator
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.layers import Conv2D, MaxPooling2D
from tensorflow.python.keras.layers import Activation, Dropout, Flatten, Dense

In [18]:
# Каталог с данными для обучения
train_dir = '/content/drive/MyDrive/ПРоект АнДан/Спектограммы/train'
# Каталог с данными для проверки
val_dir = '/content/drive/MyDrive/ПРоект АнДан/Спектограммы/val'
# Каталог с данными для тестирования
test_dir = '/content/drive/MyDrive/ПРоект АнДан/Спектограммы/test'
# Размеры изображения
img_width, img_height = 432, 288
# Размерность тензора на основе изображения для входных данных в нейронную сеть
# backend Tensorflow, channels_last
input_shape = (img_width, img_height, 3)
# Количество эпох
epochs = 30
# Размер мини-выборки
batch_size = 16
# Количество изображений для обучения
nb_train_samples = 574
# Количество изображений для проверки
nb_validation_samples = 174
# Количество изображений для тестирования
nb_test_samples = 182

In [19]:
#создаем нейронную сеть для картинок
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=input_shape))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))

model.add(Flatten())
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))

#компилируем нейронную сеть
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [20]:
datagen = ImageDataGenerator(rescale=1. / 255) #Генератор делит значения всех пикселов изображения на 255.

#Генератор данных для обучения на основе изображений из каталога
#для train данных
train_generator = datagen.flow_from_directory(
    train_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary')

Found 573 images belonging to 2 classes.


In [21]:
#для val данных
val_generator = datagen.flow_from_directory(
    val_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary')

Found 175 images belonging to 2 classes.


In [22]:
#для test данных
test_generator = datagen.flow_from_directory(
    test_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='binary')

Found 182 images belonging to 2 classes.


In [None]:
%%time
#обучение модели с помощью генератора
model.fit_generator(
    train_generator, #train_generator - генератор данных для обучения
    steps_per_epoch=nb_train_samples // batch_size,
    epochs=epochs,
    validation_data=val_generator, #validation_data - генератор данных для проверки
    validation_steps=nb_validation_samples // batch_size)




Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30

In [12]:
k = '/content/drive/MyDrive/ПРоект АнДан/Аудиозаписи/АнДанТнст.m4a'
new = generate_spectogram(k)
model.predict(new)

/content/drive/MyDrive/ПРоект АнДан/Аудиозаписи/АнДанТнст.m4a




ValueError: ignored

<Figure size 432x288 with 0 Axes>

In [13]:
#оценка аккуратности данных
scores = model.evaluate_generator(test_generator, nb_test_samples // batch_size)
print("Аккуратность на тестовых данных: %.2f%%" % (scores[1]*100))

NameError: ignored

In [None]:
pip install pyTelegramBotAPI
import telebot
from telebot import types
#1840603958:AAHYjyuGKnfIFlEmclpx-9agQywzI3Cpj-I
name =''
surname = ''
age = 0
bot = telebot.TeleBot("1840603958:AAHYjyuGKnfIFlEmclpx-9agQywzI3Cpj-I", parse_mode=None)


SyntaxError: ignored

In [11]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
'/content/drive/MyDrive/ПРоект АнДан/Спектограммы/Female png' = generate_spectogram('/content/drive/MyDrive/ПРоект АнДан/Аудиозаписи/Female voice/female (37).flac')


SyntaxError: ignored

In [None]:
plt = 'male (8).flac.png'

plt.imshow(train_images[0])
plt.colorbar()
plt.grid(False)

In [None]:
from google.colab import files
files.download('female (1).flac.png')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
#Read the voice dataset
mydata = pd.read_csv("voice.csv")

#Preview voice dataset
mydata.head()

In [None]:
X = mydata.drop(columns=['label']) #dataset without gender identifier
y = mydata['label'] #gender identifier column
X.head() #Preview "x" dataset

In [None]:
from sklearn.preprocessing import LabelEncoder #importing LabelEncoder 
labelencoder = LabelEncoder()
Y = labelencoder.fit_transform(y) #transforming string data into number set
print(Y)

[1 1 1 ... 0 0 0]


In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y , test_size=0.33, random_state = 42)
print(X.shape)
print(X_train.shape)
print(X_test.shape)


(3168, 20)
(2122, 20)
(1046, 20)


In [None]:
#Train decision tree model
clf = DecisionTreeClassifier()
clf.fit(X_train, y_train) #learning
y_prediction = clf.predict(X_test) #model is trying to predict by itself

from sklearn.metrics import accuracy_score, confusion_matrix
accuracy_score(y_test, y_prediction)  #comparing prediction and the giving answers
confusion_matrix(y_test, y_prediction) #right prediction for each group

array([[479,  20],
       [ 21, 526]])

In [None]:
y_test.value_counts()

male      547
female    499
Name: label, dtype: int64

In [None]:
    #Train random forest model
forest = RandomForestClassifier(n_estimators=5, random_state=0).fit(X_train, y_train)
print("Random Forests")
print("Accuracy on training set: {:.3f}".format(forest.score(X_train, y_train)))
print("Accuracy on test set: {:.3f}".format(forest.score(X_test, y_test)))

In [None]:
#Train gradient boosting model
gbrt = GradientBoostingClassifier(random_state=0).fit(X_train, y_train)
print("Gradient Boosting")
print("Accuracy on training set: {:.3f}".format(gbrt.score(X_train, y_train)))
print("Accuracy on test set: {:.3f}".format(gbrt.score(X_test, y_test)))

In [None]:
#Train support vector machine model
svm = SVC().fit(X_train, y_train)
print("Support Vector Machine")
print("Accuracy on training set: {:.3f}".format(svm.score(X_train, y_train)))
print("Accuracy on test set: {:.3f}".format(svm.score(X_test, y_test)))

In [None]:
#Train neural network model
mlp = MLPClassifier(random_state=0).fit(X_train, y_train)
print("Multilayer Perceptron")
print("Accuracy on training set: {:.3f}".format(mlp.score(X_train, y_train)))
print("Accuracy on test set: {:.3f}".format(mlp.score(X_test, y_test)))

In [None]:
#Plot the variable importance
def plot_feature_importances_mydata(model):
    n_features = X_train.shape[1]
    plt.barh(range(n_features), model.feature_importances_, align='center')
    plt.yticks(np.arange(n_features), list(mydata))
    plt.xlabel("Variable importance")
    plt.ylabel("Independent Variable")
plot_feature_importances_mydata(tree)
plot_feature_importances_mydata(forest)
plot_feature_importances_mydata(gbrt)

In [None]:
#Plot the heatmap on first layer weights for neural network
plt.figure(figsize=(100, 20))
plt.imshow(mlp.coefs_[0], interpolation='none', cmap='viridis')
plt.yticks(range(20), list(mydata),fontsize = 50)
plt.xlabel("Columns in weight matrix", fontsize = 50)
plt.ylabel("Input feature", fontsize = 50)
plt.colorbar().set_label('Importance',size=50)
    
plt.show()

# Новый раздел