In [2]:
# feature extractoring and preprocessing data
# 음원 데이터를 분석
import librosa

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# notebook을 실행한 브라우저에서 바로 그림을 볼 수 있게 해주는 것
%matplotlib inline

# 운영체제와의 상호작용을 돕는 다양한 기능을 제공
# 1. 현재 디렉토리 확인하기
# 2. 디렉토리 변경
# 3. 현재 디렉토리의 파일 목록 확인하기
# 4. csv 파일 호출
import os

# 파이썬에서의 이미지 처리
from PIL import Image

import pathlib
import csv

# Preprocessing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import mean_squared_error

#Keras
import keras

# 경고 메시지를 무시하고 숨기거나  -> warnings.filterwarnings(action='ignore')
# 일치하는 경고를 인쇄하지 않습니다 = ('ignore')
import warnings
warnings.filterwarnings('ignore')

In [3]:
# 원하는 종류의 색깔만 넘겨주는 것
cmap = plt.get_cmap('inferno')

plt.figure(figsize=(10,10))
genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()
for g in genres:
    pathlib.Path(f'img_data/{g}').mkdir(parents=True, exist_ok=True)     
    for filename in os.listdir(f'./MIR/genres/{g}'):
        songname = f'./MIR/genres/{g}/{filename}'
        y, sr = librosa.load(songname, mono=True, duration=5)
        plt.specgram(y, NFFT=2048, Fs=2, Fc=0, noverlap=128, cmap=cmap, sides='default', mode='default', scale='dB');
        plt.axis('off');
        plt.savefig(f'img_data/{g}/{filename[:-3].replace(".", "")}.png')
        plt.clf()

<Figure size 720x720 with 0 Axes>

In [4]:
header = 'filename chroma_stft rmse spectral_centroid spectral_bandwidth rolloff zero_crossing_rate'
for i in range(1, 21):
    header += f' mfcc{i}'
header += ' label'
header = header.split()

In [5]:
file = open('data.csv', 'w', newline='')
with file:
    writer = csv.writer(file)
    writer.writerow(header)
genres = 'blues classical country disco hiphop jazz metal pop reggae rock'.split()
for g in genres:
    for filename in os.listdir(f'./MIR/genres/{g}'):
        songname = f'./MIR/genres/{g}/{filename}'
        y, sr = librosa.load(songname, mono=True, duration=30)
        chroma_stft = librosa.feature.chroma_stft(y=y, sr=sr)
        spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
        spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        rolloff = librosa.feature.spectral_rolloff(y=y, sr=sr)
        zcr = librosa.feature.zero_crossing_rate(y)
        mfcc = librosa.feature.mfcc(y=y, sr=sr)
        #rmse = mean_squared_error(y, y_pred=sr)**0.5
        rmse = librosa.feature.rms(y=y)
        to_append = f'{filename} {np.mean(chroma_stft)} {np.mean(rmse)} {np.mean(spec_cent)} {np.mean(spec_bw)} {np.mean(rolloff)} {np.mean(zcr)}'    
        for e in mfcc:
            to_append += f' {np.mean(e)}'
        to_append += f' {g}'
        file = open('data.csv', 'a', newline='')
        with file:
            writer = csv.writer(file)
            writer.writerow(to_append.split())

In [6]:
# mfcc = 오디오 신호에서 추출할 수 있는 feature로, 소리의 고유한 특징을 나타내는 수치
#      = 등록된 음성과 현재 입력된 음성의 유사도를 판별하는 근거의 일부로 쓰입니다.
#      = MFCC(Mel-Frequency Cepstral Coefficient)는
#        Mel Spectrum(멜 스펙트럼)에서 Cepstral(켑스트럴) 분석을 통해 추출된 값
#      
# 이해하기 위해 먼저 
# -  Spectrum(스펙트럼)
# -  Cepstrum(켑스트럼)
# -  Mel Spectrum(멜 스펙트럼)  들을 알아야 한다.

In [7]:
data = pd.read_csv('data.csv')
data.head()

# chroma_stft = 채도_? , 크로마 표준
# spectral_centroid = 스펙트럼 중심
# spectral_bandwidth = 스펙트럼 대역폭
# rolloff = 롤 오프
# zero_crossing_rate = 제로 크로싱 비율
#        
# mfcc[n] = 

Unnamed: 0,filename,chroma_stft,rmse,spectral_centroid,spectral_bandwidth,rolloff,zero_crossing_rate,mfcc1,mfcc2,mfcc3,...,mfcc12,mfcc13,mfcc14,mfcc15,mfcc16,mfcc17,mfcc18,mfcc19,mfcc20,label
0,blues.00093.wav,0.37769,0.065906,569.930721,995.407125,927.427725,0.021701,-350.436188,169.545746,31.82037,...,1.82169,-5.970891,-5.259567,-0.229211,-1.77685,-3.713751,0.181591,2.07239,-2.896225,blues
1,blues.00087.wav,0.336773,0.158098,1442.190271,1870.534155,3083.414688,0.050889,-155.504929,125.638863,1.596553,...,-0.792893,-7.748057,0.413548,-7.030263,3.997679,-6.256611,0.958227,2.019821,-5.742188,blues
2,blues.00050.wav,0.40086,0.18238,1945.848425,2082.246626,4175.874749,0.085806,-82.979019,107.052124,-25.320452,...,12.539581,-9.762303,2.562253,-6.300853,2.996785,-8.718455,-0.326581,-2.980347,0.7126,blues
3,blues.00044.wav,0.390212,0.136276,2279.124558,2375.10212,5198.360233,0.09257,-109.509285,86.922409,-8.607986,...,11.087481,-5.085794,3.97636,-12.859742,12.343859,0.026216,-0.741568,-5.12662,3.303442,blues
4,blues.00078.wav,0.414188,0.258052,2333.685108,2227.425609,4942.811778,0.123863,-2.524338,101.252716,-33.924385,...,12.506608,-13.368822,6.112817,-9.06589,5.033774,-11.330277,3.166534,-4.567591,-4.033623,blues


In [8]:
data.shape

(1000, 28)

In [9]:
# Dropping unneccesary columns
data = data.drop(['filename'],axis=1)

In [10]:
genre_list = data.iloc[:, -1]
encoder = LabelEncoder()
y = encoder.fit_transform(genre_list)

In [11]:
scaler = StandardScaler()
X = scaler.fit_transform(np.array(data.iloc[:, :-1], dtype = float))

In [12]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [13]:
len(y_train)

800

In [14]:
len(y_test)

200

In [15]:
X_train[10]

array([ 1.20479352,  1.42609825,  0.96082301,  0.18085232,  0.58058547,
        1.74517674,  1.53839082, -0.74177591, -1.03017517,  1.23524868,
       -1.60298213,  1.06466264, -1.15759201,  0.88320942, -0.8348293 ,
        1.50334749, -1.47161995,  1.93763887, -1.32057984,  1.98280542,
       -1.49915185,  0.94407189, -1.0132643 ,  0.92919344, -0.55400392,
        2.58643577])

In [16]:
from keras import models
from keras import layers

model = models.Sequential()
model.add(layers.Dense(256, activation='relu', input_shape=(X_train.shape[1],)))

model.add(layers.Dense(128, activation='relu'))

model.add(layers.Dense(64, activation='relu'))

model.add(layers.Dense(10, activation='softmax'))

In [17]:
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

In [18]:
history = model.fit(X_train,
                    y_train,
                    epochs=20,
                    batch_size=128)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [19]:
test_loss, test_acc = model.evaluate(X_test,y_test)



In [20]:
print('test_acc: ',test_acc)

test_acc:  0.7099999785423279


In [21]:
x_val = X_train[:200]
partial_x_train = X_train[200:]

y_val = y_train[:200]
partial_y_train = y_train[200:]

In [22]:

model = models.Sequential()
model.add(layers.Dense(512, activation='relu', input_shape=(X_train.shape[1],)))
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(128, activation='relu'))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(10, activation='softmax'))

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

model.fit(partial_x_train,
          partial_y_train,
          epochs=30,
          batch_size=512,
          validation_data=(x_val, y_val))
results = model.evaluate(X_test, y_test)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [23]:
results

[1.1047197580337524, 0.6449999809265137]

In [24]:
predictions = model.predict(X_test)

In [25]:
predictions[0].shape

(10,)

In [26]:
np.sum(predictions[0])

1.0

In [27]:
np.argmax(predictions[0])

4

In [28]:
from flask import send_file
from flask import Flask, request
from werkzeug.utils import secure_filename

import os
import socket

#import requests
#
#url = "http://localhost:5000/save"
#
#headers = {
#    "Content-Type": "audio/wav",
#}
#params = {"uploadType": "media", "name": "test.wav"}
#with open('test.wav', 'rb') as file:
#  r = requests.post(url, params=params, headers=headers, data=file)
#print(r.text)

app = Flask(__name__)

# RFC - https://tools.ietf.org/html/rfc3003
@app.route('/mp3_download')
def mp3_download():
    file_name = "./TwoStepDR_320kbps.mp3"
    return send_file(file_name,
                     mimetype='audio/mpeg',
                     attachment_filename='mp3_test.mp3',# 다운받아지는 파일 이름. 
                     as_attachment=True)

@app.route("/file_download")
def hello():
    return '''
    <a href="/mp3_download">Click me.</a>
    
    <form method="get" action="mp3_download">
        <button type="submit">Download!</button>
    </form>
    '''

@app.route("/upload")
def upload():
	#<form action="http://localhost:5000/file_upload" method="POST"
	return '''
	<form action="/upload_test" method="POST"
			enctype="multipart/form-data">
		<input type="file" name="audio_data"/>
		<input type="submit"/>
	</form>
	'''

#@app.route("/file_upload")
#def file_upload():
#	if request.method == 'POST':
#		f = request.files['file']
#		print(f.filename)
#		f.save(secure_filename(f.filename))
#		return 'uploads dir -> file upload success'

@app.route("/upload_test", methods=['POST', 'GET'])
def upload_test():
	if request.method == "POST":
		f = request.files['audio_data']
		print(f.filename)
		print(f)

		(host, port) = ('localhost', 37373)
		s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
		s.connect((host, port))

		with open(f.filename, 'rb') as wave_file:
			for l in f:
				s.sendall(l)

		s.close()

		#with open('audio.wav', 'wb') as audio:
		#	f.save(audio)
		print('file upload Success')

#@app.route("/upload_test", methods=['POST', 'GET'])
#def index():
#    if request.method == "POST":
#        f = request.files['audio_data']
#		with open('audio.wav', 'wb') as audio:
#            f.save(audio)
#        print('file uploaded successfully')
#    else:
#        print('file uploaded failure')

if __name__ == '__main__':
	app.run(debug=True)


ModuleNotFoundError: No module named 'flask'