## 전처리

In [2]:
import tensorflow as tf
from tensorflow.keras import layers,Model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.model_selection import train_test_split
from concurrent.futures import ThreadPoolExecutor
import librosa
import numpy as np
from tqdm import tqdm
import pandas as pd
import os
import glob


# Mount Google Drive; the label CSV and the recordings read by later cells
# live under /content/drive/MyDrive/...
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
def audio_preprocess(wav, sr=16000, n_mels=128, max_frames=312):
  """Load one audio file and convert it to a log-mel spectrogram.

  Args:
    wav: Path of the audio file, passed straight to ``librosa.load``.
    sr: Sample rate requested from ``librosa.load``.
    n_mels: Number of mel bands requested from ``melspectrogram``.
    max_frames: Upper bound applied to the reported frame count. The
      spectrogram itself is NOT truncated here; see ``wav_padding``.

  Returns:
    A tuple ``(mel_db, duration, length)``:
      * ``mel_db``   - the mel spectrogram converted with ``power_to_db``
                       using its own maximum as reference.
      * ``duration`` - length of the decoded signal in seconds, rounded to
                       3 decimals.
      * ``length``   - ``mel_db.shape[1]`` capped at ``max_frames``.
  """
  samples, sample_rate = librosa.load(wav, sr=sr)
  mel_power = librosa.feature.melspectrogram(y=samples, sr=sample_rate, n_mels=n_mels)
  mel_db = librosa.power_to_db(mel_power, ref=np.max)
  length = min(mel_db.shape[1], max_frames)

  # Duration comes from the decoded samples rather than from the raw file
  # size: the byte count includes the container header and only matches
  # when the file is 16-bit mono stored at exactly `sr`. This also avoids
  # reading the file from disk a second time.
  duration = len(samples) / sample_rate

  return mel_db, round(duration, 3), length

In [4]:
def wav_padding(wav, wav_max_len=312):
  """Force the second axis of a 2-D array to exactly ``wav_max_len`` columns.

  Shorter inputs are right-padded with the constant -80, longer inputs are
  cut after ``wav_max_len`` columns, and inputs that already have the
  requested width are returned unchanged (same object).
  """
  n_cols = wav.shape[1]

  if n_cols == wav_max_len:
    return wav
  if n_cols > wav_max_len:
    return wav[:, :wav_max_len]

  missing = wav_max_len - n_cols
  return np.pad(wav, pad_width=((0, 0), (0, missing)), mode='constant', constant_values=-80)

In [5]:
def pred_preprocess(wav_path, sr=16000, n_mels=128):
  """Turn a single audio file into a one-item batch for prediction.

  The file goes through ``audio_preprocess`` and ``wav_padding`` (default
  width), then the two spectrogram axes are swapped and a leading batch axis
  and a trailing singleton axis are added, giving a 4-D array.
  """
  mel_db, _duration, _length = audio_preprocess(wav_path, sr=sr, n_mels=n_mels)
  fixed_width = wav_padding(mel_db)

  batch = fixed_width[np.newaxis, ...]   # add batch axis in front
  batch = np.swapaxes(batch, 1, 2)       # swap the two spectrogram axes
  return batch[..., np.newaxis]          # add trailing singleton axis

In [6]:
def build_ptk_model(input_shape=(312, 128, 1), kernel_size=(3, 3)):
  """Build the (uncompiled) CNN regressor used for the score models.

  Architecture: batch normalisation, then three stages of
  [Conv2D, Conv2D, 2x2 average pooling] with 32, 64 and 128 filters, then
  the last two axes are merged, followed by Dense(128), Flatten, Dense(256)
  and a single linear output unit.

  Args:
    input_shape: Shape of one input sample (without the batch axis).
    kernel_size: Kernel size shared by all six convolutions. The defaults
      reproduce the original fixed architecture.

  Returns:
    A ``Model`` mapping the input to one scalar per sample.
  """
  inputs = layers.Input(shape=input_shape)
  x = layers.BatchNormalization(momentum=0.9)(inputs)

  # Layers are instantiated in the same order as the hand-unrolled version,
  # so automatically generated layer names are unchanged.
  for filters in (32, 64, 128):
    x = layers.Conv2D(filters, kernel_size, activation='relu', padding='same')(x)
    x = layers.Conv2D(filters, kernel_size, activation='relu', padding='same')(x)
    x = layers.AveragePooling2D((2, 2))(x)

  # Merge the last two axes so every step along axis 1 becomes one vector.
  _, _, width, channels = x.shape
  x = layers.Reshape((-1, width * channels))(x)

  x = layers.Dense(128, activation='relu')(x)
  x = layers.Flatten()(x)
  x = layers.Dense(256, activation='relu')(x)
  outputs = layers.Dense(1)(x)

  return Model(inputs=inputs, outputs=outputs)

## teo

In [7]:
# Label sheet: the second row of the file holds the column names (header=1)
# and the first column is used as the row index.
df = pd.read_csv('/content/drive/MyDrive/2025sesac/최종프로젝트/data/labeled_data_D.csv', header=1, index_col=0)

folder = '/content/drive/MyDrive/2025sesac/최종프로젝트/data/upload'

# (score column in the label sheet, file-name prefix of the matching recording)
teo_tasks = [('teo_1st', 'p_4'), ('teo_2nd', 'p_5'), ('teo_3rd', 'p_6')]

teo_parts = []
for score_col, file_prefix in teo_tasks:
  pattern = os.path.join(folder, '**', 'CLAP_D', '1', file_prefix + '*.wav')
  # Sorted so the outcome does not depend on file-system listing order when
  # several files match for the same id (the last one in sorted order wins).
  # NOTE(review): a prefix such as 'p_4*' would also match 'p_40...' if such
  # files exist - confirm against the actual naming scheme.
  wav_paths = sorted(glob.glob(pattern, recursive=True))

  # The path component directly above 'CLAP_D' is parsed as an integer and
  # matched against the label sheet's index.
  path_by_id = {int(p.split('/')[-4]): p for p in wav_paths}

  part = df[[score_col]].rename(columns={score_col: 'score'})
  part['path'] = [path_by_id.get(row_id) for row_id in part.index]
  # Keep only rows that have both a score and a recording. This also yields
  # an empty frame (instead of an exception) when nothing matched.
  teo_parts.append(part.dropna())

teo_label_1, teo_label_2, teo_label_3 = teo_parts
teo_label = pd.concat(teo_parts).reset_index(drop=True)

In [8]:
# Build the feature / target arrays for the "teo" recordings.
wav_files = teo_label['path'].tolist()
if not wav_files:
    raise ValueError('teo_label contains no recordings; nothing to preprocess')

n_mels = 128
sr_param = [16000] * len(wav_files)
mel_param = [n_mels] * len(wav_files)

# One audio_preprocess call per file, run on a thread pool; ex.map keeps the
# results in the same order as wav_files.
with ThreadPoolExecutor() as ex:
    results = list(tqdm(ex.map(audio_preprocess, wav_files, sr_param, mel_param), total=len(wav_files)))

wav_info = pd.DataFrame(results, columns=['wav_data', 'duration', 'wav_length'])
b_pad_wav = wav_info['wav_data']

# Padding is a cheap in-memory operation, so a plain loop replaces the
# second thread pool.
audio_padded_data = [wav_padding(mel) for mel in b_pad_wav]

teo_x_padded_data = np.stack(audio_padded_data)
# Swap the last two axes of the stacked spectrograms.
teo_x_data = np.transpose(teo_x_padded_data, (0, 2, 1))

teo_x_data_length = wav_info['wav_length'].to_numpy()

teo_y_data = np.array(teo_label['score'].tolist())

# Features, lengths and targets must stay row-aligned for the split below.
assert teo_x_data.shape[0] == teo_x_data_length.shape[0] == teo_y_data.shape[0]

100%|██████████| 201/201 [00:29<00:00,  6.75it/s]
100%|██████████| 201/201 [00:00<00:00, 203194.77it/s]


In [9]:
# Hold out 10% as the test set, then 10% of the remainder for validation.
# A fixed random_state keeps the partition (and every metric derived from
# it) identical across kernel restarts.
teo_x_train_, teo_x_test, teo_x_data_length_train_, teo_x_data_length_test, teo_y_train_, teo_y_test = train_test_split(
    teo_x_data, teo_x_data_length, teo_y_data, test_size=0.1, random_state=42)
teo_x_train, teo_x_valid, teo_x_data_length_train, teo_x_data_length_valid, teo_y_train, teo_y_valid = train_test_split(
    teo_x_train_, teo_x_data_length_train_, teo_y_train_, test_size=0.1, random_state=42)

teo_x_train.shape, teo_x_valid.shape, teo_x_test.shape, teo_x_data_length_train.shape, teo_x_data_length_valid.shape, teo_x_data_length_test.shape, teo_y_train.shape, teo_y_valid.shape, teo_y_test.shape

((162, 312, 128),
 (18, 312, 128),
 (21, 312, 128),
 (162,),
 (18,),
 (21,),
 (162,),
 (18,),
 (21,))

In [10]:
# Create a fresh, untrained network for the "teo" scores and print its
# layer table.
teo_model_1 = build_ptk_model()
teo_model_1.summary()

In [11]:
# Regression set-up: MSE is the training loss, MAE is reported alongside it.
teo_model_1.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Training stops after 10 epochs without val_loss improvement; the
# checkpoint file is only overwritten when the monitored value improves.
teo_callbacks = [
    EarlyStopping(patience=10, monitor='val_loss'),
    ModelCheckpoint('teo_model.keras', save_best_only=True),
]

teo_model_1.fit(
    teo_x_train,
    teo_y_train,
    epochs=100,
    batch_size=16,
    validation_data=(teo_x_valid, teo_y_valid),
    callbacks=teo_callbacks,
    verbose=1,
)

Epoch 1/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 1s/step - loss: 13.9329 - mae: 3.2452 - val_loss: 12.9253 - val_mae: 3.2436
Epoch 2/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 64ms/step - loss: 8.7851 - mae: 2.6053 - val_loss: 5.3733 - val_mae: 1.8891
Epoch 3/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 60ms/step - loss: 4.1037 - mae: 1.6916 - val_loss: 1.8897 - val_mae: 1.0416
Epoch 4/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 58ms/step - loss: 1.8537 - mae: 1.1443 - val_loss: 1.5958 - val_mae: 0.9770
Epoch 5/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 43ms/step - loss: 1.5580 - mae: 1.0209 - val_loss: 1.6488 - val_mae: 0.9883
Epoch 6/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 43ms/step - loss: 2.1225 - mae: 1.2244 - val_loss: 2.3826 - val_mae: 1.2895
Epoch 7/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 57ms/step -

<keras.src.callbacks.history.History at 0x7ee8b41d05c0>

In [12]:
# Reload the checkpoint written during training and score it on the test set.
teo_model_1 = tf.keras.models.load_model('teo_model.keras')
teo_model_1.evaluate(teo_x_test, teo_y_test)

# NOTE(review): the predictions below cover the WHOLE dataset, including the
# rows the model was trained on, so this correlation is not a held-out
# estimate.
pred_ = teo_model_1.predict(teo_x_data)
pred = np.round(pred_, 1)

# teo_x_data was built in teo_label row order, so the single output column
# can be attached positionally in one step instead of one .loc write per row.
teo_label['pred'] = pred[:, 0].astype(np.float64)
teo_label[['score', 'pred']].corr()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3s/step - loss: 0.9297 - mae: 0.7597
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 286ms/step
          score      pred
score  1.000000  0.901917
pred   0.901917  1.000000


## ptk

In [13]:
# Label sheet: the second row of the file holds the column names (header=1)
# and the first column is used as the row index.
df = pd.read_csv('/content/drive/MyDrive/2025sesac/최종프로젝트/data/labeled_data_D.csv', header=1, index_col=0)

folder = '/content/drive/MyDrive/2025sesac/최종프로젝트/data/upload'

# (score column in the label sheet, file-name prefix of the matching recording)
ptk_tasks = [('ptk_1st', 'p_10'), ('ptk_2nd', 'p_11'), ('ptk_3rd', 'p_12')]

ptk_parts = []
for score_col, file_prefix in ptk_tasks:
  pattern = os.path.join(folder, '**', 'CLAP_D', '1', file_prefix + '*.wav')
  # Sorted so the outcome does not depend on file-system listing order when
  # several files match for the same id (the last one in sorted order wins).
  # NOTE(review): a prefix such as 'p_10*' would also match 'p_100...' if
  # such files exist - confirm against the actual naming scheme.
  wav_paths = sorted(glob.glob(pattern, recursive=True))

  # The path component directly above 'CLAP_D' is parsed as an integer and
  # matched against the label sheet's index.
  path_by_id = {int(p.split('/')[-4]): p for p in wav_paths}

  part = df[[score_col]].rename(columns={score_col: 'score'})
  part['path'] = [path_by_id.get(row_id) for row_id in part.index]
  # Keep only rows that have both a score and a recording. This also yields
  # an empty frame (instead of an exception) when nothing matched.
  ptk_parts.append(part.dropna())

ptk_label_1, ptk_label_2, ptk_label_3 = ptk_parts
ptk_label = pd.concat(ptk_parts).reset_index(drop=True)

In [14]:
# Build the feature / target arrays for the "ptk" recordings.
wav_files = ptk_label['path'].tolist()
if not wav_files:
    raise ValueError('ptk_label contains no recordings; nothing to preprocess')

n_mels = 128
sr_param = [16000] * len(wav_files)
mel_param = [n_mels] * len(wav_files)

# One audio_preprocess call per file, run on a thread pool; ex.map keeps the
# results in the same order as wav_files.
with ThreadPoolExecutor() as ex:
    results = list(tqdm(ex.map(audio_preprocess, wav_files, sr_param, mel_param), total=len(wav_files)))

wav_info = pd.DataFrame(results, columns=['wav_data', 'duration', 'wav_length'])
b_pad_wav = wav_info['wav_data']

# Padding is a cheap in-memory operation, so a plain loop replaces the
# second thread pool.
audio_padded_data = [wav_padding(mel) for mel in b_pad_wav]

ptk_x_padded_data = np.stack(audio_padded_data)
# Swap the last two axes of the stacked spectrograms.
ptk_x_data = np.transpose(ptk_x_padded_data, (0, 2, 1))

ptk_x_data_length = wav_info['wav_length'].to_numpy()

ptk_y_data = np.array(ptk_label['score'].tolist())

# Features, lengths and targets must stay row-aligned for the split below.
assert ptk_x_data.shape[0] == ptk_x_data_length.shape[0] == ptk_y_data.shape[0]

100%|██████████| 199/199 [00:16<00:00, 11.84it/s]
100%|██████████| 199/199 [00:00<00:00, 9021.86it/s]


In [15]:
# Hold out 10% as the test set, then 10% of the remainder for validation.
# A fixed random_state keeps the partition (and every metric derived from
# it) identical across kernel restarts.
ptk_x_train_, ptk_x_test, ptk_x_data_length_train_, ptk_x_data_length_test, ptk_y_train_, ptk_y_test = train_test_split(
    ptk_x_data, ptk_x_data_length, ptk_y_data, test_size=0.1, random_state=42)
ptk_x_train, ptk_x_valid, ptk_x_data_length_train, ptk_x_data_length_valid, ptk_y_train, ptk_y_valid = train_test_split(
    ptk_x_train_, ptk_x_data_length_train_, ptk_y_train_, test_size=0.1, random_state=42)

ptk_x_train.shape, ptk_x_valid.shape, ptk_x_test.shape, ptk_x_data_length_train.shape, ptk_x_data_length_valid.shape, ptk_x_data_length_test.shape, ptk_y_train.shape, ptk_y_valid.shape, ptk_y_test.shape

((161, 312, 128),
 (18, 312, 128),
 (20, 312, 128),
 (161,),
 (18,),
 (20,),
 (161,),
 (18,),
 (20,))

In [16]:
# "ptk" network: same layout as build_ptk_model, but every convolution uses
# a (6, 3) kernel.
inputs = layers.Input(shape=(312, 128, 1))
net = layers.BatchNormalization(momentum=0.9)(inputs)

# Three stages of two convolutions followed by 2x2 average pooling.
for n_filters in (32, 64, 128):
  for _ in range(2):
    net = layers.Conv2D(n_filters, (6, 3), activation='relu', padding='same')(net)
  net = layers.AveragePooling2D((2, 2))(net)

# Merge the last two axes so every step along axis 1 becomes one vector.
b, t, f, c = net.shape
net = layers.Reshape((-1, f * c))(net)

net = layers.Dense(128, activation='relu')(net)
net = layers.Flatten()(net)
net = layers.Dense(256, activation='relu')(net)
net = layers.Dense(1)(net)

ptk_model_1 = Model(inputs=inputs, outputs=net)
ptk_model_1.summary()

In [17]:
# Regression set-up: MSE is the training loss, MAE is reported alongside it.
ptk_model_1.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Training stops after 10 epochs without val_loss improvement; the
# checkpoint file is only overwritten when the monitored value improves.
ptk_callbacks = [
    EarlyStopping(patience=10, monitor='val_loss'),
    ModelCheckpoint('ptk_model.keras', save_best_only=True),
]

ptk_model_1.fit(
    ptk_x_train,
    ptk_y_train,
    epochs=100,
    batch_size=16,
    validation_data=(ptk_x_valid, ptk_y_valid),
    callbacks=ptk_callbacks,
    verbose=1,
)

Epoch 1/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 1s/step - loss: 38.3133 - mae: 3.2056 - val_loss: 2.3587 - val_mae: 1.3526
Epoch 2/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 106ms/step - loss: 1.9290 - mae: 1.0987 - val_loss: 0.7017 - val_mae: 0.6588
Epoch 3/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 87ms/step - loss: 0.6674 - mae: 0.6282 - val_loss: 0.7701 - val_mae: 0.6930
Epoch 4/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 87ms/step - loss: 0.6687 - mae: 0.6050 - val_loss: 0.8493 - val_mae: 0.7380
Epoch 5/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 98ms/step - loss: 0.6437 - mae: 0.6310 - val_loss: 0.5846 - val_mae: 0.5590
Epoch 6/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 102ms/step - loss: 0.7027 - mae: 0.6346 - val_loss: 0.4904 - val_mae: 0.5200
Epoch 7/100
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 89ms/step

<keras.src.callbacks.history.History at 0x7ee8ab873560>

In [18]:
# Reload the checkpoint written during training and score it on the test set.
ptk_model_1 = tf.keras.models.load_model('ptk_model.keras')
ptk_model_1.evaluate(ptk_x_test, ptk_y_test)

# NOTE(review): the predictions below cover the WHOLE dataset, including the
# rows the model was trained on, so this correlation is not a held-out
# estimate.
pred_ = ptk_model_1.predict(ptk_x_data)
pred = np.round(pred_, 1)

# ptk_x_data was built in ptk_label row order, so the single output column
# can be attached positionally in one step instead of one .loc write per row.
ptk_label['pred'] = pred[:, 0].astype(np.float64)
ptk_label[['score', 'pred']].corr()

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - loss: 0.1277 - mae: 0.2869
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 409ms/step


Unnamed: 0,score,pred
score,1.0,0.835201
pred,0.835201,1.0
