# 載入套件

In [9]:
import numpy as np
import pandas as pd
import librosa
import torch
import random
from joblib import dump, load
import transformers
from transformers import AutoFeatureExtractor
from transformers import AutoConfig
from transformers import AutoModelForAudioClassification

In [2]:
# Seed every random number generator this notebook imports with the same value.
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Query CUDA availability once; it decides both the GPU seeding and `device`.
has_cuda = torch.cuda.is_available()
if has_cuda:
    torch.cuda.manual_seed_all(seed)

# Request deterministic cuDNN kernels and switch off its benchmark mode.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

if has_cuda:
    device = "cuda"
else:
    device = "cpu"

# 資料與模型設定

In [3]:
# Both public-test inputs live under one dataset directory:
# the datalist CSV and the folder holding the audio files.
public_dataset_dir = './Public Testing Dataset'
public_medical_path = f'{public_dataset_dir}/test_datalist_public.csv'
public_acoustic_path = f'{public_dataset_dir}/test_data_public/'

In [4]:
# joblib files loaded further down: the medical-feature model and the
# final classifier that produces the submitted categories.
medical_model_path = "AI_CUP_medical_model_voting_firsthalf.joblib"
final_model_path = "final_model.joblib"

# The acoustic checkpoint location is not hard-coded; it is read from a text
# file and stripped of surrounding whitespace.
with open("./accoustic_model_path.txt", "r") as path_file:
    raw_path = path_file.read()
accoustic_model_path = raw_path.strip()
accoustic_model_path

'accoustic_model/checkpoint-1924'

# 載入公開測試資料

In [5]:
# Read the public testing datalist (the CSV at `public_medical_path`).
public_source_df = pd.read_csv(public_medical_path)

# Report the table's dimensions and column labels.
for label, value in (
    ("public_source_df.shape :", public_source_df.shape),
    ("public_source_df.columns :", public_source_df.columns),
):
    print(label, value)

public_source_df.shape : (500, 27)
public_source_df.columns : Index(['ID', 'Sex', 'Age', 'Narrow pitch range', 'Decreased volume', 'Fatigue',
       'Dryness', 'Lumping', 'heartburn', 'Choking', 'Eye dryness', 'PND',
       'Smoking', 'PPD', 'Drinking', 'frequency', 'Diurnal pattern',
       'Onset of dysphonia ', 'Noise at work', 'Occupational vocal demand',
       'Diabetes', 'Hypertension', 'CAD', 'Head and Neck Cancer',
       'Head injury', 'CVA', 'Voice handicap index - 10'],
      dtype='object')


# 資料前處理

In [6]:
from sklearn.preprocessing import StandardScaler
def medical_data_proccessing(df):
    """Clean the medical questionnaire table and return the result as a new frame.

    Processing steps:
      * subtract 1 from 'Sex' (intended to give a 0/1 encoding; presumably the
        raw coding is 1/2 -- confirm against the data dictionary),
      * replace missing 'PPD' and 'Voice handicap index - 10' values with 0,
      * standardize 'Age' and 'Voice handicap index - 10' with a StandardScaler.

    The input frame is copied first, so the caller's object is never modified
    and calling this function repeatedly on the same raw frame always gives
    the same result.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing at least the columns 'Sex', 'PPD', 'Age' and
        'Voice handicap index - 10'. All other columns pass through unchanged.

    Returns
    -------
    pandas.DataFrame
        A processed copy of ``df`` with the same columns and row order.
    """
    # Work on a copy: mutating the argument made re-running the calling cell
    # shift 'Sex' and re-standardize the numeric columns a second time.
    df = df.copy()

    # Shift the sex code down by one.
    df['Sex'] = df['Sex'] - 1

    # Missing values in these two columns are treated as 0.
    df['PPD'] = df['PPD'].fillna(0)
    df['Voice handicap index - 10'] = df['Voice handicap index - 10'].fillna(0)

    # Standardize the two columns whose magnitudes are much larger than the rest.
    # NOTE(review): the scaler is fit on whatever frame is passed in. If the
    # downstream models were trained on features scaled with training-set
    # statistics, those statistics should be reused here -- confirm.
    scaled_cols = ['Age', 'Voice handicap index - 10']
    standardScaler = StandardScaler()
    df[scaled_cols] = standardScaler.fit_transform(df[scaled_cols])

    return df

In [7]:
# Run the medical-table preprocessing on the public test datalist.
public_df = medical_data_proccessing(public_source_df)

# Attach the audio file location for every row: <public_acoustic_path><ID>.wav
public_df['wav_path'] = [
    f"{public_acoustic_path}{sample_id}.wav" for sample_id in public_df['ID']
]

print("df col :\n", public_df.columns)
print("df shape :", public_df.shape)

df col :
 Index(['ID', 'Sex', 'Age', 'Narrow pitch range', 'Decreased volume', 'Fatigue',
       'Dryness', 'Lumping', 'heartburn', 'Choking', 'Eye dryness', 'PND',
       'Smoking', 'PPD', 'Drinking', 'frequency', 'Diurnal pattern',
       'Onset of dysphonia ', 'Noise at work', 'Occupational vocal demand',
       'Diabetes', 'Hypertension', 'CAD', 'Head and Neck Cancer',
       'Head injury', 'CVA', 'Voice handicap index - 10', 'wav_path'],
      dtype='object')
df shape : (500, 28)


In [8]:
# Keep the first 27 columns by position, which leaves out the trailing
# 'wav_path' column that was appended to public_df.
medical_col_count = 27
data_medical = public_df.iloc[:, :medical_col_count]

print("data_medical col :\n", data_medical.columns)
print("data_medical shape :", data_medical.shape)

data_medical col :
 Index(['ID', 'Sex', 'Age', 'Narrow pitch range', 'Decreased volume', 'Fatigue',
       'Dryness', 'Lumping', 'heartburn', 'Choking', 'Eye dryness', 'PND',
       'Smoking', 'PPD', 'Drinking', 'frequency', 'Diurnal pattern',
       'Onset of dysphonia ', 'Noise at work', 'Occupational vocal demand',
       'Diabetes', 'Hypertension', 'CAD', 'Head and Neck Cancer',
       'Head injury', 'CVA', 'Voice handicap index - 10'],
      dtype='object')
data_medical shape : (500, 27)


# 模型預測

In [10]:
# Feature extractor restored from the same checkpoint path as the acoustic model.
feature_extractor = AutoFeatureExtractor.from_pretrained(accoustic_model_path)


def preprocess_function(examples, sampling_rate=16000, max_length=32000):
    """Load one audio file and turn it into tensors for the acoustic model.

    Parameters
    ----------
    examples : str
        Path of the audio file to load (a single path, despite the plural name).
    sampling_rate : int, optional
        Rate passed to ``librosa.load`` as ``sr`` and reported to the feature
        extractor. Defaults to 16000, the value previously hard-coded.
    max_length : int, optional
        Upper bound handed to the feature extractor together with
        ``truncation=True``. Defaults to 32000, the value previously
        hard-coded. Presumably counted in samples (2 s at 16 kHz) -- confirm
        for the extractor class stored in the checkpoint.

    Returns
    -------
    The feature extractor's output, requested as PyTorch tensors
    (``return_tensors="pt"``).
    """
    # librosa.load returns (waveform, sampling_rate); only the waveform is needed.
    waveform, _ = librosa.load(examples, sr=sampling_rate, offset=0)
    inputs = feature_extractor(
        waveform,
        sampling_rate=sampling_rate,
        max_length=max_length,
        truncation=True,
        return_tensors="pt",
    )
    return inputs

In [11]:
def second_stage_dataproccessing(training_df, acoustic_model, medical_model):
    """Build the second-stage feature table from both first-stage models.

    Each row of ``training_df`` yields exactly one output row made of:
      * the columns at positions 1..26 of ``training_df`` (``iloc[:, 1:27]``),
      * ``acoustic_pred``: argmax over the logits the acoustic model returns
        for the file at the row's ``wav_path``,
      * ``medical_pred``: first element of ``medical_model.predict`` applied to
        that row's medical columns,
      * ``ID``: the row's identifier, kept as the last column.

    Rows are processed by position, so a repeated ID produces one output row
    per input row instead of being matched against every row sharing that ID.

    Parameters
    ----------
    training_df : pandas.DataFrame
        Needs an 'ID' column, a 'wav_path' column, and the medical features at
        column positions 1..26.
    acoustic_model : callable
        Called as ``acoustic_model(**inputs)``; the result must expose
        ``.logits`` whose first element holds the class scores.
    medical_model : object
        Must provide ``predict`` accepting a one-row DataFrame.

    Returns
    -------
    pandas.DataFrame
        One row per input row. Every row carries index label 0, so callers
        that need a unique index should reset it. An empty ``training_df``
        gives an empty DataFrame.
    """
    # Positional slice of the medical features; it relies on the column order
    # of training_df (ID first, features next).
    medical_cols = slice(1, 27)
    per_patient = []

    # Inference only: no_grad avoids building an autograd graph per sample.
    with torch.no_grad():
        for pos in range(len(training_df)):
            record = training_df.iloc[[pos]]  # one-row DataFrame
            medical_data = record.iloc[:, medical_cols]
            row = medical_data.copy().reset_index(drop=True)

            # Acoustic model prediction for this sample's audio file.
            inputs = preprocess_function(record['wav_path'].values[0])
            logits = acoustic_model(**inputs).logits[0]
            row['acoustic_pred'] = np.argmax(logits.detach().cpu().numpy())

            # Medical model prediction for the same sample.
            row['medical_pred'] = medical_model.predict(medical_data)[0]
            row['ID'] = record['ID'].values[0]
            per_patient.append(row)

    # pd.concat raises on an empty list, so handle that case explicitly.
    if not per_patient:
        return pd.DataFrame()
    # Concatenate once instead of growing a frame inside the loop.
    return pd.concat(per_patient)

In [12]:
# Acoustic classifier restored from the checkpoint path read from the text file.
acoustic_model = AutoModelForAudioClassification.from_pretrained(accoustic_model_path)

# Medical-feature model deserialized from its joblib file.
medical_model = load(medical_model_path)

In [13]:
# Build the second-stage feature table for the public test set.
public_data = second_stage_dataproccessing(
    training_df=public_df,
    acoustic_model=acoustic_model,
    medical_model=medical_model,
)
print("training_data.shape :", public_data.shape)

training_data.shape : (500, 29)


In [14]:
# Final classifier, deserialized from the joblib file named by final_model_path.
clf = load(filename=final_model_path)

In [15]:
# Give the second-stage table a 0..n-1 index so it lines up with the predictions.
public_data = public_data.reset_index(drop=True)

# Every column except the last one (ID) is fed to the final classifier.
final_features = public_data.iloc[:, :-1].to_numpy()
public_pred = pd.DataFrame(clf.predict(final_features), columns=['Category'])

# Pair each ID with its predicted category; the submitted label is the
# classifier's output plus one.
id_column = public_data.iloc[:, -1]
public_output = pd.concat([id_column, public_pred], axis=1)
public_output['Category'] = public_output['Category'] + 1



In [17]:
# Write the ID/Category table to the submission file without the row index.
public_output.to_csv(path_or_buf='submission.csv', index=False)