# AICUP2023_Spring_PathologicalVoice

https://tbrain.trendmicro.com.tw/Competitions/Details/27

多模態病理嗓音分類競賽

## Step 1: Get the training set from Google Drive

https://drive.google.com/file/d/1GvW2YMpBaEfPeEMbUPUpaRfauwNpymeH/view?usp=sharing

In [None]:
%%bash
# Install (or upgrade) gdown, the command used below to fetch from Google Drive
pip install --upgrade --no-cache-dir gdown
# Download the Google Drive file with this id (the next lines expect it to arrive as train.zip)
gdown https://drive.google.com/uc?id=1GvW2YMpBaEfPeEMbUPUpaRfauwNpymeH
# Unpack the archive, then delete it
unzip train.zip
rm train.zip

In [None]:
# List the CSV files directly under ./train in long format
!ls -l ./train/*.csv

## Step 2: Audio to Spectrogram images



In [None]:
import os
import glob
import shlex
import subprocess

# Render one 260x260 spectrogram PNG into ./train/ for every training .wav file.
sound_path = "./train/training_voice_data/*.wav"
image_path = "./train/"
files = glob.glob(sound_path)
failed = []
for file_path in files:
    # <name>.wav -> <image_path>/<name>.png
    pre, ext = os.path.splitext(os.path.basename(file_path))
    outputfile = os.path.join(image_path, pre + ".png")
    # The command runs through a shell, so quote both paths: an unquoted path
    # containing spaces or shell metacharacters would break or alter the command.
    src = shlex.quote(file_path)
    dst = shlex.quote(outputfile)
    # Three chained ffmpeg processes: re-emit the audio as wav, draw the
    # spectrum picture as a PNG stream, then write that stream to the output file.
    cmdstr = f'ffmpeg -i {src} -f wav - | ffmpeg -i - -filter_complex "showspectrumpic=s=260x260:mode=separate:legend=disabled" -c:v png -f image2pipe - | ffmpeg -y -i - {dst}'
    result = subprocess.run(cmdstr, shell=True)
    # The shell reports the status of the last command in the pipeline only.
    if result.returncode != 0:
        failed.append(file_path)
print(len(files))
if failed:
    # Surface conversion failures instead of silently leaving images missing.
    print(f"ffmpeg failed for {len(failed)} file(s): {failed}")

In [None]:
# List the PNG files now present under ./train
!ls ./train/*.png

## Step 3: Import python libraries

In [None]:
import os
import glob
import torch
from torch import nn
from torch.utils.data import Dataset, DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

In [None]:
# Report which CUDA device will be used. Guard the query so that a machine
# without CUDA gets a clear message instead of an opaque error from torch.
if torch.cuda.is_available():
    device_name = torch.cuda.get_device_name(0)
    print(f"Using GPU {device_name}")
else:
    device_name = None
    print("No CUDA device available: the cells below call .cuda() and need a GPU runtime")

In [None]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

## Step 4: read the training CSV file

In [None]:
import pandas as pd
# Load the training data list into a DataFrame and print its (rows, columns) shape
df_train = pd.read_csv("./train/training datalist.csv")
print(df_train.shape)

In [None]:
# Display the first rows of the training table
df_train.head()

In [None]:
# First CSV column, taken by position (presumably the sample IDs — confirm against the header)
train_files = df_train.iloc[:,0].values
# Fourth CSV column minus one; presumably the 1-based 'Disease category'
# turned into 0-based class ids — confirm the column order
train_labels = df_train.iloc[:,3].values-1
print(train_labels[:10])

In [None]:
# Show the Python type of the label container
type(train_labels)

## Step 5: Show the class distribution of the training labels

In [None]:
import seaborn as sns
# Bar chart of how many samples carry each label value
g = sns.countplot(x=train_labels)

## Step 6: Choose one of CNN models 

### EfficientNet B0 to B7

__Model-EfficientNet__

https://pytorch.org/hub/nvidia_deeplearningexamples_efficientnet/

|  Base model | resolution  | Base model | resolution  |
|---|---|---|---|
| EfficientNetB0  | 224  | EfficientNetB4  | 380  |
| EfficientNetB1  | 240  | EfficientNetB5  | 456  |
| EfficientNetB2  | 260  | EfficientNetB6  | 528  |
| EfficientNetB3  | 300  | EfficientNetB7  | 600  |

### 也可以試其他的models
https://pytorch.org/vision/stable/models.html

In [None]:
# Number of target classes; passed to the model constructor and used to size the probability arrays
num_classes=5

In [None]:
# Path of a saved state_dict to resume from. Assign None instead (line below)
# to skip loading and keep the freshly constructed weights.
#modelfile = None
modelfile = "pv-EnB2-70.pth" 

In [None]:
import torchvision.models as models

# Build an EfficientNet-B2 whose classifier emits num_classes logits.
model = models.efficientnet_b2(num_classes=num_classes)
# Resume from a saved state_dict when a checkpoint path is configured.
# Identity comparison is the correct test for None.
if modelfile is not None:
    model.load_state_dict(torch.load(modelfile))
# Move the model to the GPU (last expression, so the notebook also displays it).
model.cuda()

## Step 7: Instantiating a dataloader 
* Transforms
* CustomDataset
* dataloader

In [None]:
from torchvision import transforms
# NOTE(review): pretrained_size is not referenced by any visible cell, and 240 is the
# EfficientNetB1 resolution in the table above (B2 is listed as 260) — confirm or remove.
pretrained_size = 240
# Per-channel mean / std handed to Normalize (presumably the ImageNet statistics — confirm)
pretrained_means = [0.485, 0.456, 0.406]
pretrained_stds= [0.229, 0.224, 0.225]
# Training preprocessing: PIL image -> tensor, then per-channel normalisation
train_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean = pretrained_means, std = pretrained_stds)
])

In [None]:
from PIL import Image 
class CustomDataset(torch.utils.data.Dataset):
    """Labelled spectrogram dataset driven by a CSV file.

    Every CSV row provides an 'ID' (the image file stem) and a
    'Disease category'; items are ``(image, label)`` pairs where
    ``label`` is ``Disease category - 1``.
    """

    def __init__(self, csv_path, images_folder, transform = None):
        """Read the CSV and remember where the images live.

        Args:
            csv_path: CSV file with 'ID' and 'Disease category' columns.
            images_folder: Directory containing ``<ID>.png`` files.
            transform: Optional callable applied to each PIL image.
        """
        self.df = pd.read_csv(csv_path)
        self.images_folder = images_folder
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.df)

    def __getitem__(self, index):
        """Return ``(image, label)`` for the row at position ``index``."""
        row = self.df.iloc[index]
        filename = row['ID']+".png"
        label = row['Disease category']-1
        # Open inside a context manager so the file handle is released
        # immediately, and force 3-channel RGB so the image always matches the
        # three-value mean/std given to Normalize.
        with Image.open(os.path.join(self.images_folder, filename)) as img:
            image = img.convert("RGB")
        if self.transform is not None:
            image = self.transform(image)
        return image, label

## Step 8: Set up a train dataloader with a custom dataset

In [None]:
# Mini-batch size for the training DataLoader
batches = 48
# Folder holding the spectrogram PNGs, and the CSV listing the training samples
imgdir= "train" 
csvfile = "./train/training datalist.csv"

In [None]:
# Dataset over the training CSV/images, wrapped in a loader that reshuffles every epoch
train_dataset = CustomDataset(csvfile, imgdir, train_transform)
train_dataloader = DataLoader(train_dataset,batch_size=batches, shuffle=True)
print(f"Total images={len(train_dataset)}")

## Step 9: Compute the number of batches per epoch (total_batch)

In [None]:
# Number of batches per epoch, rounded up so a final partial batch is counted.
# (The previous "// batches + 1" over-counted by one whenever the dataset size
# was an exact multiple of the batch size.)
total_batch = (len(train_dataset) + batches - 1) // batches
print(total_batch)

## Step 10: Set the loss function and optimizer

In [None]:
# Cross-entropy loss for multi-class classification
loss = nn.CrossEntropyLoss()
# SGD with momentum over all model parameters
optimizer = torch.optim.SGD(model.parameters(), lr=0.001, momentum=0.6)

## Step 11: Train model

In [None]:
# Number of passes over the training loader for the loop below
num_epochs = 20

In [None]:
# Make sure the model is in training mode. An evaluation cell in this notebook
# calls model.eval(); without this line, re-running training afterwards would
# keep mode-dependent layers (e.g. dropout, batch norm) in inference behaviour.
model.train()
for epoch in range(num_epochs):
    for i, (batch_images, batch_labels) in enumerate(train_dataloader):
        # Zero your gradients for every batch!
        optimizer.zero_grad()
        inputs = batch_images.cuda()
        labels = batch_labels.cuda()
        # Make predictions for this batch
        outputs = model(inputs)

        # Compute the loss and its gradients
        cost = loss(outputs, labels)
        cost.backward()
        # Adjust learning weights
        optimizer.step()

        # Progress line; the modulus is the logging interval (1 = every batch)
        if (i+1) % 1 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], lter [{i+1}/{total_batch}] Loss: {cost.item():.4f}')
    # End-of-epoch line: reports the loss of the last batch, not an epoch average
    print(f'Epoch [{epoch+1}/{num_epochs}] Loss: {cost.item():.4f}')

### more epochs

In [None]:
# Continue training the same model/optimizer for a further 50 epochs.
num_epochs = 50
# Make sure the model is in training mode. An evaluation cell in this notebook
# calls model.eval(); without this line, re-running training afterwards would
# keep mode-dependent layers (e.g. dropout, batch norm) in inference behaviour.
model.train()
for epoch in range(num_epochs):
    for i, (batch_images, batch_labels) in enumerate(train_dataloader):
        # Zero your gradients for every batch!
        optimizer.zero_grad()
        inputs = batch_images.cuda()
        labels = batch_labels.cuda()
        # Make predictions for this batch
        outputs = model(inputs)

        # Compute the loss and its gradients
        cost = loss(outputs, labels)
        cost.backward()
        # Adjust learning weights
        optimizer.step()

        # Progress line; the modulus is the logging interval (1 = every batch)
        if (i+1) % 1 == 0:
            print(f'Epoch [{epoch+1}/{num_epochs}], lter [{i+1}/{total_batch}] Loss: {cost.item():.4f}')
    # End-of-epoch line: reports the loss of the last batch, not an epoch average
    print(f'Epoch [{epoch+1}/{num_epochs}] Loss: {cost.item():.4f}')

## Step 12: Save the trained model

In [None]:
# Save only the weights (state_dict). The filename is the same one assigned to
# modelfile in the model-loading cell.
outputfile = "pv-EnB2-70.pth" 
torch.save(model.state_dict(), outputfile)

## Step 13: Check training results

In [None]:
# Evaluation preprocessing: the same two steps as train_transform
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean = pretrained_means, std = pretrained_stds)
])
# NOTE: rebinds the global batch size; the inference cell below slices its
# result arrays using this value
batches =120

In [None]:
# Loader over the dataset named by the current csvfile/imgdir, read in file order
# (shuffle=False) so prediction i corresponds to CSV row i
test_dataset = CustomDataset(csvfile, imgdir, test_transform)
test_dataloader = DataLoader(test_dataset,batch_size=batches, shuffle=False)
print(f"Total images={len(test_dataset)}")

In [None]:
# Class ids; used as dictionary keys by the per-class accuracy report
classes=[0,1,2,3,4]

In [None]:
import numpy as np
# Buffers filled by the inference loop: one predicted class id per sample ...
train_predictions= np.zeros(len(train_labels))
# ... and one row of per-class probabilities per sample
train_outputs  = np.zeros((len(train_labels),num_classes))

In [None]:
# Peek at the first ten entries of the prediction buffer (zeros right after allocation)
print(train_predictions[:10])

In [None]:
import torch.nn.functional as F

# Run the CNN over test_dataloader and store, per sample, the arg-max class
# (train_predictions) and the softmax probabilities (train_outputs).
model.eval()
# again no gradients needed
with torch.no_grad():
    # Exact batch count, including a final partial batch (floor division
    # under-reported it when the dataset size was not a multiple of the batch size).
    total_batch = len(test_dataloader)
    # Running write position. Derived from the real batch sizes, so the slices
    # stay correct even if the global `batches` was changed after the loader
    # was created.
    offset = 0
    for i, (batch_images, batch_labels) in enumerate(test_dataloader):
        images = batch_images.cuda()
        outputs = model(images)
        _, predictions = torch.max(outputs, 1)
        count = predictions.shape[0]
        train_predictions[offset:offset + count] = predictions.cpu().numpy()
        train_outputs[offset:offset + count, :] = F.softmax(outputs, dim=1).cpu().numpy()
        offset += count
        if (i+1) % 10 == 0:
            print(f'lter [{i+1}/{total_batch}]')

In [None]:
# First ten predicted class ids (held as floats in the NumPy buffer)
print(train_predictions[:10])

In [None]:
# First ten rows of the stored softmax probabilities
print(train_outputs[:10, :])

In [None]:
# Integer copy of the float prediction buffer
train_results=train_predictions.astype(int)

In [None]:
# Side-by-side look at the first ten labels and the first ten integer predictions
print(train_labels[:10])
print(train_results[:10])

In [None]:
# The same first ten predictions from the original (float) buffer
print(train_predictions[:10])

# 從這裡加xgboost

In [None]:
from xgboost import XGBClassifier
# Create the XGBClassifier model
xgboostModel = XGBClassifier(n_estimators=100, learning_rate= 0.3)

In [None]:
# Fill missing values with 0
df_train['PPD'] = df_train['PPD'].fillna(0)
df_train['Voice handicap index - 10'] = df_train['Voice handicap index - 10'].fillna(0)

# Scale down the columns with large values
# NOTE: df_train is modified in place, so running this cell twice divides the columns twice
df_train['Age'] = df_train['Age'] / 50
df_train['Voice handicap index - 10'] = df_train['Voice handicap index - 10'] / 40

In [None]:
# Positional feature columns: 1 and 2, then 4 through 27 (positions 0 and 3 are left out)
cols = [1, 2, *range(4, 28)]
print(cols)

In [None]:
# Select the tabular feature columns by position
X_train=df_train.iloc[:, cols]
X_train.shape


In [None]:
# Append the stored CNN class probabilities as extra feature columns
# (concat with axis=1 aligns the two frames on the row index)
X_train=pd.concat([X_train, pd.DataFrame(train_outputs)], axis=1)
X_train.shape

In [None]:
# Convert the feature table to a plain NumPy array before fitting
X_train=X_train.to_numpy()
print(X_train)

In [None]:
# Fit the model on the training data
# Target: fourth CSV column minus one (same expression as train_labels)
y_train = df_train.iloc[:,3].values-1
xgboostModel.fit(X_train, y_train)

In [None]:
# Predict classes for the training data
# NOTE: rebinds train_predictions, replacing the CNN arg-max predictions stored earlier
train_predictions = xgboostModel.predict(X_train)

In [None]:
# Per-class accuracy of train_predictions against train_labels.
correct_pred = {classname: 0 for classname in classes}
total_pred = {classname: 0 for classname in classes}
# collect the correct predictions for each class
for label, prediction in zip(train_labels, train_predictions):
    if label == prediction:
        correct_pred[classes[label]] += 1
    total_pred[classes[label]] += 1

for classname, correct_count in correct_pred.items():
    # A class with no samples has no defined accuracy; report that instead of
    # raising ZeroDivisionError.
    if total_pred[classname] == 0:
        print(f'Accuracy for class: {classname} is n/a (no samples)')
        continue
    accuracy = 100 * float(correct_count) / total_pred[classname]
    print(f'Accuracy for class: {classname} is {accuracy:.1f} %')


## Step 14: Analyze training results

In [None]:
from sklearn.metrics import confusion_matrix
# Confusion matrix with true labels as the first argument and predictions as the second
confusion=confusion_matrix(train_labels, train_predictions)
print(confusion)

## 測試資料集
https://drive.google.com/file/d/12ARQ6z8HciF7UcKNtsZLOsmprKQQoYbm/view?usp=share_link

## Step 21: Load the test data

In [None]:
%%bash
# Download the Google Drive file with this id (the next lines expect it to arrive as public.zip)
gdown https://drive.google.com/uc?id=12ARQ6z8HciF7UcKNtsZLOsmprKQQoYbm
# Unpack the archive, then delete it
unzip public.zip
rm public.zip

## Step 22: Audio to Spectrogram images

In [None]:
import os
import glob
import shlex
import subprocess

# Render one 260x260 spectrogram PNG into ./public/ for every public test .wav file.
sound_path = "./public/test_data_public/*.wav"
image_path = "./public/"
files = glob.glob(sound_path)
failed = []
for file_path in files:
    # <name>.wav -> <image_path>/<name>.png
    pre, ext = os.path.splitext(os.path.basename(file_path))
    outputfile = os.path.join(image_path, pre + ".png")
    # The command runs through a shell, so quote both paths: an unquoted path
    # containing spaces or shell metacharacters would break or alter the command.
    src = shlex.quote(file_path)
    dst = shlex.quote(outputfile)
    # Three chained ffmpeg processes: re-emit the audio as wav, draw the
    # spectrum picture as a PNG stream, then write that stream to the output file.
    cmdstr = f'ffmpeg -i {src} -f wav - | ffmpeg -i - -filter_complex "showspectrumpic=s=260x260:mode=separate:legend=disabled" -c:v png -f image2pipe - | ffmpeg -y -i - {dst}'
    result = subprocess.run(cmdstr, shell=True)
    # The shell reports the status of the last command in the pipeline only.
    if result.returncode != 0:
        failed.append(file_path)
print(len(files))
if failed:
    # Surface conversion failures instead of silently leaving images missing.
    print(f"ffmpeg failed for {len(failed)} file(s): {failed}")

In [None]:
# List the PNG files now present under ./public
!ls ./public/*.png

## Step 24: read the test CSV file

In [None]:
# List the CSV files directly under ./public in long format
!ls -l ./public/*.csv

In [None]:
import pandas as pd
# Load the public test data list into a DataFrame and print its (rows, columns) shape
df_test = pd.read_csv("./public/test_datalist_public.csv")
print(df_test.shape)

In [None]:
# Display the first rows of the test table
df_test.head()

## Step 26: CustomTestDataset



In [None]:
from PIL import Image 
class CustomTestDataset(torch.utils.data.Dataset):
    """Unlabelled spectrogram dataset driven by a CSV file.

    Every CSV row provides an 'ID' (the image file stem); items are
    ``(image, -1)`` pairs, with ``-1`` standing in for the missing label.
    """

    def __init__(self, csv_path, images_folder, transform = None):
        """Read the CSV and remember where the images live.

        Args:
            csv_path: CSV file with an 'ID' column.
            images_folder: Directory containing ``<ID>.png`` files.
            transform: Optional callable applied to each PIL image.
        """
        self.df = pd.read_csv(csv_path)
        self.images_folder = images_folder
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the CSV."""
        return len(self.df)

    def __getitem__(self, index):
        """Return ``(image, -1)`` for the row at position ``index``."""
        filename = self.df.iloc[index]['ID']+".png"
        # Placeholder so items keep the (image, label) shape of the training dataset
        label = -1
        # Open inside a context manager so the file handle is released
        # immediately, and force 3-channel RGB so the image always matches the
        # three-value mean/std given to Normalize.
        with Image.open(os.path.join(self.images_folder, filename)) as img:
            image = img.convert("RGB")
        if self.transform is not None:
            image = self.transform(image)
        return image, label

## Step 27: Instantiating a dataloader 
* Transforms
* CustomTestDataset
* dataloader

In [None]:
# Test preprocessing: PIL image -> tensor, then per-channel normalisation
# with the same mean/std used for training
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean = pretrained_means, std = pretrained_stds)
])

In [None]:
# Rebind the batch size, image folder and CSV path to the public test set
batches = 50
imgdir= "public" 
csvfile = "./public/test_datalist_public.csv"

In [None]:
# Unlabelled test dataset, read in file order (shuffle=False) so prediction i
# corresponds to CSV row i
test_dataset = CustomTestDataset(csvfile, imgdir, test_transform)
test_dataloader = DataLoader(test_dataset,batch_size=batches, shuffle=False)
print(f"Total images={len(test_dataset)}")

In [None]:
classes=[0,1,2,3,4]
# Buffers filled by the inference loop: one predicted class id per test sample ...
test_predictions = np.zeros(len(test_dataset))
# ... and one row of per-class probabilities per test sample
test_outputs = np.zeros((len(test_dataset),num_classes))

In [None]:
# Imported here so this cell does not depend on an earlier cell having run.
import torch.nn.functional as F

# Run the CNN over test_dataloader and store, per sample, the arg-max class
# (test_predictions) and the softmax probabilities (test_outputs).
model.eval()
# again no gradients needed
with torch.no_grad():
    # Exact batch count, including a final partial batch (floor division
    # under-reported it when the dataset size was not a multiple of the batch size).
    total_batch = len(test_dataloader)
    # Running write position. Derived from the real batch sizes, so the slices
    # stay correct even if the global `batches` was changed after the loader
    # was created.
    offset = 0
    for i, (batch_images, batch_labels) in enumerate(test_dataloader):
        images = batch_images.cuda()
        outputs = model(images)
        _, predictions = torch.max(outputs, 1)
        count = predictions.shape[0]
        test_predictions[offset:offset + count] = predictions.cpu().numpy()
        test_outputs[offset:offset + count, :] = F.softmax(outputs, dim=1).cpu().numpy()
        offset += count
        if (i+1) % 10 == 0:
            print(f'lter [{i+1}/{total_batch}]')

## Step 31: Check test results

In [None]:
# Cast the float prediction buffer to integer class ids and show the first ten
test_predictions=test_predictions.astype(int)
test_predictions[:10]


# 插入 XGBoost

In [None]:
import pandas as pd
# Re-read the test data list from disk, replacing any earlier df_test
df_test = pd.read_csv("./public/test_datalist_public.csv")
print(df_test.shape)

In [None]:
# Fill missing values with 0
df_test['PPD'] = df_test['PPD'].fillna(0)
df_test['Voice handicap index - 10'] = df_test['Voice handicap index - 10'].fillna(0)

# Scale down the columns with large values (same divisors as for the training table)
# NOTE: df_test is modified in place, so running this cell twice divides the columns twice
df_test['Age'] = df_test['Age'] / 50
df_test['Voice handicap index - 10'] = df_test['Voice handicap index - 10'] / 40

In [None]:
# Positional feature columns for the test table: 1 through 26 (position 0 is left out)
cols = list(range(1, 27))
print(cols)

In [None]:
# Select the tabular feature columns by position
X_test=df_test.iloc[:, cols]

In [None]:
# Shape of the stored CNN probabilities: (number of test samples, num_classes)
test_outputs.shape

In [None]:
# Append the stored CNN class probabilities as extra feature columns
# (concat with axis=1 aligns the two frames on the row index)
X_test=pd.concat([X_test, pd.DataFrame(test_outputs)], axis=1)
X_test.shape

In [None]:
# Predict classes for the test data.
# The model was fitted on a plain NumPy array, so hand it the same kind of
# input here rather than a DataFrame with mixed string/integer column names.
y_test = xgboostModel.predict(X_test.to_numpy())

In [None]:
# First ten predicted class ids for the test set
print(y_test[:10])

## Step 32: Load the template for test results

In [None]:
# Path of the submission template CSV
csvfile2 = "./public/submission_template_public.csv"

In [None]:
import pandas as pd
# Read the template without a header row, so its columns are labelled 0, 1, ...
df_out = pd.read_csv(csvfile2, header = None)
print(df_out.shape)

In [None]:
# Display the first rows of the template
df_out.head()

## Step 33: Fill the test results

In [None]:
# Write the predictions into column 1, adding 1 to undo the "- 1" applied to the training labels.
# NOTE(review): assigned by position — assumes the template rows are in the same
# order as test_datalist_public.csv; confirm before submitting.
df_out[1]=y_test+1
df_out.head()

In [None]:
# Shape check of the filled submission table
df_out.shape

In [None]:
# Write the submission file without the index column or a header row
df_out.to_csv("esemble-b2-htchu-0515.csv", index=False, header=False)