In [None]:
# !pip install transformers
import sys
sys.path.append('../input/bird-filter-data/noisereduce')
import noisereduce as nr

In [None]:
import torch
import numpy as np
import random
import os
class config:
    """Namespace of hyper-parameters shared by the notebook cells."""

    # Cross-validation
    num_fold = 10

    # Audio clip geometry
    sample_rate = 32_000
    duration = 5  # clip length (presumably in seconds)
    sampleNum = sample_rate * duration  # samples per clip

    # Spectrogram settings
    n_fft = 1024
    win_length = 1024
    hop_length = 512
    n_mels = 64

    # Classification / optimisation
    num_classes = 22
    train_batch_size = 32
    valid_batch_size = 32
    epochs = 20
    learning_rate = 5e-5

    # Use the GPU whenever one is visible to torch.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

config.device

In [None]:
import pandas as pd
import os
# Root directory of the training data; the audio slices are later read from
# its `Slice_data_score` sub-folder.
PATH_TRAIN_DATASET = "../input/scorebirddata/"
# Metadata CSV; later cells read its `filename`, `seg_index` and
# `primary_label` columns.
path_csv = os.path.join(PATH_TRAIN_DATASET, "Filter_Clip_Major_Score_Data.csv")
train_meta = pd.read_csv(path_csv)
# Show the loaded frame as the cell output.
train_meta

In [None]:
# train_meta = train_meta.dropna().reset_index(drop=True)

# Build the per-segment file name "<stem>_<seg_index>.ogg".
# regex=False makes '.ogg' a literal suffix: when the pattern is treated as a
# regular expression the '.' matches any character, so e.g. "frogg" inside a
# name would be rewritten as well.
train_meta['new_filename'] = (
    train_meta['filename'].str.replace('.ogg', '_', regex=False)
    + train_meta['seg_index'].values.astype(int).astype(str)
    + '.ogg'
)

In [None]:
# Length of the longest generated file name (quick sanity check).
train_meta['new_filename'].str.len().max()

In [None]:
import json

# Load the list of scored species from the competition's JSON file.
with open('../input/birdclef-2022/scored_birds.json') as fp:
    scored_birds = json.load(fp)

# Print the list so it is visible in the cell output.
print(scored_birds)

In [None]:
# Row labels of every clip whose primary label is NOT in `scored_birds`.
index = train_meta[~train_meta['primary_label'].isin(scored_birds)]['primary_label'].index
index

In [None]:
# Collapse every non-scored species into a single 'others' class.
# Rows are addressed by label and the column by name, so the assignment does
# not depend on `primary_label` being the first column or on the row labels
# matching row positions; it is also a single vectorised write instead of a
# Python loop.
train_meta.loc[index, 'primary_label'] = 'others'

In [None]:
# Show the frame again after the non-scored labels were replaced by 'others'.
train_meta

In [None]:
from sklearn.preprocessing import LabelEncoder

# Map the string labels to integer class ids; the encoded column is what the
# dataset later returns as the training target.
encoder = LabelEncoder()
train_meta['primary_label_encoded'] = encoder.fit_transform(train_meta['primary_label'])

In [None]:
# Class names in encoded order (position i is the label for class id i).
encoder.classes_

# save encoder

In [None]:
# Persist the class-name array so encoded ids can be mapped back to labels later.
np.save('encoder_list.npy',encoder.classes_)


In [None]:
from sklearn.model_selection import StratifiedKFold

# Assign every row to one of `config.num_fold` folds, stratified on the
# encoded label.
# NOTE(review): no shuffle/random_state is passed, so the split follows row
# order, and segments cut from the same recording may end up in different
# folds - confirm this is acceptable for validation.
skf = StratifiedKFold(n_splits=config.num_fold)
for k, (_, val_ind) in enumerate(skf.split(X=train_meta, y=train_meta['primary_label_encoded'])):
    # Rows of the k-th validation split receive fold id k.
    train_meta.loc[val_ind, 'fold'] = k

In [None]:
def get_data(df, fold):
    """Split ``df`` into training and validation frames by fold id.

    Args:
        df: DataFrame containing a ``fold`` column.
        fold: collection of fold ids that make up the validation set.

    Returns:
        ``(train_df, valid_df)``: the rows whose fold is not in / is in
        ``fold``, each with a fresh 0..n-1 index.
    """
    in_validation = df['fold'].isin(fold)
    held_out = df[in_validation].reset_index(drop=True)
    remaining = df[~in_validation].reset_index(drop=True)
    return remaining, held_out
# Hold out folds 7, 8 and 9 for validation; all other folds form the training set.
train_df,valid_df = get_data(train_meta,[7,8,9])

In [None]:
# Summary statistics of the number of clips per class in the validation split.
valid_df[['primary_label','filename']].groupby('primary_label').count().describe()

In [None]:
# Summary statistics of the number of clips per class in the training split.
train_df[['primary_label','filename']].groupby('primary_label').count().describe()

In [None]:
import torchaudio
# STFT power-spectrogram front end.
# NOTE: the name `transform` is rebound to an MFCC transform by the next cell,
# so when the cells run in order the dataset does not use this spectrogram.
n_fft = 1024
win_length = 1024
hop_length = 512
transform = torchaudio.transforms.Spectrogram(
    n_fft = n_fft,           # number of frequency bins = n_fft//2 + 1
    win_length = win_length, # analysis window size, in samples
    hop_length = hop_length, # stride between frames; frame count ~ samples / hop_length
    center = True,
    pad_mode = 'reflect',
    power=2.0
).to('cpu')
transform

In [None]:
# MFCC front end. This rebinds the module-level `transform`, which
# BirdClefDataset.__getitem__ applies to every waveform.
transform = torchaudio.transforms.MFCC(
    sample_rate = 32000, 
    n_mfcc = 128,  # number of cepstral coefficients returned per frame
    dct_type = 2, 
    norm = 'ortho', 
    log_mels = False, 
)
transform

In [None]:
#!pip install noisereduce
from torch.utils.data import Dataset, DataLoader
import noisereduce as nr
import torchaudio
import random
import numpy as np
class BirdClefDataset(Dataset):
    """Dataset of pre-sliced audio clips returned as ``(features, label)``.

    Each item is loaded from
    ``<PATH_TRAIN_DATASET>/Slice_data_score/<new_filename>``, denoised with
    ``noisereduce``, optionally rolled in time at a random split point and then
    passed through a feature transform.

    Args:
        df: DataFrame with ``new_filename`` and ``primary_label_encoded`` columns.
        augment: when True (default, the historical behaviour) apply the random
            circular time shift. Pass False for deterministic evaluation data.
        feature_transform: callable applied to the waveform. When None the
            module-level ``transform`` is looked up at item time.
    """

    def __init__(self, df, augment=True, feature_transform=None):
        self.audio_paths = df['new_filename'].values
        self.labels = df['primary_label_encoded'].values
        # Not used by __getitem__; kept so the attribute stays available.
        self.stretch = torchaudio.transforms.TimeStretch()
        self.sr = 32000
        self.augment = augment
        self.feature_transform = feature_transform

    def __len__(self):
        return len(self.audio_paths)

    def _random_roll(self, waveform):
        """Circularly shift a ``(channels, samples)`` waveform at a random point.

        The split point is drawn from ``[sr, num_samples - sr]``, which equals
        the former fixed range ``[sr, 4 * sr]`` for a 5-second clip but also
        stays inside clips of any other length.
        """
        num_samples = waveform.shape[1]
        low = self.sr
        high = num_samples - self.sr
        if high < low:
            # Clip too short to leave one second on each side: leave it as is.
            return waveform
        split_point = random.randint(low, high)
        return torch.cat([waveform[:, split_point:], waveform[:, :split_point]], dim=1)

    def __getitem__(self, index):
        filename = os.path.join(PATH_TRAIN_DATASET, 'Slice_data_score', self.audio_paths[index])
        # NOTE(review): the file's own sample rate is discarded and self.sr is
        # assumed - confirm every slice is stored at 32 kHz.
        waveform0, _ = torchaudio.load(filename)
        # noisereduce operates on numpy arrays; convert explicitly rather than
        # relying on an implicit tensor -> array conversion.
        denoised = nr.reduce_noise(y=waveform0.numpy(), sr=self.sr)
        waveform = torch.from_numpy(denoised)

        if self.augment:
            waveform = self._random_roll(waveform)
        label = torch.tensor(self.labels[index])

        extract = transform if self.feature_transform is None else self.feature_transform
        return extract(waveform), label

In [None]:
import torch.nn as nn
import torch.nn.functional as F
#from transformers import Wav2Vec2ForSequenceClassification


In [None]:
def loss_fn(outputs, labels):
    """Return the mean cross-entropy between raw class scores and target ids.

    Args:
        outputs: unnormalised class scores produced by the model.
        labels: integer class ids, one per sample.
    """
    criterion = nn.CrossEntropyLoss()
    return criterion(outputs, labels)

# Train

In [None]:
from sklearn.metrics import f1_score

def train(model, data_loader, optimizer, scheduler, device, epoch):
    """Run one training epoch over ``data_loader``.

    Args:
        model: network to optimise; switched to train mode here.
        data_loader: iterable of ``(spec, labels)`` batches.
        optimizer: optimiser stepped after every batch.
        scheduler: LR scheduler stepped after every batch, or None.
        device: device the batches are moved to.
        epoch: zero-based epoch index, used only for the progress-bar caption.

    Returns:
        ``(mean_batch_loss, accuracy)``. The accuracy is the number of correct
        predictions divided by the number of samples actually seen, so a
        smaller final batch no longer deflates it. It is a 0-dim tensor.
    """
    model.train()

    running_loss = 0
    acc = 0
    seen = 0
    loop = tqdm(data_loader, position=0)
    for spec, labels in loop:
        spec = spec.to(device)
        labels = labels.to(device)

        outputs = model(spec)
        _, preds = torch.max(outputs, 1)
        acc += (preds == labels).sum()
        seen += labels.size(0)

        loss = loss_fn(outputs, labels)

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # The schedule advances once per batch, not once per epoch, so the
        # scheduler's horizon has to be expressed in batches.
        if scheduler is not None:
            scheduler.step()

        running_loss += loss.item()

        loop.set_description(f"Epoch [{epoch+1}/{config.epochs}]")
        loop.set_postfix(loss=loss.item())

    return running_loss / len(data_loader), acc / seen

In [None]:
def valid(model, data_loader, device, epoch):
    """Evaluate ``model`` on ``data_loader`` without updating it.

    Args:
        model: network to evaluate; switched to eval mode here.
        data_loader: iterable of ``(spec, labels)`` batches.
        device: device the batches are moved to.
        epoch: zero-based epoch index, used only for the progress-bar caption.

    Returns:
        ``(mean_batch_loss, macro_f1, accuracy)``. The accuracy is correct
        predictions divided by the number of samples actually seen (a 0-dim
        tensor), so a smaller final batch no longer deflates it.
    """
    model.eval()

    acc = 0
    seen = 0
    running_loss = 0
    pred = []
    label = []

    loop = tqdm(data_loader, position=0)
    # No gradients are needed for evaluation; disabling autograd avoids
    # building a graph for every batch.
    with torch.no_grad():
        for spec, labels in loop:
            spec = spec.to(device)
            labels = labels.to(device)

            outputs = model(spec)
            _, preds = torch.max(outputs, 1)
            acc += (preds == labels).sum()
            seen += labels.size(0)

            loss = loss_fn(outputs, labels)

            running_loss += loss.item()

            pred.extend(preds.view(-1).cpu().detach().numpy())
            label.extend(labels.view(-1).cpu().detach().numpy())

            loop.set_description(f"Epoch [{epoch+1}/{config.epochs}]")
            loop.set_postfix(loss=loss.item())

    valid_f1 = f1_score(label, pred, average='macro')

    return running_loss / len(data_loader), valid_f1, acc / seen

In [None]:
# Wrap the two splits in datasets and batch them.
train_dataset = BirdClefDataset(train_df)
valid_dataset = BirdClefDataset(valid_df)

# os.cpu_count() may return None; fall back to loading in the main process.
num_workers = os.cpu_count() or 0
pin_memory = torch.cuda.is_available()

train_loader = torch.utils.data.DataLoader(
    train_dataset,
    batch_size=config.train_batch_size,
    shuffle=True,
    num_workers=num_workers,
    pin_memory=pin_memory,
)
# Validation does not benefit from shuffling; a fixed order keeps runs comparable.
valid_loader = torch.utils.data.DataLoader(
    valid_dataset,
    batch_size=config.valid_batch_size,
    shuffle=False,
    num_workers=num_workers,
    pin_memory=pin_memory,
)

In [None]:
#import torch
#import torchvision.models as models
#import torch.nn as nn
#class Net(nn.Module):
#    def __init__(self, model):
#        super(Net, self).__init__()
#        #self.conv1= nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3,bias=False)
#        # drop the last layer of the model
#        self.resnet_layer = nn.Sequential(*list(model.children())[:-1])
#        self.Linear_layer = nn.Linear(512, 22) # add a fully connected layer with the adjusted output size
# 
#    def forward(self, x):
#        x = self.resnet_layer(x)
#        x = x.view(x.size(0), -1)
#        x = self.Linear_layer(x)
#        return x
#resnet = models.resnet18(pretrained=True)
#resnet.conv1= nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3,bias=False)
#model = Net(resnet)
##print(model.conv1)
#print(model)

In [None]:
import torch.nn as nn
import torch.utils.model_zoo as model_zoo
# Export only the names that are actually defined in this file; listing the
# deeper variants (resnet34/50/101/152) would make a star-import fail because
# no such functions exist here.
__all__ = ['ResNet', 'resnet18']

# Location of the pretrained weights, keyed by architecture name.
# Despite the variable name, the value is a local file path, not a URL.
model_urls = {
    'resnet18': '../input/weight1/resnet18-5c106cde.pth'
}
def conv3x3(in_planes, out_planes, stride=1):
    """Build a bias-free 3x3 ``Conv2d`` with a padding of 1."""
    conv_kwargs = dict(kernel_size=3, stride=stride, padding=1, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)
 
def conv1x1(in_planes, out_planes, stride=1):
    """Build a bias-free 1x1 ``Conv2d`` (pointwise projection)."""
    conv_kwargs = dict(kernel_size=1, stride=stride, bias=False)
    return nn.Conv2d(in_planes, out_planes, **conv_kwargs)
class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions and a skip connection.

    Args:
        inplanes: number of input channels.
        planes: number of channels produced by both convolutions.
        stride: stride of the first convolution.
        downsample: optional module applied to the input before it is added
            to the residual branch; None means the input is added unchanged.
    """

    # Output channels = planes * expansion.
    expansion = 1

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # Residual branch: conv-bn-relu followed by conv-bn.
        residual = self.relu(self.bn1(self.conv1(x)))
        residual = self.bn2(self.conv2(residual))

        # Skip branch: the input itself, or its projection when one is given.
        shortcut = x if self.downsample is None else self.downsample(x)

        residual += shortcut
        return self.relu(residual)
class Bottleneck(nn.Module):
    """Residual block of 1x1 -> 3x3 -> 1x1 convolutions with a skip connection.

    Args:
        inplanes: number of input channels.
        planes: number of channels of the two inner convolutions; the block
            outputs ``planes * expansion`` channels.
        stride: stride of the 3x3 convolution.
        downsample: optional module applied to the input before it is added
            to the residual branch; None means the input is added unchanged.
    """

    # Output channels = planes * expansion.
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        out_planes = planes * self.expansion
        self.conv1 = conv1x1(inplanes, planes)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = conv3x3(planes, planes, stride)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = conv1x1(planes, out_planes)
        self.bn3 = nn.BatchNorm2d(out_planes)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # Residual branch: reduce, 3x3, expand (no activation after the last BN).
        residual = self.relu(self.bn1(self.conv1(x)))
        residual = self.relu(self.bn2(self.conv2(residual)))
        residual = self.bn3(self.conv3(residual))

        # Skip branch: the input itself, or its projection when one is given.
        shortcut = x if self.downsample is None else self.downsample(x)

        residual += shortcut
        return self.relu(residual)


class ResNet(nn.Module):
    """ResNet backbone for single-channel inputs with a linear classifier head.

    Args:
        block: residual block class (``BasicBlock`` or ``Bottleneck``).
        layers: four integers, the number of blocks in each stage.
        num_classes: size of the final linear layer's output.
        zero_init_residual: when True, the last BatchNorm weight of every
            residual block is initialised to zero.
    """

    def __init__(self, block, layers, num_classes=22, zero_init_residual=False):
        super().__init__()
        self.inplanes = 64

        # Stem: the first convolution takes 1 input channel.
        self.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Four residual stages, registered as layer1..layer4 in this order.
        stage_specs = ((64, 1), (128, 2), (256, 2), (512, 2))
        for position, (planes, stride) in enumerate(stage_specs):
            stage = self._make_layer(block, planes, layers[position], stride=stride)
            setattr(self, f'layer{position + 1}', stage)

        # Head: global average pooling followed by the classifier.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)

        # Zero-initialize the last BN in each residual branch, so that the
        # residual branch starts with zeros and each block behaves like an
        # identity (see https://arxiv.org/abs/1706.02677).
        if zero_init_residual:
            for module in self.modules():
                if isinstance(module, Bottleneck):
                    last_bn = module.bn3
                elif isinstance(module, BasicBlock):
                    last_bn = module.bn2
                else:
                    continue
                nn.init.constant_(last_bn.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Stack ``blocks`` residual blocks; only the first one may change shape."""
        out_planes = planes * block.expansion

        # A projection shortcut is needed whenever the first block changes the
        # spatial size or the channel count.
        downsample = None
        if stride != 1 or self.inplanes != out_planes:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, out_planes, stride),
                nn.BatchNorm2d(out_planes),
            )

        stack = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = out_planes
        stack.extend(block(self.inplanes, planes) for _ in range(1, blocks))

        return nn.Sequential(*stack)

    def forward(self, x):
        stem = (self.conv1, self.bn1, self.relu, self.maxpool)
        stages = (self.layer1, self.layer2, self.layer3, self.layer4)
        for module in stem + stages:
            x = module(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)  # flatten to (batch, features)
        return self.fc(x)


def resnet18(pretrained=False, **kwargs):
    """Constructs a ResNet-18 model.

    Args:
        pretrained (bool): If True, initialise the model from the checkpoint
            stored at ``model_urls['resnet18']``.
        **kwargs: forwarded to ``ResNet`` (e.g. ``num_classes``).

    Returns:
        The model, already moved to ``config.device``.
    """
    model = ResNet(BasicBlock, [2, 2, 2, 2], **kwargs).to(config.device)
    if pretrained:
        # The entry in model_urls is a local file path, which
        # model_zoo.load_url cannot download; read the file directly.
        checkpoint = torch.load(model_urls['resnet18'], map_location=config.device)
        own_state = model.state_dict()
        # Keep only tensors whose name and shape match this architecture.
        # This network's first convolution has 1 input channel and its
        # classifier size is configurable, so those tensors presumably differ
        # from the checkpoint and stay randomly initialised.
        compatible = {
            name: tensor
            for name, tensor in checkpoint.items()
            if name in own_state and tensor.shape == own_state[name].shape
        }
        model.load_state_dict(compatible, strict=False)
    return model

# Build the ResNet-18 (pretrained defaults to False, so weights are freshly
# initialised) and place it on the configured device.
model=resnet18().to(config.device)
#print(model)

In [None]:
from tqdm import tqdm
 
# from transformers import Wav2Vec2Model, Wav2Vec2Config

# configuration = Wav2Vec2Config(num_labels=config.num_classes)
# model = Wav2Vec2ForSequenceClassification(configuration)


# PATH_Model = "../input/model-02f1"
# model_path = os.path.join(PATH_Model, "model.pt")
# model = BirdClefModel()
# model.load_state_dict(torch.load(model_path,map_location='cpu'))


optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
# train() advances the scheduler once per batch, so the cosine horizon must be
# counted in batches. With a horizon of only a few steps the learning rate
# would swing between its maximum and eta_min every few batches instead of
# decaying once over the whole run.
total_steps = max(1, config.epochs * len(train_loader))
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, eta_min=1e-5, T_max=total_steps)

# The epoch count comes from config so it matches the "Epoch [i/N]" captions
# printed by train() and valid().
for epoch in range(config.epochs):
    train_loss,train_acc = train(model, train_loader, optimizer, scheduler, config.device, epoch)
    valid_loss, valid_f1,valid_acc = valid(model, valid_loader, config.device, epoch)
    print('train Loss: ',train_loss,'train_acc: ',train_acc.item(),'valid Loss: ',valid_loss,'valid_acc: ',valid_acc.item()," vaild_F1",valid_f1)

In [None]:
# Display the model's module structure as the cell output.
model

In [None]:
# Save only the learned parameters (the state_dict) to the working directory.
torch.save(model.state_dict(),"model_20.pt")


In [None]:
# Also save the whole model object (not just its parameters) to a second file.
torch.save(model,'model_20.pth')

In [None]:
# Reload a fully pickled model from the input dataset.
# map_location remaps the stored tensors onto the configured device, so a file
# saved from a GPU session also loads on a CPU-only machine.
# NOTE(review): torch.load unpickles arbitrary Python objects - only load files
# from a trusted source.
model = torch.load('../input/rennet18/model_20.pth', map_location=config.device)