## SWAHILI NOTEBOOK
### Install the necessary libraries

In [None]:
# !pip install timm -q
# !pip install fastai --upgrade --q
# !pip install fastaudio --q
# exit()

### Mount drive

In [None]:
# Mount Google Drive inside the Colab runtime; the dataset folder used by the
# later cells lives under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# %%time
# !unzip -o -q '/content/drive/MyDrive/Noise/swahili_audio_classification/Swahili_words.zip' -d "/content/Swahili_words"

In [None]:
import os
import librosa
import soundfile
from tqdm.notebook import tqdm

def reduce_time(path, trim_seconds=0.5):
    """Remove the leading `trim_seconds` of the audio file at `path`, in place.

    The file is decoded with `librosa.load`, the first
    `int(sr * trim_seconds)` samples are dropped, and the remainder is written
    back to the same path with `soundfile.write`.

    Clips that are not longer than the trim are left untouched: overwriting
    them would replace the recording with an empty file.

    NOTE(review): `librosa.load` is called without an explicit `sr`, so the
    file is rewritten at whatever rate librosa returns - confirm this matches
    the `sample_rate` used by the spectrogram config.

    Args:
        path: Path of the audio file to trim; it is overwritten.
        trim_seconds: Seconds to cut from the start (default 0.5, which equals
            the original `sr // 2` samples).
    """
    y, sr = librosa.load(path)
    start = int(sr * trim_seconds)
    if start >= len(y):
        # Nothing would remain after trimming; keep the original recording.
        return
    soundfile.write(path, y[start:], sr)


# Trim every entry of the extracted archive in place. The files are
# overwritten, so running this cell again trims them a second time.
words_dir = '/content/Swahili_words'
for file_name in tqdm(os.listdir(words_dir)):
    reduce_time(os.path.join(words_dir, file_name))


In [None]:
import warnings
from fastai.vision.all import *
from fastaudio.core.all import *
from fastaudio.augment.all import *
import numpy as np

# Silence library warnings for the rest of the notebook.
warnings.filterwarnings('ignore')

# Seed reused by the fold split and the RNG seeding further down.
seed = 2020

# Drive folder holding the competition CSVs. The trailing slash matters:
# file names are appended with plain string concatenation.
path = "/content/drive/MyDrive/Noise/swahili_audio_classification/"

In [None]:
# Some of the augmentations you can use
# AddNoiseGPU, ChangeVolumeGPU, DeltaGPU, MaskFreqGPU, MaskTimeGPU, SignalCutoutGPU, SignalLossGPU, TfmResizeGPU
# help(TfmResizeGPU)

### Load the datasets

In [None]:
# Read the training table, the test table and the sample submission from the
# Drive folder, in that order.
train, test, ss = (
    pd.read_csv(path + csv_name)
    for csv_name in ("Train.csv", "Test.csv", "SampleSubmission.csv")
)

# Display the training table as the cell output.
train

### Prepare the cross validation scheme

In [None]:
from sklearn.model_selection import StratifiedKFold

# Turn the bare file names into full paths inside the extracted audio folder.
audio_root = "/content/Swahili_words/"
for frame in (train, test):
    frame["Word_id"] = audio_root + frame["Word_id"]

# Ten shuffled folds, stratified on the target word. Each row is tagged with
# the fold in which it belongs to the validation part.
skf = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
for fold_no, (_, held_out) in enumerate(skf.split(train, train.Swahili_word)):
    # `held_out` holds positional indices; they double as labels because
    # `train` still has the default index from `read_csv`.
    train.loc[held_out, 'fold'] = fold_no
train["fold"] = train["fold"].astype(int)
train

### Seed

In [None]:
def random_seed(seed_value, use_cuda):
    """Seed the NumPy, PyTorch and Python random number generators.

    When `use_cuda` is truthy the CUDA generators are seeded as well, and
    cuDNN is switched to `deterministic=True` / `benchmark=False`.
    """
    for seeder in (np.random.seed, torch.manual_seed, random.seed):
        seeder(seed_value)

    if not use_cuda:
        return

    # Seed the current device and then every device.
    for cuda_seeder in (torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        cuda_seeder(seed_value)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

# Seed through the local helper (CUDA branch enabled), then through the
# `set_seed` helper that comes in with the star imports.
random_seed(seed_value=seed, use_cuda=True)
set_seed(seed, True)

### Transforms and data loader

In [None]:
# Candidate GPU augmentations. This list is not referenced by any other cell
# visible in this notebook.
augs = [
    AddNoiseGPU,
    ChangeVolumeGPU,
    DeltaGPU,
    MaskFreqGPU,
    MaskTimeGPU,
    SignalCutoutGPU,
    SignalLossGPU,
    TfmResizeGPU,
]

In [None]:
# Mel-spectrogram settings handed to the BasicMelSpectrogram config.
params = dict(
    n_fft=2048,
    hop_length=308,
    n_mels=224,
    sample_rate=16000,
    win_length=1024,
)
# Build the config, then the audio-to-spectrogram transform driven by it.
cfg = AudioConfig.BasicMelSpectrogram(**params)
aud2spec = AudioToSpec.from_cfg(cfg)

In [None]:
class AudioNormalize(Transform):
    """Standardise a single `AudioTensor` to zero mean and unit standard deviation.

    The standard deviation is floored at 1e-8 so that a constant clip (for
    example pure silence) comes out as zeros instead of NaN from a 0/0
    division. Clips whose standard deviation is at least 1e-8 are normalised
    exactly as before.
    """
    def encodes(self, x:AudioTensor):
        centred = x - x.mean()
        return centred / x.std().clamp_min(1e-8)

def get_dls(fold):
    """Build the audio `DataLoaders` for one cross-validation fold.

    Rows of the global `train` frame whose `fold` column equals `fold` are
    selected by the splitter mask; inputs are read from column 0 and labels
    from column 1 (presumably `Word_id` and `Swahili_word` - verify against
    Train.csv).

    Args:
        fold: Fold number to select with the splitter mask.

    Returns:
        The dataloaders (batch size 64, placed on 'cuda'), with their RNG
        seeded from the global `seed`.
    """
    # Release cached GPU memory left over from a previous fold.
    torch.cuda.empty_cache()

    holdout_mask = train.fold == fold
    signal_block = AudioBlock(force_mono=True, crop_signal_to=5000)

    # NOTE(review): ChangeVolume and SignalLoss are listed after aud2spec in
    # the batch transforms - confirm they still take effect on the batch.
    blueprint = DataBlock(
        blocks=(signal_block, CategoryBlock),
        get_x=ColReader(0),
        get_y=ColReader(1),
        splitter=MaskSplitter(holdout_mask),
        item_tfms=[AudioNormalize],
        batch_tfms=[aud2spec, ChangeVolume, SignalLoss],
    )

    loaders = blueprint.dataloaders(train, bs=64, num_workers=0, device='cuda')
    loaders.rng.seed(seed)
    return loaders

In [None]:
# help(cnn_learner)

### Modelling

In [None]:
# Train one model per listed fold and collect its test-time-augmented
# predictions for the test clips.
predictions = []
for fold in [4]:
    print(f"\n\nFold {fold}")
    dls = get_dls(fold)
    model_name = f'/content/fold{fold}-model'

    # The loss is passed by keyword: the third positional parameter of
    # `cnn_learner` is not `loss_func` in every fastai release, so a positional
    # loss can end up bound to a different argument.
    learn = cnn_learner(dls, resnet50, loss_func=CrossEntropyLossFlat(), metrics=accuracy, n_in=1)
    callbacks = [
            # EarlyStoppingCallback(monitor='valid_loss', min_delta=0.05, patience=6),
             # Checkpoint to `model_name`, monitoring the validation loss.
             SaveModelCallback(monitor='valid_loss', fname=model_name)
             ]

    # Train the whole network, using the first learning-rate suggestion
    # returned by the LR finder.
    learn.unfreeze()
    lr = learn.lr_find(show_plot=False)[0]
    # learn.fine_tune(12, wd=0.1, base_lr=lr, cbs=callbacks)
    learn.fit_one_cycle(16, wd=0.1, lr_max=lr, cbs=callbacks)
    # lr = learn.lr_find(show_plot=False)[0]
    # learn.freeze_to(-2)
    # learn.fit_one_cycle(5, wd=0.1, cbs=callbacks)

    tdl = learn.dls.test_dl(test["Word_id"]) # quickly create test data loader

    # Reload the saved checkpoint before predicting with TTA (n=32).
    model = learn.load(model_name)
    test_preds_tta, test_labels_tta = model.tta(dl=tdl, n=32)
    predictions.append(test_preds_tta)

In [None]:
# Average the collected prediction tensors element-wise and write the result
# into every submission column after the first (the id column).
# NOTE(review): this assumes the submission columns and rows are in the same
# order as the model's classes and the test table - confirm.
fold_probs = np.stack([fold_pred.numpy() for fold_pred in predictions])
ss[ss.columns[1:]] = fold_probs.mean(axis=0)
ss.head()

### Submissions

In [None]:
from sklearn.metrics import log_loss 

# Labelled subset used to score the submission locally.
val = pd.read_csv(path+'Validation.csv')[['Word_id', 'Swahili_word']]

# Join on Word_id so each label is scored against its own prediction row.
# Filtering `ss` and comparing by position is only correct when both tables
# happen to list the clips in the same order. Validation ids that are missing
# from `ss` are dropped by the inner join.
scored = val.merge(ss, on='Word_id', how='inner')
chk = scored[ss.columns]

# NOTE(review): log_loss pairs the probability columns with the labels in
# sorted order - confirm the submission columns follow that order.
log_loss(scored.Swahili_word, chk[chk.columns[1:]])

In [None]:
# Write the filled-in submission table to disk without the index column.
name = "/content/final_sub.csv"
ss.to_csv(path_or_buf=name, index=False)