In [53]:
import random
import pandas as pd
import numpy as np
import os
from tqdm.auto import tqdm
import librosa

from sklearn.tree import DecisionTreeClassifier
import os
import random
import warnings
from sklearn.model_selection import train_test_split

import librosa
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from tqdm.auto import tqdm
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader

from transformers import AutoModelForAudioClassification, Wav2Vec2FeatureExtractor

warnings.filterwarnings(action='ignore')
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'
import warnings
warnings.filterwarnings(action='ignore')

In [54]:
train_df = pd.read_csv('./train.csv')
test_df = pd.read_csv('./test.csv')
train_df.info()

train_df.reset_index(drop=True, inplace=True)
test_df.reset_index(drop=True, inplace=True)
# valid_df.reset_index(drop=True, inplace=True)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5001 entries, 0 to 5000
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   id      5001 non-null   object
 1   path    5001 non-null   object
 2   label   5001 non-null   int64 
dtypes: int64(1), object(2)
memory usage: 117.3+ KB


In [55]:
CFG = {
    'SR':16000,
    'N_MFCC':128, # Melspectrogram 벡터를 추출할 개수
    'SEED':42
}

In [56]:
def seed_everything(seed):
    random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)
    np.random.seed(seed)
#     torch.manual_seed(seed)
#     torch.cuda.manual_seed(seed)
#     torch.backends.cudnn.deterministic = True
#     torch.backends.cudnn.benchmark = True

seed_everything(CFG['SEED']) # Seed 고정

In [None]:
train_df = pd.read_csv('./train.csv')
test_df = pd.read_csv('./test.csv')
train_df.info()
# train_df, valid_df = train_test_split(train_df, test_size=0.2, random_state=CFG['SEED'])

def speech_file_to_array_fn(df):
    feature = []
    for path in tqdm(df['path']):
        # path = '/content/drive/MyDrive/hi/sound01' + path[1:] 
        speech_array, _ = librosa.load(path, sr=CFG['SR'])
        feature.append(1000*speech_array**3)
    return feature

train_df.reset_index(drop=True, inplace=True)
test_df.reset_index(drop=True, inplace=True)

# valid_df.reset_index(drop=True, inplace=True)

train_x = speech_file_to_array_fn(train_df)
test_x = speech_file_to_array_fn(test_df)
# valid_x = speech_file_to_array_fn(valid_df)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5001 entries, 0 to 5000
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   id      5001 non-null   object
 1   path    5001 non-null   object
 2   label   5001 non-null   int64 
dtypes: int64(1), object(2)
memory usage: 117.3+ KB


  0%|          | 0/5001 [00:00<?, ?it/s]

  0%|          | 0/1881 [00:00<?, ?it/s]

In [None]:
import matplotlib.pyplot as plt
plt.plot(1000*librosa.load(train_df['path'][1020], sr=CFG['SR'])[0]**3)

In [8]:
MODEL_NAME = "facebook/wav2vec2-base"
processor = Wav2Vec2FeatureExtractor.from_pretrained(MODEL_NAME)

class CustomDataSet(torch.utils.data.Dataset):
    def __init__(self, x, y, processor):
        self.x = x
        self.y = y
        self.processor = processor

    def __len__(self):
        return len(self.x)

    def __getitem__(self, idx):
        input_values = self.processor(self.x[idx], sampling_rate=CFG['SR'], return_tensors="pt", padding=True).input_values
        if self.y is not None:
            return input_values.squeeze(), self.y[idx]
        else:
            return input_values.squeeze()

def collate_fn(batch):
    x, y = zip(*batch)
    x = pad_sequence([torch.tensor(xi) for xi in x], batch_first=True)
    y = pad_sequence([torch.tensor([yi]) for yi in y], batch_first=True)  # Convert scalar targets to 1D tensors
    return x, y

def create_data_loader(dataset, batch_size, shuffle, collate_fn, num_workers=0):
    return DataLoader(dataset,
                      batch_size=batch_size,
                      shuffle=shuffle,
                      collate_fn=collate_fn,
                      num_workers=num_workers
                      )

train_dataset = CustomDataSet(train_x, train_df['label'], processor)
test_dataset = CustomDataSet(test_x, y=None, processor=processor)

In [None]:
train_df['label'].value_counts()

# 0: angry
# 1: fear
# 2: sad
# 3: disgust
# 4: neutral
# 5: happy

In [None]:
# features = []
# path = train_df['path'][0]
        
# y, sr = librosa.load(path, sr=CFG['SR'])
# y = list(y)
# y.extend([0 for _ in range(80000-len(y))])
# features.append(y)

# len(y)

In [57]:
def get_feature_mel(df):
    features = []
    for i in tqdm(df['path']):
        # i = '/content/drive/MyDrive/hi/sound01'+i[1:]
        data, sr = librosa.load(i, sr=CFG['SR'])
        data = 1000*data**3
        n_fft = 2048
        win_length = 2048
        hop_length = 1024
        n_mels = 128
 
        D = np.abs(librosa.stft(data, n_fft=n_fft, win_length = win_length, hop_length=hop_length))
        mel = librosa.feature.melspectrogram(S=D, sr=sr, n_mels=n_mels, hop_length=hop_length, win_length=win_length)

        m_mel = mel.mean(axis=1)
        features.append(m_mel)
    return np.array(features)

train_mel = get_feature_mel(train_df)
# valid_mel = get_feature_mel(valid_df)
test_mel = get_feature_mel(test_df)

  0%|          | 0/5001 [00:00<?, ?it/s]

  0%|          | 0/1881 [00:00<?, ?it/s]

In [None]:
import os
import random
import warnings
from sklearn.model_selection import train_test_split

import librosa
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from tqdm.auto import tqdm
from torch.nn.utils.rnn import pad_sequence
from torch.utils.data import DataLoader

from transformers import AutoModelForAudioClassification, Wav2Vec2FeatureExtractor

warnings.filterwarnings(action='ignore')

In [None]:
import matplotlib.pyplot as plt
plt.plot(train_mel[0])

In [58]:
def get_mfcc_feature(df):
    features = []
#     for path in tqdm(df['path']):
        
#         y, sr = librosa.load(path, sr=CFG['SR'])
#         y = list(y)
#         y.extend([0 for _ in range(80100-len(y))])
#         features.append(y)
    for path in tqdm(df['path']):
        # librosa패키지를 사용하여 wav 파일 load
        y, sr = librosa.load(path, sr=CFG['SR'])
        # y = 1000*y**3
        # librosa패키지를 사용하여 mfcc 추출
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=CFG['N_MFCC'])
        y_feature = []
        # 추출된 MFCC들의 평균을 Feature로 사용
        for e in mfcc:
            y_feature.append(np.mean(e))
        features.append(y_feature)
    return features
    # return pd.DataFrame(features,columns=['freq'])

vector = get_mfcc_feature(train_df)
test_mfcc = get_mfcc_feature(test_df)

  0%|          | 0/5001 [00:00<?, ?it/s]

  0%|          | 0/1881 [00:00<?, ?it/s]

In [None]:
plt.plot(vector[0])

In [None]:
# train_df[train_df['label'] == 0].index

In [None]:
# from sklearn.decomposition import PCA
# pca 

# pd.DataFrameFrame(vector.iloc[])

In [None]:
# from sklearn.decomposition import PCA
# pca = PCA(n_components=600)
# pca.fit(vector)
# target = pd.DataFrame(pca.transform(vector))
# target.to_csv('./origin_600_pca.csv')

In [None]:
# import numpy as np
# max([len(i) for i in test])

In [None]:
# test = pd.DataFrame(pca.transform(test))
# test.to_csv('./test_600_pca.csv')

# print(sum(pca.explained_variance_ratio_))

In [None]:
target = pd.read_csv('./origin_600_pca.csv')
conc = pd.DataFrame([list(reversed(list(target.iloc[i,:]))) for i in range(len(target))])

In [None]:
conc = conc.rename(columns={i:f'pca_{i}' for i in range(600)})
target = target.rename(columns={f'{i}':f'pca_{i}' for i in range(600)}).drop('Unnamed: 0',axis=1)

In [None]:
target = pd.concat([target,conc])
target

In [59]:
vector = pd.DataFrame(vector,columns=[f'mfcc_{i}' for i in range(128)])
train = pd.DataFrame(train_mel,columns=[f'mel_{i}' for i in range(128)])
# train = train.rename(columns={f'{i}':f'mel_{i}' for i in range(128)})
train = pd.concat([train,vector],axis=1)
# train = pd.concat([train,tp],axis=1)
# del(tp)
train

Unnamed: 0,mel_0,mel_1,mel_2,mel_3,mel_4,mel_5,mel_6,mel_7,mel_8,mel_9,...,mfcc_118,mfcc_119,mfcc_120,mfcc_121,mfcc_122,mfcc_123,mfcc_124,mfcc_125,mfcc_126,mfcc_127
0,0.013719,0.010585,0.011365,0.015147,0.015964,0.012583,0.014782,0.022000,0.022887,0.021157,...,-0.475243,0.129531,-0.105115,-0.443991,-0.567216,-0.303936,0.407998,0.501475,-0.109036,-0.058342
1,0.024698,0.017155,0.015430,0.017026,0.020601,0.027584,0.037012,0.027860,0.029919,0.027653,...,0.629086,0.514676,-0.314312,-0.197818,-0.422195,0.004201,-0.217671,0.523408,0.020408,0.070081
2,0.567651,0.380975,0.426005,0.497681,0.656522,1.292413,1.174476,0.579839,0.448215,0.628672,...,-0.282975,-0.356842,-0.286293,-0.212861,-0.013879,-0.709901,-0.594326,-0.343695,-0.338751,-0.171855
3,0.622868,0.436759,0.338462,0.411688,0.631594,0.584484,0.391578,0.358792,0.512780,0.648389,...,0.205021,0.189386,-0.311290,-0.363777,-0.499418,-1.088192,-0.685789,0.120570,-0.686173,-0.157101
4,3.222057,3.299814,4.119013,4.295127,3.677306,6.946488,6.592564,5.046597,2.378346,2.362655,...,-0.996369,0.035772,-0.338776,-1.020306,-1.039265,-0.053770,0.154234,0.168259,-0.575030,-1.013335
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4996,0.071441,0.051095,0.074915,0.120894,0.138325,0.085271,0.096072,0.140013,0.149688,0.169614,...,0.023185,0.064578,0.330306,-0.484907,0.141396,0.035094,-0.244010,0.037485,-0.131850,-0.035452
4997,104.478737,57.853981,33.674690,22.962410,18.590714,19.335785,18.104355,16.781382,17.089344,21.422354,...,0.731323,-0.307702,0.357692,-0.328093,-0.237218,-0.570983,0.324473,-0.349720,-0.755602,-0.222972
4998,0.249757,0.187390,0.139185,0.187498,0.285357,0.494518,0.362228,0.236297,0.224831,0.261364,...,-0.083660,0.774606,-0.375393,-0.750303,-0.261566,-0.420591,0.519029,0.504589,-0.008825,-0.609881
4999,0.029698,0.022480,0.024308,0.032744,0.044098,0.030326,0.027065,0.034531,0.035271,0.050390,...,0.015787,-0.581784,-0.564808,-0.924709,-0.827351,-0.884314,-0.117483,-0.039797,0.120997,-0.864221


In [10]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [48]:
def build_model():
    model = keras.Sequential([
        layers.Dense(256, activation='relu', input_shape=[len(train.keys())]),
        # layers.MaxPooling1D(pool_size=2,strides=1, padding='valid'),
        layers.Dense(512),
        layers.Dropout(0.1),
        layers.Dense(6,activation='softmax')
        ])

    optimizer = tf.keras.optimizers.Adam(0.005)

    model.compile(loss=tf.keras.losses.sparse_categorical_crossentropy,
                optimizer='adam',
                metrics=['accuracy'])
    return model
model = build_model()

In [49]:
model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_16 (Dense)            (None, 256)               33024     
                                                                 
 dense_17 (Dense)            (None, 512)               131584    
                                                                 
 dropout (Dropout)           (None, 512)               0         
                                                                 
 dense_18 (Dense)            (None, 6)                 3078      
                                                                 
Total params: 167,686
Trainable params: 167,686
Non-trainable params: 0
_________________________________________________________________


In [14]:
import sklearn
# skf = sklearn.model_selection.StratifiedKFold(n_splits=10)
# for i, (train_index, test_index) in enumerate(skf.split(train, train_df['label'])):
#     print(f"Fold {i}:")
#     print(f"  Train: index={train_index}")
#     print(f"  Test:  index={test_index}")

In [51]:
# class PrintDot(keras.callbacks.Callback):
#     def on_epoch_end(self, epoch, logs):
#         if epoch % 100 == 0: print('')
#         print('.', end='')

# EPOCHS = 1000

# history = model.fit(
#   train, train_df['label'],
#   epochs=EPOCHS, validation_split = 0.1, verbose=0,
#   callbacks=[PrintDot()])



skf = sklearn.model_selection.KFold(n_splits=5)
models = []
for i, (train_index, test_index) in enumerate(skf.split(train, train_df['label'])):
    model.fit(train.loc[train_index,:]
              , train_df.loc[train_index,'label']
              , epochs=50
              , validation_data=(train.loc[test_index,:],train_df.loc[test_index,'label'])) # validation_split=0.1,
    models.append(model)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/5

Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
E

Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [None]:
# from pycaret import classification
# classification.setup(data=train,target='label')

ImportError: cannot import name 'classification' from 'pycaret' (/Users/junho/miniforge3/envs/ml_dl/lib/python3.8/site-packages/pycaret/__init__.py)

In [29]:
test = pd.DataFrame(test_mel,columns=[f'mel_{i}' for i in range(64)])
vector = pd.DataFrame(test_mfcc,columns=[f'mfcc_{i}' for i in range(64)])
test = pd.concat([test,vector],axis=1)

In [30]:
models

[<keras.engine.sequential.Sequential at 0x1c6c1780160>,
 <keras.engine.sequential.Sequential at 0x1c6c1780160>,
 <keras.engine.sequential.Sequential at 0x1c6c1780160>,
 <keras.engine.sequential.Sequential at 0x1c6c1780160>,
 <keras.engine.sequential.Sequential at 0x1c6c1780160>]

In [35]:
preds = [i.predict(test) for i in models]
preds = [np.argmax(i) for i in np.array(preds).mean(axis=0)]
len(preds)



---

In [60]:
train

Unnamed: 0,mel_0,mel_1,mel_2,mel_3,mel_4,mel_5,mel_6,mel_7,mel_8,mel_9,...,mfcc_118,mfcc_119,mfcc_120,mfcc_121,mfcc_122,mfcc_123,mfcc_124,mfcc_125,mfcc_126,mfcc_127
0,0.013719,0.010585,0.011365,0.015147,0.015964,0.012583,0.014782,0.022000,0.022887,0.021157,...,-0.475243,0.129531,-0.105115,-0.443991,-0.567216,-0.303936,0.407998,0.501475,-0.109036,-0.058342
1,0.024698,0.017155,0.015430,0.017026,0.020601,0.027584,0.037012,0.027860,0.029919,0.027653,...,0.629086,0.514676,-0.314312,-0.197818,-0.422195,0.004201,-0.217671,0.523408,0.020408,0.070081
2,0.567651,0.380975,0.426005,0.497681,0.656522,1.292413,1.174476,0.579839,0.448215,0.628672,...,-0.282975,-0.356842,-0.286293,-0.212861,-0.013879,-0.709901,-0.594326,-0.343695,-0.338751,-0.171855
3,0.622868,0.436759,0.338462,0.411688,0.631594,0.584484,0.391578,0.358792,0.512780,0.648389,...,0.205021,0.189386,-0.311290,-0.363777,-0.499418,-1.088192,-0.685789,0.120570,-0.686173,-0.157101
4,3.222057,3.299814,4.119013,4.295127,3.677306,6.946488,6.592564,5.046597,2.378346,2.362655,...,-0.996369,0.035772,-0.338776,-1.020306,-1.039265,-0.053770,0.154234,0.168259,-0.575030,-1.013335
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4996,0.071441,0.051095,0.074915,0.120894,0.138325,0.085271,0.096072,0.140013,0.149688,0.169614,...,0.023185,0.064578,0.330306,-0.484907,0.141396,0.035094,-0.244010,0.037485,-0.131850,-0.035452
4997,104.478737,57.853981,33.674690,22.962410,18.590714,19.335785,18.104355,16.781382,17.089344,21.422354,...,0.731323,-0.307702,0.357692,-0.328093,-0.237218,-0.570983,0.324473,-0.349720,-0.755602,-0.222972
4998,0.249757,0.187390,0.139185,0.187498,0.285357,0.494518,0.362228,0.236297,0.224831,0.261364,...,-0.083660,0.774606,-0.375393,-0.750303,-0.261566,-0.420591,0.519029,0.504589,-0.008825,-0.609881
4999,0.029698,0.022480,0.024308,0.032744,0.044098,0.030326,0.027065,0.034531,0.035271,0.050390,...,0.015787,-0.581784,-0.564808,-0.924709,-0.827351,-0.884314,-0.117483,-0.039797,0.120997,-0.864221


In [62]:
train['label'] = pd.read_csv('./train.csv')['label']

In [67]:
from pycaret import classification
classification.setup(data=train,
                     target='label',
                     # fold_strategy='kfold',
                     train_size=0.8,
                     session_id=77,
                     remove_outliers=True,
                     outliers_method='iforest',
                     outliers_threshold=0.025)

Unnamed: 0,Description,Value
0,Session id,77
1,Target,label
2,Target type,Multiclass
3,Original data shape,"(5001, 257)"
4,Transformed data shape,"(4901, 257)"
5,Transformed train set shape,"(3900, 257)"
6,Transformed test set shape,"(1001, 257)"
7,Numeric features,256
8,Preprocess,True
9,Imputation type,simple


<pycaret.classification.oop.ClassificationExperiment at 0x1c706df4fd0>

In [68]:
best = classification.compare_models(n_select=5,fold=5,sort='Accuracy')

Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
catboost,CatBoost Classifier,0.488,0.821,0.488,0.4795,0.4748,0.3855,0.3883,112.44
lightgbm,Light Gradient Boosting Machine,0.4698,0.8072,0.4698,0.4641,0.4637,0.3634,0.3644,4.38
xgboost,Extreme Gradient Boosting,0.4685,0.802,0.4685,0.4603,0.4603,0.3621,0.3635,25.176
gbc,Gradient Boosting Classifier,0.4398,0.7822,0.4398,0.4336,0.4298,0.3276,0.3296,52.466
rf,Random Forest Classifier,0.4345,0.7782,0.4345,0.4196,0.4095,0.3222,0.3274,1.688
et,Extra Trees Classifier,0.433,0.7765,0.433,0.4141,0.4035,0.3205,0.3265,1.364
lda,Linear Discriminant Analysis,0.4295,0.764,0.4295,0.4375,0.4276,0.3151,0.3167,1.09
lr,Logistic Regression,0.4252,0.7576,0.4252,0.4273,0.4239,0.3099,0.3106,1.884
ridge,Ridge Classifier,0.4152,0.0,0.4152,0.4134,0.4055,0.2971,0.3002,0.51
qda,Quadratic Discriminant Analysis,0.3985,0.6962,0.3985,0.3933,0.349,0.2788,0.2878,0.572


Processing:   0%|          | 0/73 [00:00<?, ?it/s]

In [52]:
submission = pd.read_csv('./sample_submission.csv')
submission['label'] = preds[0] # 0,2,3
submission.to_csv('./baseline_submission.csv', index=False)