# wordnet_aug: oversample one class of a labelled text dataset using WordNet synonym augmentation

import pandas as pd
import numpy as np
import nlpaug.augmenter.word as naw
from tqdm import tqdm

# Load Data
# The CSV is expected to provide the 'description' and 'label' columns
# that the augmentation function below reads.
DATA_PATH = 'path/data.csv'
df = pd.read_csv(DATA_PATH)

# Augment Argument
# Synonym-replacement augmenter backed by WordNet; created once at module
# level and reused for every augmentation call.
wordnet = naw.SynonymAug(aug_src='wordnet')

# Augment Function
def augmentMyData(df, selectedclass, repetitions=1, samples=200):
    """Create synonym-augmented copies of rows belonging to one class.

    Rows whose ``label`` equals ``selectedclass`` are drawn at random (with
    replacement, via ``np.random.randint``) and each drawn ``description`` is
    passed through the module-level ``wordnet`` augmenter.

    Args:
        df: DataFrame with at least ``description`` and ``label`` columns.
        selectedclass: Value of ``label`` identifying the class to augment.
        repetitions: Number of augmentations generated per drawn row.
        samples: Number of rows drawn from the selected class.

    Returns:
        A new DataFrame with a ``description`` column holding the values
        returned by ``wordnet.augment`` (unmodified) and a ``label`` column
        set to ``selectedclass``; it has ``samples * repetitions`` rows.

    Raises:
        ValueError: If no row of ``df`` has ``label == selectedclass``.
    """
    # Select only the class to augment.
    class_rows = df[df['label'] == selectedclass]
    if len(class_rows) == 0:
        # Without this guard np.random.randint(0, 0, ...) fails with an
        # opaque "low >= high" message that hides the real cause.
        raise ValueError(
            "no rows with label {!r} to augment".format(selectedclass)
        )

    # Look the column up once; rows are addressed by position below, so the
    # original index labels do not matter.
    descriptions = class_rows['description']
    picked_positions = np.random.randint(0, len(descriptions), samples)

    # Generate 'repetitions' augmented texts for each of the 'samples' draws.
    augmented_texts = [
        wordnet.augment(descriptions.iloc[position])
        for position in tqdm(picked_positions)
        for _ in range(repetitions)
    ]

    # The scalar label is broadcast to every augmented row.
    return pd.DataFrame({
        'description': augmented_texts,
        'label': selectedclass,
    })

# Run Augment
# Number of rows to draw from the selected class; 200 mirrors the default of
# augmentMyData. (The name was previously used without being defined.)
samples = 200
aug_data = augmentMyData(df, selectedclass="selectedclass", samples=samples)

# Convert augmented data to string type
# NOTE(review): depending on the installed nlpaug version, augment() appears to
# return either a plain string or a list of strings. Take the first element
# only when a non-empty list came back, so a plain string is not truncated to
# its first character.
aug_data['description'] = aug_data['description'].map(
    lambda text: text[0] if isinstance(text, list) and text else text
)

# DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
# ignore_index=True renumbers the rows so the augmented rows do not duplicate
# index labels already present in df.
df = pd.concat([df, aug_data], ignore_index=True)