In [4]:
from generatorUtils import LabelTransformations
import random
from faker import Faker
f = Faker()
from collections import Counter


In [10]:
loan_labels = ['loan', 'loan amount', 'credit amount']
transaction_labels = ['transaction amount', 'transaction', 'bid', 'bid amount', 'balance']
income_labels = ['salary', 'salary amount', 'income', 'income amount']

labels = loan_labels + transaction_labels + income_labels


In [11]:
class AmountGenerator:
    def __init__(self, num_of_samples):
        self.labelTransformations = LabelTransformations()
        self.num_of_samples = num_of_samples
        self.f = Faker()

    def get_currency_abbreviation(self,):
        return ' ' + f.currency_code()

    def get_currency_symbol(self,):
        return ' ' + f.currency_symbol()

    def get_currency_name(self,):
        return ' ' + f.currency_name()

    def amount_transform(self, input_amount, output_amount):

        input_amount_with_commas = f"{int(input_amount):,}" 
        output_amount_with_commas = f"{int(output_amount):,}" 


        input_pnumber = input_amount_with_commas.split(',')[0]
        output_pnumber = output_amount_with_commas.split(',')[0]

        input_fraction_pnumber = str(eval(input_pnumber) + eval("{:.2f}".format(random.random())))
        output_fraction_pnumber = str(eval(output_pnumber) + eval("{:.2f}".format(random.random())))


        input_comma_count = Counter(input_amount_with_commas)
        input_comma_count = input_comma_count[',']
        output_comma_count = Counter(output_amount_with_commas)
        output_comma_count = output_comma_count[',']

        if input_comma_count == 1:
            input_unit= random.choice(['k', 'thousand', ' k', ' thousand'])
        elif input_comma_count == 2:
            input_unit= random.choice(['mn', 'million', ' mn', ' million'])
        elif input_comma_count == 3:
            input_unit= random.choice(['bn', 'billion', ' bn', ' billion'])
        else:
            input_unit = ''

        # if output_comma_count == 1:
        #     output_unit= random.choice(['k', 'thousand', ' k', ' thousand'])
        # elif output_comma_count == 2:
        #     output_unit= random.choice(['mn', 'million', ' mn', ' million'])
        # elif output_comma_count == 3:
        #     output_unit= random.choice(['bn', 'billion', ' bn', ' billion'])
        # else:
        #     output_unit = ''
        output_unit = input_unit

        return [input_amount, input_amount+"{:.2f}".format(random.random()), input_pnumber + input_unit, input_fraction_pnumber + input_unit],\
                [output_amount, output_amount+"{:.2f}".format(random.random()), output_pnumber + output_unit, output_fraction_pnumber + output_unit]

        
    def generateAmountData(self, labels):
        labels = list(set(self.labelTransformations(labels)))
        
        input_amount_list = []
        output_amount_list = []
        for i in range(self.num_of_samples):
            

            '''amounts of length 4 to 10'''
            self.generateAmount(labels, input_amount_list, output_amount_list, 4)
            self.generateAmount(labels, input_amount_list, output_amount_list, 5)
            self.generateAmount(labels, input_amount_list, output_amount_list, 6)
            self.generateAmount(labels, input_amount_list, output_amount_list, 7)
            self.generateAmount(labels, input_amount_list, output_amount_list, 8)
            self.generateAmount(labels, input_amount_list, output_amount_list, 9)
            self.generateAmount(labels, input_amount_list, output_amount_list, 10)

        return input_amount_list, output_amount_list
        
    def generateAmount(self, labels, input_amount_list, output_amount_list, length):
        
        #without currencies
        start_number = str(random.randint(a=0, b=9))
        label = random.choice(labels)
        input_following_random_number = self.generate_random_number(length-1)
        output_following_random_number = self.generate_random_number(length-1)

        input_amount = start_number + input_following_random_number
        output_amount = start_number + output_following_random_number

        input_amount_transforms, output_amount_transforms = self.amount_transform(input_amount, output_amount)
        random_index = random.choice(list(range(len(input_amount_transforms))))
        input_amount_list.append(label + ': ' + input_amount_transforms[random_index])
        output_amount_list.append(output_amount_transforms[random_index])

        #with currency symbol
        start_number = str(random.randint(a=0, b=9))
        label = random.choice(labels)
        currency_symbol = self.get_currency_symbol()
        
        input_following_random_number = self.generate_random_number(length-1)
        output_following_random_number = self.generate_random_number(length-1)

        input_amount = start_number + input_following_random_number
        output_amount = start_number + output_following_random_number

        input_amount_transforms, output_amount_transforms = self.amount_transform(input_amount, output_amount)
        random_index = random.choice(list(range(len(input_amount_transforms))))
        input_amount_list.append(label + ': ' + input_amount_transforms[random_index] + currency_symbol)
        output_amount_list.append(output_amount_transforms[random_index] + currency_symbol)

        #with currency abbreviation
        start_number = str(random.randint(a=0, b=9))
        label = random.choice(labels)
        currency_abbreviation = self.get_currency_abbreviation()
        input_following_random_number = self.generate_random_number(length-1)
        output_following_random_number = self.generate_random_number(length-1)

        input_amount = start_number + input_following_random_number
        output_amount = start_number + output_following_random_number

        input_amount_transforms, output_amount_transforms = self.amount_transform(input_amount, output_amount)
        random_index = random.choice(list(range(len(input_amount_transforms))))
        input_amount_list.append(label + ': ' + input_amount_transforms[random_index] + currency_abbreviation)
        output_amount_list.append(output_amount_transforms[random_index] + currency_abbreviation)

        #with currency name
        start_number = str(random.randint(a=0, b=9))
        label = random.choice(labels)
        currency_name = self.get_currency_name()
        input_following_random_number = self.generate_random_number(length-1)
        output_following_random_number = self.generate_random_number(length-1)

        input_amount = start_number + input_following_random_number
        output_amount = start_number + output_following_random_number

        input_amount_transforms, output_amount_transforms = self.amount_transform(input_amount, output_amount)
        random_index = random.choice(list(range(len(input_amount_transforms))))
        input_amount_list.append(label + ': ' + input_amount_transforms[random_index] + currency_name)
        output_amount_list.append(output_amount_transforms[random_index] + currency_name)

        return input_amount_list
            
            

    def generate_random_number(self, length):
        start = eval(''.join(['1'] + ['0' for i in range(1, length-1)]))
        end = eval(''.join(['9' for i in range(1, length)]))

        start = int(eval('1' + str(start)))
        end = int(eval('9' + str(end)))


        random_number = str(random.randint(a=start, b=end))
        return random_number


In [18]:
amountGenerator = AmountGenerator(num_of_samples = 600)

In [25]:
input, output = amountGenerator.generateAmountData(labels)

In [26]:
import pandas as pd
res = pd.DataFrame(columns=['input_entity', 'output_entity'])

In [27]:
res['input_entity'] = input
res['output_entity'] = output

In [28]:
len(res)

16800

In [31]:
res.to_csv('Amount_data.csv', index=False)

In [30]:
res

Unnamed: 0,input_entity,output_entity
0,loan_amount: 6282,6310
1,INCOME_AMOUNT: 928 ؋,428 ؋
2,Credit Amount: 4thousand ANG,4thousand ANG
3,Salary_Amount: 69080.35 Cuban convertible peso,62070.73 Cuban convertible peso
4,transaction amount: 449680.26,414910.36
...,...,...
16795,BID: 666million Zimbabwean dollar,640million Zimbabwean dollar
16796,BID: 98466561810.37,97092119780.27
16797,income amount: 02128722950.38 $,01865350590.89 $
16798,bid-amount: 6billion TVD,6billion TVD
