In [13]:
import random
from faker import Faker
from generatorUtils import LabelTransformations

from datetime import date, datetime as dt

In [29]:
labels = ['date', 'published date', 'transaction date', 
'date of birth', 'date of commencement', 'date of joining',
'end date', 'date of transaction', 'date of purchase', 'last authentication date', 
'role created date', 'creation date', 'modified date', 'last modified date', 'login created date']

In [30]:
class DateGenerator:
    def __init__(self, num_of_samples):
        self.num_of_samples = num_of_samples
        self.labelTransformations = LabelTransformations()

    def suffix(self, d):
        return 'th' if 11<=d<=13 else {1:'st',2:'nd',3:'rd'}.get(d%10, 'th')

    def custom_strftime(self, format, t):
        return t.strftime(format).replace('{S}', str(t.day) + self.suffix(t.day))

    def get_random_date(self,):
        start_dt = date.today().replace(day=1, month=1).toordinal()
        end_dt = date.today().toordinal()
        random_day = dt.fromordinal(random.randint(start_dt, end_dt))
        return random_day

    def generateDates(self, labels):
        f = Faker()
        labels = list(set(self.labelTransformations(labels)))
        date_input_strings = []
        date_output_strings = []
        
        for i in range(self.num_of_samples):
            # January 1, 1996
            label = random.choice(labels)
            year = f.year()
            date_input_strings.append(f'{label}: ' + f.month_name() + ' ' + f.day_of_month() + ', ' + year)
            date_output_strings.append(f.month_name() + ' ' + f.day_of_month() + ', ' + year)

            # 1 January 2001
            label = random.choice(labels)
            year = f.year()
            date_input_strings.append(f'{label}: ' + f.day_of_month() + ' ' + f.month_name() + ' ' + year)
            date_output_strings.append(f.day_of_month() + ' ' + f.month_name() + ' ' + year)

            # Jan 1, 1996
            label = random.choice(labels)
            year = f.year()
            date_input_strings.append(f'{label}: ' + f.month_name()[:3] + ' ' + f.day_of_month() + ', ' + year)
            date_output_strings.append(f.month_name() + ' ' + f.day_of_month()[:3] + ', ' + year)

            # 1 Jan 2001
            label = random.choice(labels)
            year = f.year()
            date_input_strings.append(f'{label}: ' + f.day_of_month()[:3] + ' ' + f.month_name() + ' ' + year)
            date_output_strings.append(f.day_of_month() + ' ' + f.month_name()[:3] + ' ' + year)

            # 2001-01-12
            label = random.choice(labels)
            
            date_input_strings.append(f'{label}: ' + f.date())
            date_output_strings.append(f.date())

            # 12-01-1994
            label = random.choice(labels)
            
            date_input_strings.append(f'{label}: ' + f.date(pattern='%d-%m-%Y'))
            date_output_strings.append(f.date(pattern='%d-%m-%Y'))

            # 2003/01/21
            label = random.choice(labels)
            
            date_input_strings.append(f'{label}: ' + f.date(pattern='%Y/%m/%d'))
            date_output_strings.append(f.date(pattern='%Y/%m/%d'))

            # 01/01/1993
            label = random.choice(labels)
            
            date_input_strings.append(f'{label}: ' + f.date(pattern='%d/%m/%Y'))
            date_output_strings.append(f.date(pattern='%d/%m/%Y'))

            # March 2nd, 2021
            label = random.choice(labels)
            date_input_strings.append(f'{label}: ' + self.custom_strftime('%B {S}, %Y', self.get_random_date()))
            date_output_strings.append(self.custom_strftime('%B {S}, %Y', self.get_random_date()))

            #March 6th
            label = random.choice(labels)
            date_input_strings.append(f'{label}: ' + self.custom_strftime('%B {S}', self.get_random_date()))
            date_output_strings.append(self.custom_strftime('%B {S}', self.get_random_date()))

            #30th March
            label = random.choice(labels)
            date_input_strings.append(f'{label}: ' + self.custom_strftime('{S} %B', self.get_random_date()))
            date_output_strings.append(self.custom_strftime('{S} %B', self.get_random_date()))

            #Mar 6th
            d = self.custom_strftime('{S} %B', self.get_random_date())
            d = d.split(' ')[1][:3] + ' ' + d.split(' ')[0]
            date_input_strings.append(f'{label}: ' + d)

            d = self.custom_strftime('{S} %B', self.get_random_date())
            d = d.split(' ')[1][:3] + ' ' + d.split(' ')[0]
            date_output_strings.append(d)

            #6th Mar
            d = self.custom_strftime('{S} %B', self.get_random_date())
            d = d.split(' ')[0] + ' ' + d.split(' ')[1][:3]
            date_input_strings.append(f'{label}: ' + d)
            
            d = self.custom_strftime('{S} %B', self.get_random_date())
            d = d.split(' ')[0] + ' ' + d.split(' ')[1][:3]
            date_output_strings.append(d)

        return date_input_strings, date_output_strings


In [31]:
dateGenerator = DateGenerator(num_of_samples=300)

In [32]:
input, output = dateGenerator.generateDates(labels)

In [33]:
list(zip(input, output))

[('roleCreatedDate: December 19, 2009', 'June 01, 2009'),
 ('creation-date: 13 January 1998', '17 September 1998'),
 ('transactiondate: Oct 07, 2000', 'February 15, 2000'),
 ('Date Of Purchase: 23 February 2018', '07 Sep 2018'),
 ('Last-Authentication-Date: 2013-12-06', '1993-08-10'),
 ('DateOfCommencement: 27-05-1974', '05-03-1970'),
 ('LOGIN_CREATED_DATE: 2009/10/17', '1982/03/14'),
 ('dateofjoining: 27/03/1987', '14/05/1981'),
 ('modified_date: February 13th, 2022', 'March 10th, 2022'),
 ('ROLE_CREATED_DATE: January 26th', 'January 7th'),
 ('last_authentication_date: 16th April', '28th January'),
 ('last_authentication_date: May 3rd', 'Apr 19th'),
 ('last_authentication_date: 4th Feb', '12th Jan'),
 ('roleCreatedDate: August 09, 1976', 'June 26, 1976'),
 ('modified-date: 17 August 1997', '17 August 1997'),
 ('date-of-transaction: Feb 06, 1998', 'March 20, 1998'),
 ('MODIFIED-DATE: 26 April 2011', '30 May 2011'),
 ('role-created-date: 1990-08-03', '1990-02-23'),
 ('Login_Created_Date

In [34]:
import pandas as pd
res = pd.DataFrame(columns=['input_entity', 'output_entity'])

In [35]:
res['input_entity'] = input
res['output_entity'] = output

In [36]:
res.to_csv('date_data.csv', index=False)

In [37]:
len(res)

3900