In [20]:
import pandas as pd
from faker import Faker
import uuid
import random
import json
from datetime import datetime, timedelta

In [21]:
# Shared Faker instance; generate_fake_data uses it for the 'timestamp',
# 'string' and 'json' value types.
fake = Faker()

def generate_fake_data(data_type, repeat_percentage=0, uuid_cache=None):
    """Return a single fake value of the requested ``data_type``.

    Parameters
    ----------
    data_type : str
        One of ``'uuid'``, ``'timestamp'``, ``'string'``, ``'json'`` or
        ``'boolean'``.
    repeat_percentage : float, optional
        Only used for ``'uuid'``: probability (compared against
        ``random.random()``) that an already generated UUID from
        ``uuid_cache`` is returned instead of a fresh one.
    uuid_cache : list of str, optional
        Only used for ``'uuid'``: pool of previously generated UUIDs. Fresh
        UUIDs are appended to it in place. When ``None``, every UUID is new
        and nothing is recorded.

    Returns
    -------
    str, datetime, bool or None
        ``'uuid'`` gives a UUID4 string, ``'timestamp'`` a value from the
        365 days before now, ``'string'`` a single word, ``'json'`` a JSON
        string with the keys ``key1`` and ``key2``, ``'boolean'`` a bool.
        Any other ``data_type`` yields ``None``.
    """
    if data_type == 'uuid':
        # Only try to repeat when there is something to repeat: an empty
        # cache (the state on the very first call) would make
        # random.choice() raise IndexError.
        if uuid_cache and random.random() < repeat_percentage:
            return random.choice(uuid_cache)
        new_uuid = str(uuid.uuid4())
        if uuid_cache is not None:
            uuid_cache.append(new_uuid)
        return new_uuid
    if data_type == 'timestamp':
        # Generate a timestamp within the last year period
        now = datetime.now()
        start_date = now - timedelta(days=365)
        return fake.date_time_between_dates(start_date, now)
    if data_type == 'string':
        return fake.word()
    if data_type == 'json':
        return json.dumps({'key1': fake.word(), 'key2': fake.word()})
    if data_type == 'boolean':
        return random.choice([True, False])
    # Unknown types keep the original behaviour of yielding None.
    return None
    
def create_fake_dataframe(column_dict, num_rows=10, repeat_percentage=0):
    """Build a DataFrame of fake values, one column per entry of ``column_dict``.

    Parameters
    ----------
    column_dict : dict
        Maps each column name to the ``data_type`` string handed to
        ``generate_fake_data``.
    num_rows : int, optional
        Number of values generated for every column.
    repeat_percentage : float, optional
        Forwarded unchanged to ``generate_fake_data``.

    Returns
    -------
    pandas.DataFrame
        Columns appear in the iteration order of ``column_dict``.

    Notes
    -----
    Columns are filled one after another, and a single UUID list is passed
    to every ``generate_fake_data`` call, so all columns share one pool of
    previously generated UUIDs.
    """
    shared_uuids = []
    columns = {}

    for name, kind in column_dict.items():
        values = []
        for _ in range(num_rows):
            values.append(generate_fake_data(kind, repeat_percentage, shared_uuids))
        columns[name] = values

    return pd.DataFrame(columns)

In [22]:
# Example usage: describe each output column by the kind of fake value it holds,
# then generate the frame and display it.
content_dict = dict(
    id='uuid',
    created_at='timestamp',
    updated_at='timestamp',
    job_id='uuid',
    step='string',
    status='string',
    updated_by='string',
    content_filter='json',
    project_id='uuid',
    selected_flavors='json',
    flavors='json',
    prompt_schema_id='uuid',
    cloned_from_id='uuid',
    is_archived='boolean',
    metadata='json',
    root_id='uuid',
    content_type='string',
    ai_model_id='uuid',
)

df = create_fake_dataframe(
    column_dict=content_dict,
    num_rows=2000,
    repeat_percentage=0.6,
)
df

Unnamed: 0,id,created_at,updated_at,job_id,step,status,updated_by,content_filter,project_id,selected_flavors,flavors,prompt_schema_id,cloned_from_id,is_archived,metadata,root_id,content_type,ai_model_id
0,412bd17e-a46d-4050-8df5-f9ca0c26e0c7,2023-07-12 00:46:13,2022-11-19 11:21:37,0043c0f5-f574-4eb6-8e28-9933c38e3f53,language,meeting,owner,"{""key1"": ""against"", ""key2"": ""across""}",a7742b34-3c22-4df8-bf9f-c9fdf9135670,"{""key1"": ""seek"", ""key2"": ""so""}","{""key1"": ""hold"", ""key2"": ""Congress""}",64db7edf-4f1d-4aea-aa6b-d72274c25c04,3165f3ef-dd8a-47c4-bebf-b2b42a81276a,False,"{""key1"": ""speak"", ""key2"": ""important""}",1907b857-0611-4961-aae6-44337c77f2b9,father,de01b15b-3faa-4774-9144-ac79b7b9b315
1,412bd17e-a46d-4050-8df5-f9ca0c26e0c7,2022-12-29 05:53:06,2023-06-20 02:42:11,f3268443-9c55-45a0-ac2d-dbe816714dfd,without,region,property,"{""key1"": ""account"", ""key2"": ""particularly""}",fe852b9c-141a-48a4-a343-6829db8ea4e7,"{""key1"": ""herself"", ""key2"": ""couple""}","{""key1"": ""send"", ""key2"": ""our""}",26674676-ee77-4182-bbfa-6da87002d281,7fa23806-95d8-4780-ba10-0f8faefa8169,True,"{""key1"": ""sometimes"", ""key2"": ""hair""}",9a6d2762-cf1d-47ec-b938-e805254bd132,about,51e501fc-cc82-400a-ba8e-340b818751d3
2,412bd17e-a46d-4050-8df5-f9ca0c26e0c7,2022-11-15 19:39:04,2023-07-17 15:29:45,8b094eb5-8a39-47a1-9deb-dffc78738c92,month,wear,college,"{""key1"": ""situation"", ""key2"": ""know""}",6b81d9ef-796a-4965-a126-4b13bb2d695f,"{""key1"": ""measure"", ""key2"": ""determine""}","{""key1"": ""general"", ""key2"": ""character""}",d9797102-2045-4f93-958b-3e6439029f70,a2efd873-4a04-4aa4-800c-9c51bf840fc0,True,"{""key1"": ""statement"", ""key2"": ""experience""}",6d5f5c97-1736-4b5c-a29d-4272b90ce49c,get,e455d7df-1d6d-4fd7-b51a-7a08112f6614
3,6b6aee59-618e-473b-9b38-c83ac68e3bd0,2023-05-02 06:05:50,2023-08-24 11:54:00,b4a2aadf-14ac-404f-87e7-91b8a9d7c6b6,on,school,spring,"{""key1"": ""Congress"", ""key2"": ""that""}",6f76b957-b8b0-47be-b17c-90be7ac35e6a,"{""key1"": ""then"", ""key2"": ""along""}","{""key1"": ""able"", ""key2"": ""morning""}",d7a8fddf-8a76-47c8-8282-ca3c52d31ad8,be3eb513-23d7-4b24-813a-174c21d2969e,True,"{""key1"": ""building"", ""key2"": ""modern""}",1a0858c2-ad7f-46c2-8b21-cdfa7070247a,for,053a1a89-7e4d-40f9-8039-970177d1a5b6
4,412bd17e-a46d-4050-8df5-f9ca0c26e0c7,2023-04-27 06:23:59,2022-12-12 06:19:05,789e0ef3-6685-4afe-a98d-d016573656b9,will,himself,cold,"{""key1"": ""candidate"", ""key2"": ""tree""}",c922edf9-f93a-4bd8-afc6-8559c3bc3805,"{""key1"": ""score"", ""key2"": ""three""}","{""key1"": ""social"", ""key2"": ""might""}",138954f1-55ad-4b94-aaf2-4c98a9f5df51,762d5c29-f45e-407e-84e9-9bc8a519b242,False,"{""key1"": ""yard"", ""key2"": ""minute""}",38e356d0-a5c8-483c-9436-d7a7c3f7657c,stay,3203505d-b979-45b1-88a8-81110a3e9b5a
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,9dccc535-1516-4972-b932-b3dfcc6efcde,2023-05-03 11:25:49,2023-03-27 02:48:29,1633a410-f2bf-4a87-afff-28bdbe738a9e,notice,task,hospital,"{""key1"": ""treat"", ""key2"": ""take""}",fc9b96d9-389c-49ea-bd6c-b173929d7ffd,"{""key1"": ""drug"", ""key2"": ""after""}","{""key1"": ""sister"", ""key2"": ""family""}",188f64be-a3ea-4aa2-85a1-f845ba8de4b1,8c7bdd47-70b2-451f-bd82-a1619a6ceec1,False,"{""key1"": ""test"", ""key2"": ""turn""}",1fd3d068-5ebe-450c-a312-73f1d74e5e09,food,74c82b54-8bf6-4669-9890-919d9ff77620
1996,f3c2aa61-713b-40a7-9890-2c73381ed106,2023-03-04 05:17:19,2023-02-01 03:30:07,272ad665-0c97-41c4-8c93-8eb932688841,green,understand,upon,"{""key1"": ""already"", ""key2"": ""letter""}",e2397e71-6e7c-45a1-b4b4-8bbf28b278cc,"{""key1"": ""test"", ""key2"": ""bank""}","{""key1"": ""concern"", ""key2"": ""may""}",7b69ac6d-7197-49c4-a1ed-464c86100b8e,c51ed852-3d8d-404d-b62f-feb2d9fd0625,False,"{""key1"": ""marriage"", ""key2"": ""worry""}",974bbfa1-9c7c-44b2-bedc-1aa231e1fa89,democratic,0ffd03c5-0b90-4e75-94a1-1a4f8c008716
1997,e59dec2d-91cf-45ef-9ea6-4b574b4f9f11,2023-05-09 12:29:36,2023-04-10 08:50:05,9412348b-70ce-4fc7-874f-452698f0685c,oil,leave,these,"{""key1"": ""spring"", ""key2"": ""Mrs""}",77da52d9-06c9-4ede-9925-45f4fdf34043,"{""key1"": ""seven"", ""key2"": ""both""}","{""key1"": ""daughter"", ""key2"": ""sense""}",52ea0250-961a-497f-8e9e-6d91823ac1a6,99a81d06-3bb1-439f-b93b-159d65ebff87,False,"{""key1"": ""design"", ""key2"": ""sit""}",c9d874bf-e0b1-40d9-953f-622f20b3854f,possible,065baa34-22d9-4a44-9e4c-92c7b23003f9
1998,1c55c5d0-36cb-4b7a-965d-dddbcfa6ae82,2023-01-10 23:59:28,2023-04-28 01:33:57,3527baca-58ea-46d9-a4ae-903b7afa4617,serious,store,kitchen,"{""key1"": ""arrive"", ""key2"": ""help""}",bacb5fe6-113a-4c3a-bcb0-816f526fb9de,"{""key1"": ""war"", ""key2"": ""company""}","{""key1"": ""still"", ""key2"": ""off""}",44bee303-ce1a-4929-9dd1-7ea4279b2f99,c738d7c0-b204-423d-b352-fb999eb0676a,False,"{""key1"": ""response"", ""key2"": ""old""}",d2604406-206f-4e4c-8be4-5036ee4b1e91,shoulder,cee14199-244c-44b7-af75-f45f82dd12fb


In [23]:
# Show the dtype pandas assigned to each generated column.
df.dtypes

id                          object
created_at          datetime64[ns]
updated_at          datetime64[ns]
job_id                      object
step                        object
status                      object
updated_by                  object
content_filter              object
project_id                  object
selected_flavors            object
flavors                     object
prompt_schema_id            object
cloned_from_id              object
is_archived                   bool
metadata                    object
root_id                     object
content_type                object
ai_model_id                 object
dtype: object

In [24]:
# Count how many rows carry each distinct `id` value; reset_index() turns the
# resulting Series into a frame so it displays as a table.
df['id'].value_counts().reset_index()

Unnamed: 0,id,count
0,93612d61-38fa-4674-8779-7411b993eeec,12
1,1b234577-5bab-4d9b-92ea-1576e69a2df4,11
2,6b6aee59-618e-473b-9b38-c83ac68e3bd0,11
3,6233de2b-fc38-4099-91e0-e4167c619c6b,11
4,af8f986d-b06c-4941-822d-b9d049f75f42,11
...,...,...
825,370738d9-09db-4a02-9e98-5bba4686f8cd,1
826,0aa1cd48-32ad-4d12-9df0-2a9d3a99c05a,1
827,2ee07596-d094-4ad4-9ad8-b7bc4f7cb966,1
828,a7594bb8-3625-4746-8b59-6126051b36bd,1


In [25]:
# Write the generated rows to CSV without a header line.
# NOTE(review): `index` is left at its default, so the row index is written as
# the first field of every line — confirm the consumer of fake_items.csv
# expects that extra leading column.
df.to_csv('../extractor/Downloads/fake_items.csv', header=False)