In [2]:
import pandas as pd
from faker import Faker
import uuid
import random
import json
from datetime import datetime, timedelta

In [3]:
# Create one shared Faker instance for the whole notebook.
# generate_fake_data() reads this module-level name for the 'timestamp',
# 'string' and 'json' data types.
fake = Faker()

# Function to generate fake data based on the data type
def generate_fake_data(data_type, repeat_percentage=0):
    """Return a single fake value of the requested kind.

    Parameters
    ----------
    data_type : str
        One of ``'uuid'``, ``'timestamp'``, ``'string'``, ``'json'`` or
        ``'boolean'``.
    repeat_percentage : float, optional
        Accepted for backward compatibility but not used here: a single
        stateless call has no history of earlier values to repeat, so value
        reuse has to be implemented by whoever collects the generated values.

    Returns
    -------
    str | datetime.datetime | bool
        * ``'uuid'``      -> a random UUID4 rendered as ``str`` (always a
          string, so a column of these has one consistent type).
        * ``'timestamp'`` -> a ``datetime`` between one year ago and now.
        * ``'string'``    -> a single fake word.
        * ``'json'``      -> a JSON-encoded object with keys ``key1``/``key2``.
        * ``'boolean'``   -> ``True`` or ``False``.

    Raises
    ------
    ValueError
        If ``data_type`` is not one of the supported names. Previously this
        case fell through and returned ``None``, which silently produced
        all-null columns for a misspelled type.
    """
    if data_type == 'uuid':
        # Always stringify: returning a raw uuid.UUID for some rows and a str
        # for others gave mixed-type columns and never repeated anything.
        return str(uuid.uuid4())

    if data_type == 'timestamp':
        # Generate a timestamp within the last 365 days.
        now = datetime.now()
        start_date = now - timedelta(days=365)
        return fake.date_time_between(start_date, now)

    if data_type == 'string':
        return fake.word()

    if data_type == 'json':
        return json.dumps({'key1': fake.word(), 'key2': fake.word()})

    if data_type == 'boolean':
        return random.choice([True, False])

    raise ValueError(f"Unsupported data_type: {data_type!r}")

# Function to create a Pandas DataFrame with fake data
def create_fake_dataframe(column_dict, num_rows=10, repeat_percentage=0):
    """Build a DataFrame of fake values, one column per entry of ``column_dict``.

    Parameters
    ----------
    column_dict : dict[str, str]
        Maps each column name to the data type name understood by
        ``generate_fake_data``. Column order follows the dict's order.
    num_rows : int, optional
        Number of rows to generate for every column (default 10).
    repeat_percentage : float, optional
        Probability, between 0 and 1, that a cell in a ``'uuid'`` column
        reuses a value already generated earlier in the same column instead
        of a fresh one. ``0`` (the default) keeps every value fresh. Other
        data types are not affected.

    Returns
    -------
    pandas.DataFrame
        A frame with ``num_rows`` rows and one column per key of
        ``column_dict``.
    """
    data = {}

    for column, data_type in column_dict.items():
        values = []
        for _ in range(num_rows):
            # Reuse has to happen here: this loop is the only place that
            # knows which values the column already contains. The first row
            # can never repeat because there is nothing to pick from yet.
            reuse = (
                data_type == 'uuid'
                and values
                and random.random() < repeat_percentage
            )
            if reuse:
                values.append(random.choice(values))
            else:
                values.append(generate_fake_data(data_type))
        data[column] = values

    return pd.DataFrame(data)

In [4]:
# Example usage: list each output column together with the data type name
# that generate_fake_data() should use for it (order = column order).
content_dict = dict([
    ('id', 'uuid'),
    ('created_at', 'timestamp'),
    ('updated_at', 'timestamp'),
    ('job_id', 'uuid'),
    ('step', 'string'),
    ('status', 'string'),
    ('updated_by', 'string'),
    ('content_filter', 'json'),
    ('project_id', 'uuid'),
    ('selected_flavors', 'json'),
    ('flavors', 'json'),
    ('prompt_schema_id', 'uuid'),
    ('cloned_from_id', 'uuid'),
    ('is_archived', 'boolean'),
    ('metadata', 'json'),
    ('root_id', 'uuid'),
    ('content_type', 'string'),
    ('ai_model_id', 'uuid'),
])

# Build a 2000-row frame from the schema above; repeat_percentage is passed
# straight through to create_fake_dataframe().
df = create_fake_dataframe(
    content_dict,
    num_rows=2000,
    repeat_percentage=0.2,
)
# Bare last expression so the notebook renders the frame.
df

Unnamed: 0,id,created_at,updated_at,job_id,step,status,updated_by,content_filter,project_id,selected_flavors,flavors,prompt_schema_id,cloned_from_id,is_archived,metadata,root_id,content_type,ai_model_id
0,0ef50c8a-c01b-4e30-b52a-928f59f07d2b,2023-03-23 21:08:49,2022-11-10 01:56:43,1a98587c-58a2-4c81-ba74-eed3cace8e3d,catch,remain,into,"{""key1"": ""follow"", ""key2"": ""evening""}",b9d412c7-388b-42e2-a6e2-25958f6b8dcb,"{""key1"": ""example"", ""key2"": ""each""}","{""key1"": ""nor"", ""key2"": ""whom""}",d0aecade-0fa7-4a07-944f-09d9e596fc66,83468f0e-2edc-444d-be06-11576579ef8e,False,"{""key1"": ""represent"", ""key2"": ""cold""}",74622ee8-bbac-4009-b377-dc1c50422479,pattern,0ba2393b-6fe3-4998-b65d-dead2e41b494
1,bc890020-04fa-46fc-b24a-c24c44fc77b1,2023-02-27 02:32:54,2023-04-26 10:18:01,749e202c-b56f-48f9-ada4-a7ffad4ef005,edge,buy,most,"{""key1"": ""throughout"", ""key2"": ""since""}",a21659e3-51af-4f99-9c3a-ac73e6f2431e,"{""key1"": ""process"", ""key2"": ""power""}","{""key1"": ""ready"", ""key2"": ""attack""}",1e0fb040-7731-4cfa-80a0-399f95a86f5a,e7f51a8b-6157-4fb7-a1de-d11a353dbcda,True,"{""key1"": ""name"", ""key2"": ""citizen""}",e88c0db3-460d-49bf-ace7-9a657f5aa8ca,late,956597e3-7433-4ea0-92ef-4cbf4f381844
2,0f781048-9ed3-4a80-90fd-adc1bf83eeb1,2022-12-04 09:36:11,2023-06-19 19:56:16,21fa87e0-553a-43a2-a287-005a0f9d4e72,each,message,look,"{""key1"": ""whose"", ""key2"": ""treat""}",7ddc3b3a-7e9e-4664-b618-8751d96a5e97,"{""key1"": ""change"", ""key2"": ""seat""}","{""key1"": ""Congress"", ""key2"": ""reveal""}",50fcb5b3-47f5-451d-9c6e-4dc44cc14453,f43b73a9-1f0c-4984-aa24-bc59c3177345,True,"{""key1"": ""hotel"", ""key2"": ""interest""}",472c256a-4a9e-4af9-a852-a172ad499ed4,system,59ef37e6-76b5-46ba-ae8f-91398fd541dd
3,1fcae300-db7e-4695-942a-883108b20b3d,2023-07-04 02:24:25,2022-12-22 21:19:35,2d26cb99-234f-43a1-bb79-3229d065636b,list,south,they,"{""key1"": ""financial"", ""key2"": ""foreign""}",9523d87d-0d2f-473f-afac-94d527fd41ea,"{""key1"": ""court"", ""key2"": ""special""}","{""key1"": ""decision"", ""key2"": ""tough""}",3f2ab616-10c9-4461-b220-d4a9d73a55d4,ed8546ff-654d-494a-aa28-7a296c488a69,False,"{""key1"": ""special"", ""key2"": ""require""}",af1162c3-5a0c-4393-8dc4-4d4ea951b55c,and,38e5f0bc-a644-47bb-a7c6-bcb3afa772b8
4,a27eddd4-8ae3-47a7-9a4f-50b37b087635,2023-07-22 19:01:11,2023-10-13 01:26:03,4e3ea916-a7a4-424d-a697-8cfbaab857d8,build,good,fast,"{""key1"": ""lead"", ""key2"": ""industry""}",a60788f7-5bb7-45e1-aa4c-8070173d19db,"{""key1"": ""training"", ""key2"": ""race""}","{""key1"": ""event"", ""key2"": ""south""}",ea3fc86c-37f6-48de-bc0e-67e0eabe3779,9ae800ce-a208-478d-9b11-64cfcc3c8de5,True,"{""key1"": ""kitchen"", ""key2"": ""religious""}",16cac6e4-2b0c-489a-ad5a-e289384b6a06,really,8afd8fea-e638-4f7d-9aa9-67e4951cd57c
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,5872a783-92b2-4836-9ac7-54a04a9e2d98,2023-02-08 20:44:33,2022-11-15 16:51:40,ee0dfb58-8e92-4634-87ec-2e781a5b2bc3,rate,pull,ok,"{""key1"": ""record"", ""key2"": ""stand""}",ea8a2ecb-5698-4ed6-94ea-a9c091234f45,"{""key1"": ""maybe"", ""key2"": ""around""}","{""key1"": ""clear"", ""key2"": ""protect""}",859d6661-3d21-4066-9171-35b721f4b6bc,0d4e7b06-22f5-4602-87ad-48bbf66f499a,True,"{""key1"": ""point"", ""key2"": ""it""}",96b14642-23d6-4fea-93e3-6801ef974d1a,mouth,b277fe0c-bc13-4b70-9896-48762f4ad09d
1996,518496b8-7353-47ed-b4be-12c29ed8f8f7,2023-03-11 08:53:24,2023-02-14 14:13:08,6e83d51b-f3be-4148-8c9b-90ffae58c0b4,ready,exactly,worker,"{""key1"": ""history"", ""key2"": ""simple""}",c808d842-a5ce-4194-a050-cd15907d41ba,"{""key1"": ""question"", ""key2"": ""little""}","{""key1"": ""official"", ""key2"": ""region""}",d4459c4e-ffa9-42cc-8032-a8298589746d,014f1f07-f87b-4f77-8f01-8cba04de511e,False,"{""key1"": ""sure"", ""key2"": ""myself""}",c3d6a5e7-fe16-4015-a4c3-b09295bda5d8,some,23be2eb9-adf9-4e10-b5ad-6e9dbacac266
1997,165f522a-e1b7-4a10-b598-88e7f781e11f,2023-03-25 15:47:16,2023-06-24 10:25:46,103d175b-b572-4fc5-8570-3f9044a7e986,certain,mother,such,"{""key1"": ""will"", ""key2"": ""main""}",c502e273-8484-4882-bc34-f1a830777675,"{""key1"": ""benefit"", ""key2"": ""if""}","{""key1"": ""laugh"", ""key2"": ""public""}",c2b3b487-98ad-4bd6-b96f-e681f7858e82,50948390-5d09-44ef-b4ee-0bee9f893349,True,"{""key1"": ""program"", ""key2"": ""attack""}",8a82c4c3-e088-4561-93e1-118c06a423d8,enter,67639ce8-cd13-4265-a705-d540312bfa0f
1998,9f3f9446-d8ad-4557-8791-2d9f2f3d2d8e,2023-06-11 18:01:22,2023-04-20 16:58:06,93627e26-f90a-496d-9cb3-f4eef753ecee,week,relate,agreement,"{""key1"": ""maintain"", ""key2"": ""against""}",712afec6-f715-47d3-acaf-0414ca4276a5,"{""key1"": ""subject"", ""key2"": ""require""}","{""key1"": ""city"", ""key2"": ""husband""}",e500d98b-b207-432f-9a55-794d427dd158,40edfec0-f947-4ae1-bc90-de6920f261bd,False,"{""key1"": ""whose"", ""key2"": ""value""}",0c850b50-e4c9-487e-9a49-cce287875dda,kid,3cf7fd9a-8fe9-4016-9503-a3ee5c75ff3f


In [5]:
# Write the generated frame to CSV. The path is relative to the notebook's
# working directory, and with to_csv's defaults the row index is written as
# the first, unnamed column.
df.to_csv(path_or_buf='../extractor/Downloads/fake_items.csv')