#### Load environment variables

In [1]:
import os
from dotenv import load_dotenv
load_dotenv()

import warnings


def configure_api_environment(env=os.environ):
    """Check the API credentials in ``env`` and enable LangSmith tracing if possible.

    ``load_dotenv()`` above has already merged the ``.env`` entries into
    ``os.environ``, so nothing is copied back here. Writing the result of
    ``os.getenv`` into ``os.environ`` raises ``TypeError`` as soon as a
    variable is unset, because environment values must be ``str``.

    Args:
        env: Mutable mapping of environment variables. Defaults to
            ``os.environ``; a plain ``dict`` can be passed instead.

    Returns:
        The names of the expected variables that are unset or empty, in the
        order OPENAI_API_KEY, LANGCHAIN_API_KEY, LANGCHAIN_PROJECT.
    """
    missing = [
        name
        for name in ("OPENAI_API_KEY", "LANGCHAIN_API_KEY", "LANGCHAIN_PROJECT")
        if not env.get(name)
    ]
    # To use LangSmith: tracing is only switched on when an API key is present,
    # so a missing key does not leave tracing enabled without credentials.
    if "LANGCHAIN_API_KEY" not in missing:
        env["LANGCHAIN_TRACING_V2"] = "true"
    return missing


# To call openai models OPENAI_API_KEY must be set; report anything missing
# up front instead of failing with an opaque TypeError.
missing_env_vars = configure_api_environment()
if missing_env_vars:
    warnings.warn(
        "Missing environment variables: "
        + ", ".join(missing_env_vars)
        + ". Add them to your .env file (OPENAI_API_KEY is needed for OpenAI "
        "calls, LANGCHAIN_API_KEY for LangSmith tracing).",
        stacklevel=2,
    )

#### Setup: Imports

In [2]:
from langchain_core.prompts import FewShotPromptTemplate, PromptTemplate
from langchain_core.pydantic_v1 import BaseModel
from langchain_openai import ChatOpenAI
from langchain_experimental.tabular_synthetic_data.openai import (
    OPENAI_TEMPLATE,
    create_openai_data_generator,
)
from langchain_experimental.tabular_synthetic_data.prompts import (
    SYNTHETIC_FEW_SHOT_PREFIX,
    SYNTHETIC_FEW_SHOT_SUFFIX,
)

#### Define Your Data Model

In [3]:
# Schema of one synthetic medical-billing record. It is passed as
# `output_schema` to the data generator, whose results are instances of it.
class MedicalBilling(BaseModel):
    patient_id: int  # integer patient identifier, e.g. 123456
    patient_name: str  # patient's full name, e.g. "John Doe"
    diagnosis_code: str  # diagnosis code such as "J20.9" (looks like ICD-10 -- TODO confirm)
    procedure_code: str  # procedure code kept as text, e.g. "99203" (presumably CPT -- confirm)
    total_charge: float  # total amount billed; the samples express it in dollars
    insurance_claim_amount: float  # amount claimed from insurance; dollars in the samples

#### Sample data

In [4]:
# Few-shot examples shown to the model. Each record is a single line:
# adjacent string literals are joined by the parser, so no newline or source
# indentation ends up inside the prompt text (triple-quoted literals wrapped
# across lines would embed both).
examples = [
    {
        "example": (
            "Patient ID: 123456, Patient Name: John Doe, Diagnosis Code: J20.9, "
            "Procedure Code: 99203, Total Charge: $500, Insurance Claim Amount: $350"
        )
    },
    {
        "example": (
            "Patient ID: 789012, Patient Name: Johnson Smith, Diagnosis Code: M54.5, "
            "Procedure Code: 99213, Total Charge: $150, Insurance Claim Amount: $120"
        )
    },
    {
        "example": (
            "Patient ID: 345678, Patient Name: Emily Stone, Diagnosis Code: E11.9, "
            "Procedure Code: 99214, Total Charge: $300, Insurance Claim Amount: $250"
        )
    },
]

#### Create a Prompt Template

In [5]:
# Renders one few-shot example verbatim from its single "example" field.
# NOTE: this rebinds OPENAI_TEMPLATE, a name that is also imported from
# langchain_experimental in the setup cell.
OPENAI_TEMPLATE = PromptTemplate(template="{example}", input_variables=["example"])

# Few-shot prompt: library-provided prefix, the rendered examples, then the
# library-provided suffix. "subject" and "extra" are supplied when generating.
prompt_template = FewShotPromptTemplate(
    examples=examples,
    example_prompt=OPENAI_TEMPLATE,
    prefix=SYNTHETIC_FEW_SHOT_PREFIX,
    suffix=SYNTHETIC_FEW_SHOT_SUFFIX,
    input_variables=["subject", "extra"],
)

#### Create the Data Generator

In [6]:
# Wire the few-shot prompt, the output schema and the chat model into one
# generator object.
synthetic_data_generator = create_openai_data_generator(
    prompt=prompt_template,
    output_schema=MedicalBilling,
    # temperature=1 is presumably chosen to get more varied samples; pass a
    # differently configured chat model here if needed.
    llm=ChatOpenAI(temperature=1),
)

#### Generate Synthetic Data

In [7]:
# Request 10 generation runs for the "medical_billing" subject; `extra` adds
# a free-text instruction about how patient names should be picked.
synthetic_results = synthetic_data_generator.generate(
    runs=10,
    subject="medical_billing",
    extra="the name must be chosen at random. Make it something you wouldn't normally choose.",
)

In [8]:
# Show the generated records (the cell output is the list of MedicalBilling objects).
synthetic_results

[MedicalBilling(patient_id=456789, patient_name='Isabella Rodriguez', diagnosis_code='H10.9', procedure_code='99204', total_charge=400.0, insurance_claim_amount=300.0),
 MedicalBilling(patient_id=123456, patient_name='Samantha Anderson', diagnosis_code='F32.9', procedure_code='99215', total_charge=250.0, insurance_claim_amount=200.0),
 MedicalBilling(patient_id=987654, patient_name='Aiden Thompson', diagnosis_code='H20.9', procedure_code='99213', total_charge=350.0, insurance_claim_amount=275.0),
 MedicalBilling(patient_id=246810, patient_name='Harper Johnson', diagnosis_code='K50.1', procedure_code='99203', total_charge=300.0, insurance_claim_amount=250.0),
 MedicalBilling(patient_id=246810, patient_name='Xavier Perez', diagnosis_code='M54.5', procedure_code='99212', total_charge=275.0, insurance_claim_amount=225.0),
 MedicalBilling(patient_id=123456, patient_name='Jasper Smith', diagnosis_code='S72.01', procedure_code='99214', total_charge=400.0, insurance_claim_amount=300.0),
 Medic

In [22]:
# Take the first generated record and print its JSON serialization.
medical_bill = synthetic_results[0]
print(medical_bill.json())

{"patient_id": 456789, "patient_name": "Isabella Rodriguez", "diagnosis_code": "H10.9", "procedure_code": "99204", "total_charge": 400.0, "insurance_claim_amount": 300.0}
