
## LangGraph Synthetic Data Generation

This notebook demonstrates a small in-memory `LangGraph` class (defined in the first code cell; no external library is imported) for building and managing a synthetic question-answer dataset based on question evolution.
The class is used to model simple, multi-context, and reasoning evolutions of questions, together with their corresponding answers and contexts.


In [1]:

class LangGraph:
    """In-memory store for questions, their answers and contexts, and evolution links.

    All state is kept in plain lists of dicts so it can be inspected directly:

    * ``questions``  -- ``{'id', 'text', 'evolution_type'}``
    * ``answers``    -- ``{'question_id', 'answer'}``
    * ``contexts``   -- ``{'question_id', 'context'}``
    * ``evolutions`` -- ``{'parent_id', 'child_id', 'evolution_type'}``, one entry
      per successful :meth:`evolve_question` call.
    """

    def __init__(self):
        self.questions = []
        self.answers = []
        self.contexts = []
        # Edges of the graph: which question each evolved question was derived from.
        self.evolutions = []

    def add_question(self, question_text, evolution_type):
        """Append a question and return its generated id (``'Q1'``, ``'Q2'``, ...).

        The id is derived from the current number of stored questions, so ids stay
        unique only as long as entries are never removed from ``self.questions``.
        """
        question_id = 'Q' + str(len(self.questions) + 1)
        self.questions.append({
            'id': question_id,
            'text': question_text,
            'evolution_type': evolution_type
        })
        return question_id

    def add_answer(self, question_id, answer_text):
        """Append an answer record for ``question_id``.

        The id is stored as given; it is not checked against ``self.questions``.
        """
        self.answers.append({
            'question_id': question_id,
            'answer': answer_text
        })

    def add_context(self, question_id, context_text):
        """Append a context record for ``question_id``.

        The id is stored as given; it is not checked against ``self.questions``.
        """
        self.contexts.append({
            'question_id': question_id,
            'context': context_text
        })

    def evolve_question(self, original_question_id, new_question_text, evolution_type):
        """Create a new question derived from an existing one.

        Args:
            original_question_id: id of the question being evolved.
            new_question_text: text of the evolved question.
            evolution_type: label describing the kind of evolution.

        Returns:
            The id of the newly added question, or ``None`` if
            ``original_question_id`` is not a known question (in which case
            nothing is added).
        """
        parent_exists = any(q['id'] == original_question_id for q in self.questions)
        if not parent_exists:
            return None

        new_question_id = self.add_question(new_question_text, evolution_type)
        # Record the lineage; without this the parent/child relationship is lost
        # as soon as this method returns.
        self.evolutions.append({
            'parent_id': original_question_id,
            'child_id': new_question_id,
            'evolution_type': evolution_type
        })
        return new_question_id


In [2]:

# Build an empty graph to hold the demo data
lang_graph = LangGraph()

# Seed question, with its answer and supporting context
q1_id = lang_graph.add_question(
    question_text='What is synthetic data?',
    evolution_type='Simple Evolution',
)
lang_graph.add_answer(
    question_id=q1_id,
    answer_text='Synthetic data is artificially generated data that mimics real-world data.',
)
lang_graph.add_context(
    question_id=q1_id,
    context_text='Used in scenarios where real data is unavailable or inadequate for training models.',
)

# Simple evolution: rephrase the seed question
q1_evol_id = lang_graph.evolve_question(
    original_question_id=q1_id,
    new_question_text='Define synthetic data and its primary uses.',
    evolution_type='Simple Evolution',
)
lang_graph.add_answer(
    question_id=q1_evol_id,
    answer_text='Synthetic data refers to artificial data created by algorithms to simulate real-world data for training machine learning models and ensuring privacy in testing scenarios.',
)
lang_graph.add_context(
    question_id=q1_evol_id,
    context_text='Commonly used to avoid the complications of using real data such as privacy breaches or data scarcity.',
)

# Multi-context evolution: also derived from the seed question
q2_id = lang_graph.evolve_question(
    original_question_id=q1_id,
    new_question_text='How is synthetic data generated and what are its applications?',
    evolution_type='Multi-Context Evolution',
)
lang_graph.add_answer(
    question_id=q2_id,
    answer_text='Synthetic data is generated using algorithms that model real data distributions, and it is used in machine learning, testing, and privacy protection.',
)
lang_graph.add_context(
    question_id=q2_id,
    context_text='Synthetic data generation involves statistical and machine learning techniques.',
)

# Reasoning evolution: derived from the multi-context question
q3_id = lang_graph.evolve_question(
    original_question_id=q2_id,
    new_question_text='Why is synthetic data preferred over real data in sensitive applications?',
    evolution_type='Reasoning Evolution',
)
lang_graph.add_answer(
    question_id=q3_id,
    answer_text='Synthetic data is preferred in sensitive applications because it can be used without violating privacy laws, making it ideal for compliance-driven industries.',
)
lang_graph.add_context(
    question_id=q3_id,
    context_text='Privacy concerns and regulatory compliance are pivotal in sectors like healthcare and finance.',
)
