# Navigator Text-to-Python Notebook

In [1]:
#%%capture
#!pip install git+https://github.com/gretelai/gretel-python-client@dev/data-designer-m1
# The two lines above are an optional, disabled install step for the
# data-designer development branch of the Gretel client; uncomment both to use it.

# Session settings that are unpacked into DataDesigner.from_config(...) below.
# NOTE(review): "prompt" presumably makes the client ask for the API key
# interactively instead of storing it in the notebook - confirm against the client docs.
session_kwargs = dict(
    api_key="prompt",
    endpoint="https://api-dev.gretel.cloud",
    cache="yes",
)

In [2]:
from gretel_client.navigator import DataDesigner

### 📘 Text-to-Python Blueprint

In [10]:
# YAML blueprint consumed by DataDesigner.from_config(...).
# Sections, in order:
#   - model_suite / special_system_instructions: model selection and system prompt.
#   - seed_categories: `domain` (with a generated `topic` subcategory),
#     `code_complexity`, and `natural_language_type`.
#   - data_columns: `natural_language_prompt`, `suggested_packages`, and `code`;
#     `{placeholders}` in specific_instructions refer to seed categories or
#     earlier columns by name.
#   - data_validators: Python code validation applied to the `code` column.
# Fixes relative to the earlier version: the `topic` description was cut off
# mid-sentence, and the `domain` list repeated "Manufacturing" and "Public Health".
text2python_blueprint_string = """
model_suite: Apache-2.0

special_system_instructions: >-
   You are an expert at writing Python code and technical documentation. You are obsessed with writing clean,
   efficient, and maintainable code. You are tasked with generating Python code and natural
   language text that will be used to train a language model that will be used to generate Python code.

seed_categories:
    - name: domain
      description: Major industry domain or sector that relies on robust software solutions
      values: [Healthcare, Finance, Retail, Manufacturing, Education, Public Health, Science and Technology, Environmental Science, Government, Media and Entertainment, 
            Transportation, Energy, Agriculture, Food and Beverage, Wellness, Construction, Automotive, Telecommunications, Public Services, Financial Services, 
            Medicine, Social Services, Education and Training, Information and Communications, Environment, Textiles, Startups, Legal and Law, Entertainment, Pharmaceuticals, 
            Food Service, Advertising, Financial Planning, Travel and Tourism, Waste Management, E-commerce, Hospitality, Philanthropy, Sports, Social Media, 
            Venture Capital, Arts and Culture, Economics, Artificial Intelligence, Biotechnology, Renewable Energy and Sustainability, Business and Entrepreneurship, 
            Defense and Aerospace, Logistics, Oil and Gas, Fashion and Apparel, Human Resources, Music, Nonprofit, Gaming, Insurance, Space Exploration, Banking, Smart Cities, 
            Recreation, Maritime, Electricity, Gas & Water Services, Wholesale Trade, Hotel and Resorts, Rental Services, Fitness, Agricultural Technology, Consulting, Analytics, 
            Chemicals, Urban Planning, Internet of Things, Global Trade, Automation Technology, Journalism, Engineering, Psychology, Scientific Research, Publishing, Cybersecurity, 
            Credit Cards & Loans, Robotics & Computing, Digital Health, Consumer Electronics, Business Intelligence, Market Research, Sales Forecasting, Data Governance, Digital Marketing]
      subcategories:
        - name: topic
          num_values_to_generate: 25
          description: Key topics that professional Python developers care about for the corresponding domain

    - name: code_complexity
      description: Complexity of the Python code.
      values:
          - "Intermediate: Control flow, functions, and modules"
          - "Advanced: Classes, inheritance, and exceptions"
          - "Expert: Decorators, generators, and context managers"

    - name: natural_language_type
      description: Type of natural language that will be paired with a block of code
      num_values_to_generate: 10
      values:
          - a natural language prompt for a Python coding task.
          - a question about how to solve a problem using a Python program
          - a code description that would appear at the top of a function, class, or module
          - an instruction that instructs a user to write Python code for a specific task

data_columns:
    - name: natural_language_prompt
      description: Natural language text that will be paired with a block of Python code
      specific_instructions: "Generate {natural_language_type} that is relevant to {topic} in the {domain} industry. Ensure that the prompt you generate is specific, accurate and detailed"
    
    - name: suggested_packages
      description: Suggested Python packages that are commonly used in the {domain} industry
      specific_instructions: "Suggest up to 4 Python packages that are commonly used in the {domain} industry."
      output_type: list
      relevant_columns: [domain, topic, natural_language_prompt]

    - name: code
      description: Python code that will be paired with natural language text.
      specific_instructions: "{natural_language_prompt}"
      relevant_columns: [domain, topic, code_complexity, suggested_packages]
      output_type: code
      llm_type: code

data_validators:
    - validator: code
      code_lang: python
      code_columns: [code]
"""

In [None]:
# Build a DataDesigner from the YAML blueprint string, forwarding the session
# settings (api_key, endpoint, cache) as keyword arguments.
designer = DataDesigner.from_config(
    text2python_blueprint_string,
    **session_kwargs,
)
# Leave the instance as the last expression so the notebook renders it.
designer


### 👀 Generating a dataset preview

In [None]:
# Ask the designer for a dataset preview; the returned object is kept so the
# following cells can display it and sample from `preview.output`.
preview = designer.generate_dataset_preview()

In [None]:
# Show the preview inside the notebook.
# NOTE(review): presumably renders the preview records as a table - confirm against the client docs.
preview.display_dataframe_in_notebook()

### 🔎 Taking a closer look at single records

In [None]:
# Pick one record from the preview output and hand it to the designer for display.
# NOTE(review): `preview.output` is presumably a pandas DataFrame, in which case
# `.sample(1)` draws a different random row on each run - confirm.
sample_record = preview.output.sample(1)
designer.display_sample_record(sample_record)

### 🤔 Like what you see? Generate an entire dataset

In [None]:
# Number of records requested from the batch workflow; adjust here to scale the run.
NUM_RECORDS = 50

# Submit the batch workflow and keep the handle used to fetch the dataset later.
results = designer.submit_batch_workflow(num_records=NUM_RECORDS)

In [None]:
# Fetch the dataset produced by the batch workflow submitted above.
# NOTE(review): wait_for_completion=True presumably blocks until the workflow
# has finished - confirm against the client docs.
df = results.fetch_dataset(wait_for_completion=True)

In [None]:
# Inspect the dataset: show only the first rows rather than the whole result.
# NOTE(review): assumes `df` is a pandas DataFrame - confirm.
df.head()