<a href="https://colab.research.google.com/github/cooolbabu/GoogleGemini101/blob/main/AzureDatabricks/Boiler_code_for_Github_Claude.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# About this notebook
- We are using Claude to generate create-table code
- The generated code is stored in GitHub
- This is an example of Claude's failure

# Package installs

In [None]:
%pip install anthropic
%pip install PyGithub

### Imports, get keys, get llm client and set model to variable MODEL_NAME

In [None]:
from anthropic import Anthropic
import json
import re
from pprint import pprint
from google.colab import userdata
from github import Github

# Read the GitHub token and the Claude API key from Colab secrets
github_token=userdata.get('Github_Token')
claude_api_key=userdata.get('Claude_api_key')
# Initialize a GitHub instance
g = Github(github_token)

# Anthropic client, plus the model identifier stored in MODEL_NAME
client = Anthropic(api_key=claude_api_key)
MODEL_NAME = "claude-3-opus-20240229"

### Github helper functions
* read_file_as_string(file_path)
* check_in_file(repo_name, file_path, file_content, content_tag, branch)
* create_notebook(response, system_message, instructions, filename)

In [None]:
def read_file_as_string(file_path):
    """
        Load a text file and hand back its entire contents as a single string.

        Parameters:
            file_path (str): Filename including filepath

        Returns:
            The text of the file, or None when the file is missing or any other
            error occurs while reading it (a message is printed in both cases).
    """
    result = None
    try:
        with open(file_path, 'r') as source:
            data = source.read()
        # Only publish the text once the file has been read and closed cleanly
        result = data
    except FileNotFoundError:
        print(f"File '{file_path}' not found.")
    except Exception as err:
        print(f"An error occurred: {err}")
    return result

def check_in_file(repo_name, file_path, file_content, content_tag, branch):
    """
        Commit `file_content` to `file_path` on the given branch of a GitHub repository.

        If the file already exists on the branch it is updated; if GitHub reports
        that it does not exist (HTTP 404) it is created. In both cases `content_tag`
        is used as the commit message.

        Parameters:
        - repo_name (str): The name of the repository, formatted as 'username/repository'.
        - file_path (str): The path to the file within the repository. This should include the file name and its extension.
        - file_content (str): The content to be written to the file. This is used both for updating and creating the file.
        - content_tag (str): A message associated with the commit used for updating or creating the file.
        - branch (str): Github branch for the code

        Behavior:
        - Uses the module-level GitHub client `g` to look up the repository.
        - Only a "not found" answer from the lookup triggers file creation. Any other
          failure (authentication, rate limit, a failed update, ...) propagates to the
          caller instead of being mistaken for a missing file.
        - Upon successful update or creation, prints a success message indicating the action taken.
    """
    # Local import keeps this function self-contained; the package is already a notebook dependency
    from github import UnknownObjectException

    # Get the repository
    repo = g.get_repo(repo_name)

    try:
        # Look the file up on the target branch
        existing = repo.get_contents(file_path, ref=branch)
    except UnknownObjectException:
        # GitHub answered 404: the file is not on this branch yet, so create it.
        # Only the path is printed; the content can be an entire generated notebook.
        print(f"File '{file_path}' does not exist on branch '{branch}'")
        repo.create_file(file_path, content_tag, file_content, branch=branch)
        print(f"File '{file_path}' created successfully.")
    else:
        # The file exists: the update call needs the blob sha of the current version
        repo.update_file(file_path, content_tag, file_content, existing.sha, branch=branch)
        print(f"File '{file_path}' updated successfully.")

def _as_magic_lines(text):
    """Return `text` with every line after the first prefixed by '# MAGIC '."""
    # The template supplies the prefix for the first line; the join adds it to the rest
    return "\n# MAGIC ".join(str(text).splitlines())


def create_notebook(response, system_message, instructions, filename):
    """
    Build a Databricks source-format notebook (.py) from an LLM response and save it.

    Parameters:
        response (dict): Parsed LLM reply; must contain the keys "summary",
            "code" and "explanation" (a missing key raises KeyError).
        system_message (str): The system prompt, recorded in the notebook.
        instructions (str): The user prompt, recorded in the notebook.
        filename (str): Path of the file to write.

    Returns:
        str: The full notebook text that was written to `filename`.

    Summary, explanation, system_message and instructions are placed in
    markdown cells. Each of their lines is prefixed with '# MAGIC ' so that
    multi-line values (such as a table schema) stay inside the markdown cell
    instead of leaking into the file as bare text. The code is inserted
    unchanged into its own cell.
    """
    # Extract summary, code, and explanation from the response JSON
    summary = _as_magic_lines(response["summary"])
    code = response["code"]
    explanation = _as_magic_lines(response["explanation"])
    role_text = _as_magic_lines(system_message)
    instruction_text = _as_magic_lines(instructions)

    # Create the notebook content
    notebook_content = f"""# Databricks notebook source

# MAGIC %md
# MAGIC # Summary
# MAGIC {summary}

# COMMAND ----------

# MAGIC %md
# MAGIC # Code (use Databricks workspace formatter to format the code)

# COMMAND ----------

{code}

# COMMAND ----------

# MAGIC %md
# MAGIC # Explanation
# MAGIC {explanation}

# COMMAND ----------

# MAGIC %md
# MAGIC # GenAI Instructions
# MAGIC * ## AI Role
# MAGIC {role_text}

# COMMAND ----------
# MAGIC %md
# MAGIC * ## Instructions (Try edit mode for visualizing table structure)
# MAGIC {instruction_text}
"""

    # Write the notebook content to a file; explicit UTF-8 so non-ASCII model output is saved the same way everywhere
    with open(filename, "w", encoding="utf-8") as f:
        f.write(notebook_content)

    print(f"Notebook '{filename}' has been created.")

    return notebook_content

In [None]:
import ast

def convert_str_to_dict(s):
    """
    Parse a Python-literal string and return it as a dict.

    Raises ValueError("Input is not a valid dictionary string") when the text
    cannot be parsed as a literal or when the parsed literal is not a dict.
    """
    error_text = "Input is not a valid dictionary string"
    try:
        parsed = ast.literal_eval(s)
    except (ValueError, SyntaxError):
        raise ValueError(error_text)
    if not isinstance(parsed, dict):
        raise ValueError(error_text)
    return parsed

import string

def strip_control_characters_old(s):
    """
    Return `s` with every code point in 0x00-0x1F and 0x7F-0x9F replaced by a space.

    Newlines, tabs and carriage returns fall in the first range, so they are
    replaced as well.
    """
    # One translation table: each control code point maps to a single space
    blank_for = {
        code_point: ' '
        for code_point in (*range(0x00, 0x20), *range(0x7f, 0xa0))
    }
    return s.translate(blank_for)

def strip_control_characters(s):
    """
    Return `s` with control characters and ASCII punctuation replaced by spaces.

    Replaced: 0x00-0x08, 0x0B, 0x0E-0x1F, 0x7F-0x9F and every character in
    string.punctuation. Kept as-is: tab (0x09), newline (0x0A), form feed
    (0x0C) and carriage return (0x0D).

    Parameters:
        s (str): Text to clean.

    Returns:
        str: The cleaned text, the same length as the input.
    """
    # Control characters to blank out; tab, \n, \f and \r are deliberately left out of these ranges
    replacements = dict.fromkeys(range(0x00, 0x09), ' ')
    replacements[0x0B] = ' '
    replacements.update(dict.fromkeys(range(0x0E, 0x20), ' '))
    replacements.update(dict.fromkeys(range(0x7f, 0xa0), ' '))
    # string.punctuation holds no whitespace characters, so it can be used directly
    replacements.update(dict.fromkeys(map(ord, string.punctuation), ' '))

    # Translate the string using the translation table and hand the result back
    return s.translate(replacements)

# Setup
1.   System Message
2.   User Message



In [None]:
# System prompt sent to the model: sets its role and asks for a JSON reply
# with the keys "summary", "code" and "explanation".
# Surrounding whitespace is removed with .strip().
system_message = """
You are  Azure Databricks data engineer.
    You will be given tasks and asked to write spark code.
    You will use best practices for writing code.
    All the code will run within a sparksession. Added date and time to session name
    Your response will be in JSON format with keys "summary", "code", "explanation".
    Do not include introductory line the response.
    Ensure that your response has no special characters.
  """.strip()


# User prompt: the task description plus the schema of the orders_bronze table
# in printSchema-style tree form. Surrounding whitespace is removed with .strip().
user_message_content = """
  I will give you schema for a table. You are tasked with creating a spark session and neccessary code to create the table called orders_bronze.
  The table will be in the database Bookstore_catalog. Schema is bronze. Your code must create an empty table.
  The schema is as follows
  root
  |-- order_id: string (nullable = true)
  |-- order_timestamp: long (nullable = true)
  |-- customer_id: string (nullable = true)
  |-- quantity: long (nullable = true)
  |-- total: integer (nullable = true)
  |-- books: array (nullable = true)
  |    |-- element: struct (containsNull = true)
  |    |    |-- book_id: string (nullable = true)
  |    |    |-- quantity: integer (nullable = true)
  |    |    |-- subtotal: long (nullable = true)
  |-- _rescued_data: string (nullable = true)
  |-- file_name: string (nullable = true)
  |-- processed_timestamp: timestamp (nullable = true)
  """.strip()

# Make the call to LLMs

In [None]:
# Send the system prompt and the user task to Claude.
# The model id comes from MODEL_NAME (set in the setup cell) so it is defined in one place only.
response = client.messages.create(
    model=MODEL_NAME,
    max_tokens=1000,
    temperature=0,
    system=system_message,
    messages=[
        {"role": "user", "content": user_message_content}
    ]
)

# Take the text of the first content block of the reply; later cells parse it as JSON.
message = response.content[0].text

# Validate response from LLM

In [None]:
# Sanity check: show the type of the object returned by the API call
print(type(response))

In [None]:
# Show the raw reply, then parse it into a dict.
# strict=False makes the parser accept literal control characters (for example raw
# newlines or tabs) inside JSON strings, which a multi-line "code" value can contain
# and which the default strict mode rejects.
print(message)
resp_in_json = json.loads(message, strict=False)

In [None]:
# Build the notebook text from the parsed reply and both prompts; this also writes it to the named local file
file_contents = create_notebook(resp_in_json, system_message, user_message_content, "orders_bronze_notebook-t2.py")

In [None]:
# Review the generated notebook text before checking it in
print(file_contents)

# Check into Github
*   repository : "cooolbabu/GoogleGemini101"
*   filename : "AzureDatabricks/filename" - specify actual filename
*   filecontent: Contents of the file to check in
*   tag_name: give a comment. It will show in Github
* branch: branch name to check into. Ensure that branch already exists
          Future TODO: if branch doesn't exist (notify, ask, process)



In [None]:


# Commit the generated notebook text to the repository on the given branch;
# content_tag is used as the commit message.
check_in_file(repo_name="cooolbabu/GoogleGemini101",
              file_path="AzureDatabricks/ConfigureDB/create_order_table-t1.py",
              file_content=file_contents,
              content_tag='creating orders table added control characters t5',
              branch="pyspark-genai-t2")

In [None]:
import base64
from IPython.display import Image, display
import matplotlib.pyplot as plt

def mm(graph):
    """
    Display a Mermaid diagram inline using an image URL on mermaid.ink.

    Parameters:
        graph (str): Mermaid diagram source text.

    The source is UTF-8 encoded, base64-encoded with the URL-safe alphabet and
    appended to "https://mermaid.ink/img/"; the resulting URL is shown with
    IPython's Image/display. Nothing is returned.
    """
    graph_bytes = graph.encode("utf8")
    # URL-safe alphabet: standard base64 can emit '+' and '/', and a '/' inside the
    # token would be read as a path separator and break the request
    token = base64.urlsafe_b64encode(graph_bytes).decode("ascii")
    display(Image(url="https://mermaid.ink/img/" + token))

# Demo: a small left-to-right graph using '&' to fan one node out to several targets
mm("""
graph LR;
    A--> B & C & D;
    B--> A & E;
    C--> A & E;
    D--> A & E;
    E--> B & C & D;
""")

In [None]:
# Demo: a flowchart with a labelled edge and a decision node that branches to two results
mm("""
   flowchart LR

A[Hard] -->|Text| B(Round)
B --> C{Decision}
C -->|One| D[Result 1]
C -->|Two| E[Result 2]
   """)

In [None]:
# Entity-relationship diagram of the bookstore data: customers, their profile,
# orders, the books contained in an order, and the book catalogue.
# Attributes are written as "<type> <name>".
mm("""
  erDiagram
    CUSTOMER ||--o{ ORDER : PLACES
    CUSTOMER {
        string customer_id
        string email
        string PROFILE(jsonObject)
        string updated
    }
    PROFILE {
        string first_name
        string last_name
        string gender
        string address
    }
    CUSTOMER ||--|| PROFILE : HAS

    ORDER ||--|{ BOOKS_ORDERED : CONTAINS
    ORDER {
        string order_id
        bigint order_timestamp
        string customer_id
        bigint quantity
        double total
        string BOOKS_ORDERED(jsonObject)
    }
    BOOKS_ORDERED {
        string book_id
        bigint quantity
        double subtotal
    }
    BOOKS {
        string book_id
        string title
        string author
        string category
        double price
    }
""")