In [1]:
import base64
import io
import json
import os
import re

import openai
import PyPDF2
import tiktoken
from dotenv import load_dotenv, find_dotenv
from PIL import Image



# Load variables from a discovered .env file into the process environment.
# The return value is bound to `_` so the cell does not echo it as output.
_ = load_dotenv(find_dotenv()) 

# Functions

In [2]:
# Pull the OpenAI API key out of config.json and hand it to the client library.
with open('config.json', 'r') as config_file:
    config = json.load(config_file)

api_key = config['openai_api_key']
openai.api_key = str(api_key)

In [3]:
def resize_compress_encode_image(image_path, output_size=(300, 100), quality=60):
    """Shrink an image, re-encode it as JPEG and return the bytes as base64 text.

    Args:
        image_path: Path (or file object) handed to ``Image.open``.
        output_size: ``(width, height)`` the image is resized to. The image is
            stretched to exactly this size; the aspect ratio is not preserved.
        quality: JPEG quality value passed to ``Image.save``.

    Returns:
        str: Base64 text of the JPEG bytes (without a ``data:`` URI prefix).
    """
    # Modes the JPEG writer can store as-is. Anything else (RGBA, LA, P, ...)
    # makes save(format="JPEG") raise, so such images are flattened to RGB
    # first; any transparency is discarded by that conversion.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    with Image.open(image_path) as img:
        if img.mode not in jpeg_modes:
            img = img.convert("RGB")
        img = img.resize(output_size, Image.Resampling.LANCZOS)
        buffer = io.BytesIO()
        img.save(buffer, format="JPEG", quality=quality)
    return base64.b64encode(buffer.getvalue()).decode()

In [4]:
def get_completion(prompt, model="gpt-4-1106-preview"):
    """Send one user prompt to the chat completion API and return the reply text.

    Args:
        prompt: Text used as the content of the single user message.
        model: Name of the chat model to query.

    Returns:
        The ``content`` of the first choice in the response.
    """
    response = openai.ChatCompletion.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        temperature=0,  # zero temperature: least random output
    )
    first_choice = response.choices[0]
    return first_choice.message["content"]

In [5]:
def send_image_query_to_openai(encoded_image, question="What’s in this image?"):
    """Ask the vision model a question about a base64-encoded JPEG image.

    Args:
        encoded_image: Base64 text of the image; it is embedded in a
            ``data:image/jpeg;base64,...`` URL.
        question: Text prompt sent alongside the image. Defaults to the
            question this function previously hard-coded, so existing
            one-argument calls behave as before.

    Returns:
        The ``content`` of the first choice in the response.
    """
    # One user message carrying two parts: the text question and the image.
    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": question},
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{encoded_image}"
                        },
                    },
                ],
            }
        ]
    }

    response = openai.ChatCompletion.create(**payload)
    return response.choices[0].message["content"]

In [6]:
def query_openai_with_image_url(image_url, question="What’s in this image?"):
    """Ask the vision model a question about an image referenced by URL.

    Args:
        image_url: URL of the image, passed through unchanged.
        question: Text prompt sent together with the image.

    Returns:
        The ``content`` of the first choice, or ``"No content found"`` when
        the response has no ``choices`` entry or the list is empty.
    """
    text_part = {"type": "text", "text": question}
    image_part = {
        "type": "image_url",
        "image_url": {"url": image_url},
    }
    user_message = {"role": "user", "content": [text_part, image_part]}

    response = openai.ChatCompletion.create(
        model="gpt-4-vision-preview",
        messages=[user_message],
    )

    # Guard clause: bail out when there is nothing to read from the response.
    if 'choices' not in response or not response.choices:
        return "No content found"
    return response.choices[0].message["content"]


In [7]:
def read_python_file_to_string(file_path):
    """Return the UTF-8 text of a file, or a message string describing the failure.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file's contents; ``"File not found."`` if the path does not exist;
        otherwise ``"An error occurred: <details>"`` for any other exception.
        Errors are reported through the return value, never raised.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as source:
            return source.read()
    except FileNotFoundError:
        return "File not found."
    except Exception as exc:
        return "An error occurred: " + str(exc)

In [8]:
def notebook_to_string(file_path):
    """Load a JSON file (such as an .ipynb) and return it re-serialized with 4-space indent.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The pretty-printed JSON text on success. On failure a message string
        is returned instead of raising: one for a missing file, one for
        invalid JSON, and a generic one for any other exception.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            parsed = json.load(handle)
        pretty = json.dumps(parsed, indent=4)
        return pretty
    except FileNotFoundError:
        return f"File not found: {file_path}"
    except json.JSONDecodeError:
        return f"Error decoding JSON from file: {file_path}"
    except Exception as exc:
        return f"An error occurred: {exc}"

In [9]:
def string_to_notebook(json_string, file_path):
    """Parse a JSON string and write it to ``file_path`` with 4-space indentation.

    Args:
        json_string: JSON text to parse (for example a serialized notebook).
        file_path: Destination path; the file is created or overwritten.

    Returns:
        A success message naming the file, ``"Invalid JSON string"`` when the
        text cannot be parsed, or ``"An error occurred: <details>"`` for any
        other exception. Nothing is raised to the caller.
    """
    try:
        # Parse first so an invalid string never truncates an existing file.
        parsed = json.loads(json_string)
        with open(file_path, 'w', encoding='utf-8') as target:
            json.dump(parsed, target, indent=4)
    except json.JSONDecodeError:
        return "Invalid JSON string"
    except Exception as exc:
        return "An error occurred: " + str(exc)
    return f"Successfully created notebook: {file_path}"

In [10]:
def markdown_to_string(file_path):
    """Read a UTF-8 text file and return its contents.

    Args:
        file_path: Path of the file to read.

    Returns:
        The file's text, or a message string when reading fails:
        ``"File not found: <path>"`` for a missing file and
        ``"An error occurred: <details>"`` for anything else. Callers must
        inspect the returned text to tell content from an error message.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as md_file:
            return md_file.read()
    except FileNotFoundError:
        return "File not found: {}".format(file_path)
    except Exception as exc:
        return "An error occurred: {}".format(str(exc))


In [11]:
def string_to_markdown(content, file_path):
    """Write ``content`` to ``file_path`` as UTF-8, replacing any existing file.

    Args:
        content: Text to write.
        file_path: Destination path.

    Returns:
        ``"Successfully written to <path>"`` on success, otherwise
        ``"An error occurred: <details>"``; exceptions are not propagated.
    """
    try:
        with open(file_path, 'w', encoding='utf-8') as target:
            target.write(content)
    except Exception as exc:
        return "An error occurred: " + str(exc)
    return "Successfully written to {}".format(file_path)



In [12]:
def chunk_text_by_question(text):
    """Split text into chunks that each begin with a ``### Question N`` header.

    A header is a line of the form ``### Question <digits>`` preceded and
    followed by a newline. Each returned chunk starts with its header
    (including the leading newline) and runs up to the next header.

    Args:
        text: The full document text.

    Returns:
        list[str]: The chunks in document order. Text before the first header
        is kept as the first element unless it is empty or whitespace only.
    """
    # The previous look-behind pattern could not work: Python's `re` rejects
    # variable-width look-behinds (`\d+`), so every call raised re.error.
    # Splitting on a capturing group keeps the headers in the result, at the
    # odd indexes: [preamble, header, body, header, body, ...].
    parts = re.split(r'(\n### Question \d+\n)', text)

    questions = [parts[0]]
    questions.extend(
        header + body for header, body in zip(parts[1::2], parts[2::2])
    )

    # Remove the first element when there was nothing before the first header
    if questions and questions[0].strip() == '':
        questions = questions[1:]

    return questions

In [13]:
import re

def chunk_text_by_question(text):
    """Return one stripped chunk per ``### Question N`` header found in ``text``.

    Each chunk is the header line followed by everything up to the next
    header, with surrounding whitespace removed. Text that precedes the first
    header is discarded.

    Args:
        text: The full document text.

    Returns:
        list[str]: The chunks in document order (empty if no header matches).
    """
    # The capturing group makes re.split keep the headers: the result is
    # [preamble, header, body, header, body, ...].
    pieces = re.split(r'(\n### Question \d+\n)', text)
    headers = pieces[1::2]
    bodies = pieces[2::2]
    return [(header + body).strip() for header, body in zip(headers, bodies)]



In [14]:
def append_to_markdown(file_path, text_to_append):
    """Append text to the end of a UTF-8 file, creating the file if needed.

    Args:
        file_path: Path of the file to append to.
        text_to_append: Text written at the end of the file, as-is (no
            newline is added).

    Returns:
        ``"Text appended successfully to <path>"`` on success,
        ``"File not found."`` when opening raises ``FileNotFoundError``, or
        ``"An error occurred: <details>"`` for any other exception.
    """
    try:
        with open(file_path, 'a', encoding='utf-8') as target:
            target.write(text_to_append)
    except FileNotFoundError:
        return "File not found."
    except Exception as exc:
        return "An error occurred: " + str(exc)
    return "Text appended successfully to {}".format(file_path)

# Example usage
# file_path = 'test_2.md'  # Replace with your Markdown file path
# text_to_append = question_list[29]
# result = append_to_markdown(file_path, text_to_append)
# print(result)


In [15]:
import nbformat
import os

def notebook_to_markdown(notebook_path, markdown_path):
    """Convert a notebook's markdown and code cells into a single Markdown file.

    Markdown cells are copied as-is; code cells are wrapped in a fenced
    ``python`` block. Cells of any other type are skipped. The pieces are
    joined with a single newline.

    Args:
        notebook_path: Path of the notebook to read (parsed as nbformat v4).
        markdown_path: Destination Markdown file; created or overwritten.

    Returns:
        A success message naming the output file, ``"Notebook file not
        found."`` on ``FileNotFoundError``, or ``"An error occurred:
        <details>"`` for any other exception.
    """
    try:
        with open(notebook_path, 'r', encoding='utf-8') as nb_file:
            nb = nbformat.read(nb_file, as_version=4)

        rendered = []
        for cell in nb.cells:
            kind = cell.cell_type
            if kind == 'code':
                code = ''.join(cell.source)
                rendered.append(f"```python\n{code}\n```")
            elif kind == 'markdown':
                rendered.append(''.join(cell.source))

        document = '\n'.join(rendered)
        with open(markdown_path, 'w', encoding='utf-8') as md_file:
            md_file.write(document)

        return "Markdown file created successfully: {}".format(markdown_path)
    except FileNotFoundError:
        return "Notebook file not found."
    except Exception as exc:
        return "An error occurred: " + str(exc)




In [16]:
# NOTE: this repeats the definition from an earlier cell; being the later one,
# it is the version in effect once the notebook has run top to bottom.
def resize_compress_encode_image(image_path, output_size=(300, 100), quality=60):
    """Resize an image, compress it as JPEG and return the result base64-encoded.

    Args:
        image_path: Path (or file object) handed to ``Image.open``.
        output_size: Target ``(width, height)``. The image is stretched to
            exactly this size, so the aspect ratio may change.
        quality: JPEG quality value passed to ``Image.save``.

    Returns:
        str: Base64 text of the JPEG bytes (without a ``data:`` URI prefix).
    """
    # JPEG cannot hold alpha or palette data: saving an RGBA/LA/P image (most
    # PNGs with transparency) raises. Convert such images to RGB up front and
    # leave modes the JPEG writer already accepts untouched.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    with Image.open(image_path) as img:
        if img.mode not in jpeg_modes:
            img = img.convert("RGB")
        img = img.resize(output_size, Image.Resampling.LANCZOS)
        buffer = io.BytesIO()
        img.save(buffer, format="JPEG", quality=quality)
    return base64.b64encode(buffer.getvalue()).decode()

# Prompt 1: Create ebook

In [17]:
# Ask the model to expand each source note into a blog post and save the reply.
# Every entry in `files` writes to the same output_path.
files = ["example.md"]
output_path = "example_2.md"

for file in files:
    print(file)
    if not os.path.isfile(file):
        # markdown_to_string reports a missing file by returning a message
        # string; without this guard that message would be embedded in the
        # prompt and sent to the model as if it were the document.
        print(f"Skipping {file}: file not found")
        continue
    content_string = markdown_to_string(file)

    prompt = f'''Please read example memory tricks for the Databricks Certified Data Engineer Associate exam and create 
    a blog expanding it with more clever memory techniques.
    My input json {content_string}'''

    completion = get_completion(prompt)
    if completion:
        print(output_path)
        result = string_to_markdown(completion, output_path)

example.md
example_2.md


In [18]:
# Ask the model to lengthen the generated blog post.
# Input and output are the same file, so each run replaces example_2.md with
# the model's rewrite of its current contents.
files = ["example_2.md"]
output_path = "example_2.md"

for file in files:
    print(file)
    if not os.path.isfile(file):
        # markdown_to_string reports a missing file by returning a message
        # string; without this guard that message would be embedded in the
        # prompt and sent to the model as if it were the document.
        print(f"Skipping {file}: file not found")
        continue
    content_string = markdown_to_string(file)

    prompt = f'''
    Make it better blog more longer.
    My input json {content_string}'''

    completion = get_completion(prompt)
    if completion:
        print(output_path)
        result = string_to_markdown(completion, output_path)

example_2.md
example_2.md
