In [184]:
import html
import pprint
import time

import torch
import tensorflow as tf
from datasets import load_dataset
import openai
from transformers import pipeline
from sentence_transformers import SentenceTransformer, util
from IPython.display import display, HTML

In [185]:
# Wall-clock reference point; the final cell subtracts it to report total runtime
start_time = time.time()

# Sanity check: prints True when PyTorch can see a CUDA device
gpu_visible = torch.cuda.is_available()
print(gpu_visible)


True


In [186]:
# Read the OpenAI API key from a local file. The context manager closes the
# file handle as soon as the key has been read, instead of leaving it open.
with open("open_ai_API.txt", "r") as key_file:
    API_KEY = key_file.read().strip()  # strip() drops surrounding whitespace/newline
openai.api_key = API_KEY


In [187]:
# Run on the GPU when one is usable, otherwise fall back to the CPU
device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device_name)

In [188]:
# Load the 'train' split of the manim code-generation dataset.
# Later cells read its 'query' and 'answer' fields.
data = load_dataset(
    "generaleoley/manim-codegen",
    split="train",
)

In [189]:
# Index of the dataset row under test; change this value to analyse another sample
reference = 3

# The 'answer' field of that row holds the code to be analysed
selected_row = data[reference]
code_string = selected_row['answer']


In [190]:
# Replace literal backslash-n sequences with real newline characters so the
# code reads as normal multi-line source
formatted_code_string = code_string.replace('\\n', '\n')

# Print the formatted code
print(formatted_code_string)



 from manim import *

class DoubleIntegralPolarExample(Scene):
    def construct(self):
        # Displaying the problem
        problem = MathTex(r"\int_{0}^{\pi/2} \int_{0}^{1} r dr\,d\theta")
        self.play(Write(problem))
        self.wait(1)

        # Explanation for inner integral
        explanation1 = Text("Integrate with respect to r first", font_size=24).to_edge(UP)
        self.play(Write(explanation1))
        self.wait(2)

        # Inner integral step
        inner_integral = MathTex(r"= \int_{0}^{\pi/2} [\frac{1}{2}r^2]_{0}^{1} d\theta")
        self.play(ReplacementTransform(problem.copy(), inner_integral))
        self.wait(1)

        # Explanation for final integral
        explanation2 = Text("Now integrate with respect to \( \theta \)", font_size=24).to_edge(UP)
        self.play(Transform(explanation1, explanation2))
        self.wait(2)

        # Final solution
        final_solution = MathTex(r"= [\frac{1}{2}\theta]_{0}^{\pi/2}")
        self.play(Replacem

In [191]:
# Prepare the messages for the Chat API. The reply is embedded later and compared
# against the dataset queries with cosine similarity.
# Both user messages embed the full formatted code.
messages = [
    {"role": "user", "content": f"Explain the purpose of this code without getting into technical details, \n\nParagraph 1:\n{formatted_code_string}"},

    {"role": "user", "content": f"In detail, explain what is happening with the visuals,  \n\nParagraph 1:\n{formatted_code_string} "}
]

# Make the API call
response = openai.chat.completions.create(
    model="gpt-4o-mini",  # Or another model as appropriate
    messages=messages,
    max_tokens=150,  # Hard cap on reply length; longer explanations are cut off
    temperature=0.01  # Near-zero temperature keeps the reply close to deterministic
)


In [192]:
# Keep the text of the first choice; it is the description that gets compared
# against every dataset query further down
first_reply = response.choices[0]
open_gen = first_reply.message.content
print(open_gen)

The code provided is a script for creating an animated mathematical explanation using the Manim library, which is designed for producing mathematical animations. Here’s a detailed breakdown of what happens visually in each step of the code:

1. **Displaying the Problem**:
   - The animation begins by displaying the double integral problem: \(\int_{0}^{\pi/2} \int_{0}^{1} r \, dr \, d\theta\). 
   - This is done using a mathematical text format (MathTex), which allows for clear and professional rendering of mathematical expressions.
   - The `Write` function animates the appearance of this integral on the screen, making it look like it is being written out in real-time. After this


In [193]:
# Initialize the model
# Sentence-embedding model used by similarity_f, moved onto the selected device
model = (
    SentenceTransformer('all-MiniLM-L6-v2')
    .to(device)
)

In [194]:
def similarity_f(text1,text2):
    """
    Score how close two texts are in the sentence-embedding space.

    Both texts are encoded with the module-level `model`, and the cosine
    similarity of the two resulting embeddings is returned.

    Args:
    -----
    text1 (str):
        First text to embed.

    text2 (str):
        Second text to embed.

    Returns:
    --------
    float:
        Cosine similarity of the two embeddings, in the range [-1, 1]:
        - 1 means the embeddings point in the same direction.
        - 0 means the embeddings are orthogonal (no similarity).
        - -1 means the embeddings point in opposite directions.
    """
    # Encode in argument order: text1 first, then text2
    first_vector, second_vector = (
        model.encode(text, convert_to_tensor=True) for text in (text1, text2)
    )
    cosine = util.pytorch_cos_sim(first_vector, second_vector)
    # .item() unwraps the single-element result into a plain Python number
    return cosine.item()

In [195]:
def printer(printing_content):
    """
    Displays content in the notebook as word-wrapped, literal text.

    The content is HTML-escaped and placed in a `<div>` styled with
    `white-space: pre-wrap`, so spaces and newlines are preserved while long
    lines wrap instead of overflowing the output area. Escaping means
    characters such as `<`, `>` and `&` (common in code or math inside a model
    reply) are shown as written rather than being parsed as HTML markup.

    Args:
    -----
    printing_content (str):
        The content to be displayed. Non-string values are converted with `str()`.

    Returns:
    --------
    None:
        The content is rendered through IPython's `display` and `HTML`; no
        value is returned.
    """
    # Escape markup-significant characters; quotes are harmless inside element
    # content, so they are left untouched.
    escaped_content = html.escape(str(printing_content), quote=False)

    # Display the output with word wrapping
    wrapped_output = f"<div style='white-space: pre-wrap;'>{escaped_content}</div>"
    return display(HTML(wrapped_output))

In [196]:
top_n = 10  # Number of top similarities to keep

# Score every dataset query against the generated description. Each record is a
# dict with the similarity score, the dataset row index and the query text.
# The 'query' column is read once instead of fetching each row twice by index.
scored_queries = [
    {'score': similarity_f(open_gen, query_text), 'index': i, 'query': query_text}
    for i, query_text in enumerate(data['query'])
]

# Sort once by score, highest first, and keep the top N. sorted() is stable
# even with reverse=True, so equal scores stay in dataset order.
top_similarities = sorted(scored_queries, key=lambda x: x['score'], reverse=True)[:top_n]

# Now, top_similarities contains the top 10 highest scores along with their indices

In [197]:
# Spot check: show the third-ranked match (index 2) with its score, row index and query
print(top_similarities[2])

{'score': 0.7671608924865723, 'index': 444, 'query': 'I would like to create an educational animation that visually walks through the process of solving a double integral in polar coordinates. The specific double integral problem to demonstrate is the integral from 0 to π/2 of the integral from 0 to 1 of ρ² sin(φ) dρ dφ. First, the problem should be presented, and then the animation should show the step-by-step process. It should start by integrating with respect to ρ, including replacing the ρ terms with the limits of integration and simplifying the expression. Next, display a message about integrating the resulting expression with respect to φ. Finally, show that work and the calculation of the final numerical result, which should come out to 2/3. Arrange all the parts of this explanation in a clear way, one after the other, and use pauses to allow time to understand each step. Can you make an animation like this that would be useful in a mathematics classroom?'}


In [198]:
# Split the ranked records into three parallel lists, all in ranking order
all_scores = [match['score'] for match in top_similarities]      # similarity values
all_indices = [match['index'] for match in top_similarities]     # dataset row numbers
all_queries = [match['query'] for match in top_similarities]     # candidate prompt texts

In [199]:
# Spot check: query text of the second-ranked match (index 1)
top_similarities[1]['query']

'Can you create an educational animation that visually explains the step-by-step process of solving the double integral in polar coordinates of the function \\( r \\) over the region defined by \\( \\theta \\) from 0 to \\( \\pi \\) and \\( r \\) from 0 to 1, including written explanations and the final solution?'

In [200]:
def create_message_for_comparison(code_snippet, paragraphs):
    """
    Build the chat messages that ask the model which prompt produced a piece of code.

    This function only assembles the request; it does not call the API.

    Args:
    -----
    code_snippet (str):
        The Python code to show to the model inside a fenced code section.

    paragraphs (list of str):
        Candidate prompts. Each one is appended on its own line as
        "Paragraph <n>: <text>", numbered from 1.

    Returns:
    --------
    list:
        Two message dictionaries, in this order:
        1. A system message describing the assistant's task.
        2. A user message holding the code, the instructions and the numbered candidates.
    """
    # Instruction text: the code in a fenced section followed by the task statement.
    # This literal is sent to the model verbatim, so its wording and whitespace matter.
    prompt_text = f"""
    Here is the Python code snippet:
    
    ```python
    {code_snippet}
    ```

    Below are the prompts {len(paragraphs)}. Compare each prompt with the provided code snippet and determine which prompt is the most similar to the code. Provide the index of the most similar prompt and print that prompt under ** ** without paragraph index or anything else.
    """

    # One line per candidate, numbered starting at 1
    numbered_candidates = "".join(
        f"\nParagraph {position}: {candidate}"
        for position, candidate in enumerate(paragraphs, start=1)
    )

    return [
        {
            "role": "system",
            "content": "You are an assistant that compares prompts to find the one which most likely generated the provided code",
        },
        {
            "role": "user",
            "content": prompt_text + numbered_candidates,
        },
    ]

In [201]:
# Build the comparison request from the code under test and the top-ranked candidate queries.
# This rebinds `messages`, replacing the list built for the first API call.
# NOTE(review): the raw `code_string` is passed here, not `formatted_code_string` — confirm that is intended.
messages = create_message_for_comparison(code_string, all_queries)

In [202]:
# Ask GPT-4o-mini which candidate prompt best matches the code
response = openai.chat.completions.create(
    temperature=0.01,
    max_tokens=700,
    messages=messages,
    model="gpt-4o-mini",
)

# Take the reply text from the first choice and show it with word wrapping
first_choice = response.choices[0]
response_content = first_choice.message.content
printer(response_content)

In [203]:
# Isolates output and runs a search for it through the dataset to find its corresponding index in the dataset

def searching(response):
    """
    Find and print the dataset rows whose query contains the phrase in a model reply.

    The phrase is `response` with its first 20 and last 15 characters removed,
    and it is matched case-insensitively as a substring of each row's 'query'.
    Reads the module-level `data` and `reference`.

    NOTE(review): the fixed 20/-15 offsets presumably strip the text surrounding
    the quoted prompt in the reply; this depends on the reply's exact format —
    confirm. If the remaining phrase is empty, every row matches.

    Args:
    -----
    response (str):
        The reply text to extract the search phrase from.

    Returns:
    --------
    None:
        Results are printed: a summary line with `reference` and the number of
        matches, then each matching row number and its query.
    """
    # Use the argument rather than the global reply, so the function searches
    # for whatever text the caller passes in
    search_phrase = response[20:-15]
    needle = search_phrase.lower()

    # Search for the phrase in the 'query' column and store the row number
    filtered_rows_with_indices_query = [
        (index, example)
        for index, example in enumerate(data)
        if needle in example['query'].lower()
    ]

    print("Reference =", reference,",  No. of instances =",len(filtered_rows_with_indices_query))

    # Display the filtered rows with their corresponding row numbers
    for index, row in filtered_rows_with_indices_query:
        print(f"Row {index}:")
        print(row['query'])



In [204]:
# searching() prints its own results and returns None, so call it directly;
# wrapping it in print() would add a stray "None" line to the output
searching(response_content)

Reference = 3 ,  No. of instances = 1
Row 615:
Can you create an educational animation that visually explains the process of solving a double integral in polar coordinates, specifically showing step-by-step how to integrate `∫ from 0 to 2π ∫ from 0 to 1 of r dr dθ`? The animation should include the initial problem, text explanations before each step, and the mathematical work of performing the inner integral with respect to 'r' first, then computing the outer integral with respect to 'θ', and finally displaying the numeric answer. Each step should appear one after the other with a short pause between them for clarity.
None


In [205]:
# Total wall-clock time of the run, in seconds, measured from start_time
end_time = time.time()
elapsed_seconds = end_time - start_time
print(elapsed_seconds)

18.88306164741516
