In [1]:
import numpy as np
import openai
from metaphor_python import Metaphor
from transformers import pipeline
import re
from sentence_transformers import SentenceTransformer, util
import requests

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Configure the two API clients used by the helper functions in this notebook.
# NOTE(review): OPENAI_KEY and METAPHOR_KEY are not defined in any visible cell --
# presumably they were set in a cell that was removed; confirm they are loaded
# from the environment (not hardcoded) so the notebook runs on a fresh kernel.
openai.api_key = OPENAI_KEY
metaphor = Metaphor(METAPHOR_KEY)

# Metaphor client operations of interest (names as jotted by the author;
# TODO confirm exact spelling against the installed client):
# metaphor.search() --> Searching content
# metaphor.findsimilar() --> find similar links
# metaphor.getcontents() --> Get contents

In [4]:
def generate_query(topic):
    """Ask the chat model to turn ``topic`` into a short search query.

    Returns the raw ChatCompletion response; the generated query text is at
    ``response.choices[0].message.content``.
    """
    system_prompt = {
        "role": "system",
        "content": "You are a helpful assistant and a knowledgeable academic researcher.",
    }
    user_prompt = {
        "role": "user",
        "content": f"Generate a short search query for the topic: {topic}",
    }
    return openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[system_prompt, user_prompt],
    )

def search_topic(topic, num_results=10, use_autoprompt=True):
    """Search for ``topic`` with the module-level ``metaphor`` client.

    Args:
        topic: Query text forwarded to ``metaphor.search``.
        num_results: Number of results to request (default 10, the value
            that was previously hard-coded).
        use_autoprompt: Forwarded to ``metaphor.search`` (default True, the
            value that was previously hard-coded).

    Returns:
        Whatever ``metaphor.search`` returns, unmodified.
    """
    return metaphor.search(
        topic,
        num_results=num_results,
        use_autoprompt=use_autoprompt,
    )

def get_notes(content_dict):
    """Ask the chat model to write academic notes from the given source texts.

    Args:
        content_dict: Mapping of source URL -> extracted text (as built by
            ``fetch_contents``). A plain string is also accepted and is sent
            as-is, without any link header.

    Returns:
        The raw ChatCompletion response; the notes text is at
        ``response.choices[0].message.content``.
    """
    if isinstance(content_dict, str):
        all_text = content_dict
    else:
        sections = []
        for url, extract in content_dict.items():
            body = extract or ""
            # f-string is required here: without it the literal "{cont}" was
            # sent to the model, so it never saw the URLs it is asked to cite.
            sections.append(f'\n The link for this article is: {url} \n' + body + "\n")
        all_text = "".join(sections)

    SYSTEM_MESSAGE = "You are an academic researcher. Present the given information as detailed academic notes on the content for a student to understand the topic. Cite all sources at the end of the article."

    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": SYSTEM_MESSAGE},
            {"role": "user", "content": all_text},
        ],
    )
    return completion


# Matches URLs whose *host* is YouTube (youtube.com, any subdomain of it, or the
# youtu.be short-link domain), with or without an http(s) scheme.
_YOUTUBE_HOST_RE = re.compile(
    r"^(?:https?://)?(?:[\w-]+\.)*(?:youtube\.com|youtu\.be)(?=[/:?#]|$)",
    re.IGNORECASE,
)


def categorize_content(content_list):
    """Split search results into text pages and YouTube videos.

    Classification is by URL host, so ``youtu.be`` short links count as video
    and pages that merely mention ``youtube.com`` in their path or query string
    are kept as text.

    Args:
        content_list: Iterable of objects exposing a ``url`` string attribute.

    Returns:
        Tuple ``(text_cont, video_cont)`` of lists, each preserving input order.
    """
    text_cont = []
    video_cont = []
    for content in content_list:
        if _YOUTUBE_HOST_RE.match(content.url):
            video_cont.append(content)
        else:
            text_cont.append(content)
    return text_cont, video_cont

def get_most_relevant_content(text_content, query, model=None):
    """Rank content items by how similar their titles are to ``query``.

    Titles and the query are embedded together with a paraphrase sentence
    transformer, and items are ordered by descending cosine similarity between
    each title embedding and the query embedding.

    Args:
        text_content: List of objects exposing a ``title`` attribute.
        query: Query text to compare the titles against.
        model: Optional pre-loaded encoder with a SentenceTransformer-style
            ``encode`` method. When omitted, ``'paraphrase-MiniLM-L6-v2'`` is
            loaded on each call; pass a model to avoid reloading it.

    Returns:
        A new list with the items of ``text_content``, most relevant first.
        An empty input yields an empty list without loading the model.
    """
    if not text_content:
        return []

    if model is None:
        model = SentenceTransformer('paraphrase-MiniLM-L6-v2')

    # A missing title is embedded as an empty string rather than failing.
    titles = [content.title or "" for content in text_content]
    embeddings = model.encode(titles + [query], convert_to_tensor=True)
    # The query was appended last, so its embedding is the final row.
    query_embedding = embeddings[-1]
    scores = util.pytorch_cos_sim(query_embedding, embeddings[:-1])[0]
    # Convert to plain ints so list indexing does not depend on tensor scalars.
    ranked_indices = scores.argsort(descending=True).tolist()
    return [text_content[i] for i in ranked_indices]

def fetch_contents(contents):
    """Build a ``{url: extract}`` mapping from the given content items.

    If two items share a URL, the later item's extract wins.
    """
    return {item.url: item.extract for item in contents}
    

In [None]:
# Smoke-test the search helper on a fixed topic and pull the contents of the results.
# These names are kernel globals that later cells read and reassign.
topic_info = search_topic('machine learning')
conts = topic_info.get_contents()
content_list = conts.contents

# Quick look at the container types and how many items came back.
type(content_list), len(content_list), type(topic_info.results)

In [4]:
# Display every fetched content item (id, url, title, extract); the output is long.
conts.contents

[DocumentContent(id='pRCiC-WJjIZQqU78uOPStQ', url='https://www.youtube.com/watch?v=KNAWp2S3w94&feature=youtu.be', title='Intro to Machine Learning (ML Zero to Hero - Part 1)', extract="Machine Learning represents a new paradigm in programming, where instead of programming explicit rules in a language such as Java or C++, you build a system which is trained on data to infer the rules itself. But what does ML actually look like? In part one of Machine Learning Zero to Hero, AI Advocate Laurence Moroney (lmoroney@) walks through a basic Hello World example of building an ML model, introducing ideas which we'll apply in later episodes to a more interesting problem: computer vision.\n\nTry this code out for yourself in the Hello World of Machine Learning → https://goo.gle/2Zp2ZF3\n\nThis video is also subtitled in Chinese, Indonesian, Italian, Japanese, Korean, Portuguese, and Spanish.\n\nWatch more Coding TensorFlow → https://bit.ly/Coding-TensorFlow \nSubscribe to the TensorFlow channel →

In [51]:
# Ask the chat model for a search query, then show just the generated query text.
# `search_query` holds the full response object, not the string.
search_query = generate_query('Working of a compiler')
search_query.choices[0].message.content

How does a compiler work?


In [52]:
# Search with the generated query text and fetch the contents of the results.
# Rebinds topic_info / conts / content_list from the earlier 'machine learning' cell.
topic_info = search_topic(search_query.choices[0].message.content)
conts = topic_info.get_contents()
content_list = conts.contents

# Display the fetched content items.
content_list

[DocumentContent(id='UC8L7F_QsXwK3hIbtiAnVA', url='https://towardsdatascience.com/understanding-compilers-for-humans-version-2-157f0edb02dd?gi=ed57224cd2a2', title='Understanding Compilers — For Humans (Version 2)', extract='<div><div><figure></figure><p></p><h2>How Programming Languages Work</h2><p></p><p>Understanding your compiler internally allows you to use it effectively. Walk through how programming languages and compilers work in this chronological synopsis. Lots of links, example code, and diagrams have been composed to aid in your understanding.</p></div><div><h2>Author’s Note</h2><p><em>Understanding Compilers — For Humans (Version 2)</em> is a successor to my second article on Medium, with over 21 thousand views. I am so glad I could make a positive impact on people’s education, and I am excited to bring <strong>a complete rewrite based on the input I received from the original article</strong>.</p><p>I chose Rust as this work’s primary language. It is verbose, efficient, m

In [30]:
# Generate notes from the first search result only. get_notes indexes its argument
# by URL, so build the {url: extract} mapping instead of passing the bare extract.
notes = get_notes(fetch_contents(content_list[:1]))

In [34]:
# Show the notes text from the chat completion response.
notes.choices[0].message.content

"Load balancing is a concept used to distribute incoming application and network traffic across multiple servers, ensuring that no single server is overloaded. Load balancers are placed between users and server clusters and distribute requests from users across all servers capable of fulfilling those requests. The load balancer utilizes algorithms to evenly distribute the traffic, increasing the capacity and reliability of applications. If one server goes down, the load balancer redirects traffic to the remaining healthy servers. Load balancers are used by organizations that have high levels of website activity or mission-critical web and mobile applications. There are two types of load balancers available: hardware devices and software solutions. Hardware devices are physical devices installed in organizations' IT departments or data centers, while software solutions can be installed on physical or virtual machines in data centers, on-premises, or in the cloud. Using load balancers he

In [73]:
# `categorize_urls` is not defined in any visible cell, so this cell fails on a
# fresh kernel. Build the same (text_urls, video_urls) pair from categorize_content.
text_content, video_content = categorize_content(content_list)
urls = (
    [content.url for content in text_content],
    [content.url for content in video_content],
)

for cont in content_list:
    print(cont.title)

search_query.choices[0].message.content, urls

Understanding Compilers — For Humans (Version 2)
How a Compiler Works in ~1 minute

Redirecting…
Understanding Compilers — For Humans
Anatomy of a Compiler
How do computers read code?
How Compiler Works
GitHub - ahoppen/introduction-to-compilers: Swift Playground giving an overview over the inner workings of modern compilers
The Heart Of A Compiler


('How does a compiler work?',
 (['https://towardsdatascience.com/understanding-compilers-for-humans-version-2-157f0edb02dd?gi=ed57224cd2a2',
   'http://staff.ustc.edu.cn/~han/CS152CD/Content/COD3e/CDSections/CD2.12.pdf',
   'https://nurkiewicz.com/59',
   'https://medium.com/@thelukaswils/understanding-compilers-for-humans-ba970e045877',
   'http://www.cs.man.ac.uk/~pjj/farrell/comp3.html',
   'http://xahlee.info/parser/compiler_tutorial.html',
   'https://github.com/ahoppen/introduction-to-compilers',
   'https://www.i-programmer.info/babbages-bag/327-compiler.html'],
  ['https://www.youtube.com/watch?v=IhC7sdYe-Jg&list=UUVaFJAgYKMWu9eHQGQAsT0Q',
   'https://www.youtube.com/watch?v=QXjU9qTsYCc']))

In [67]:
# For now, summarize a hand-picked subset of the results (items 0, 3 and 5).
# get_notes indexes its argument by URL, so pass a {url: extract} mapping rather
# than the concatenated extract strings; this also lets the notes cite each link.
# TODO: add a sample YouTube suggestion at the end for video information.

info = fetch_contents([content_list[0], content_list[3], content_list[5]])
sample_sum = get_notes(info)

In [68]:
# Show the summary text from the chat completion response.
sample_sum.choices[0].message.content

"Title: How Programming Languages Work\n\nIntroduction:\n- Compiler is a software that translates human-readable text into computer-readable machine code.\n- Understanding compilers allows for effective use.\n- This article provides a chronological synopsis of how programming languages and compilers work.\n- Contains links, example code, and diagrams for better understanding.\n\nAuthor's Note:\n- This article is a complete rewrite based on the author's previous article that had over 21 thousand views.\n- Rust was chosen as the primary language for this work due to its simplicity for making compilers.\n- The article is written to keep the reader's attention and provides links to deeper resources on specific topics.\n- Reader's questions and suggestions are welcome in the comment section.\n\nWhat a Compiler is:\n- A programming language is actually a software called a compiler.\n- The compiler reads a text file, processes it, and generates binary code.\n- This process allows a computer t

In [79]:
# `get_most_relevent_titles` is not defined in any visible cell. Rank the text
# results with get_most_relevant_content and show their URLs, most relevant first.
# The ranking query must be the generated query *text*, not the response object.
text_content = categorize_content(content_list)[0]
ranked_text_content = get_most_relevant_content(
    text_content, search_query.choices[0].message.content
)
[content.url for content in ranked_text_content]

['https://towardsdatascience.com/understanding-compilers-for-humans-version-2-157f0edb02dd?gi=ed57224cd2a2',
 'https://medium.com/@thelukaswils/understanding-compilers-for-humans-ba970e045877',
 'http://xahlee.info/parser/compiler_tutorial.html',
 'https://www.i-programmer.info/babbages-bag/327-compiler.html',
 'https://github.com/ahoppen/introduction-to-compilers',
 'https://nurkiewicz.com/59',
 'http://www.cs.man.ac.uk/~pjj/farrell/comp3.html',
 'http://staff.ustc.edu.cn/~han/CS152CD/Content/COD3e/CDSections/CD2.12.pdf']

In [90]:
# Manual run of the pipeline: split results, rank text pages, take the top 3, write notes.
text, vid = categorize_content(content_list)
# The helper is named get_most_relevant_content (the old spelling raised NameError on a
# fresh kernel), and it needs the query text rather than the whole response object.
rel_content = get_most_relevant_content(text, search_query.choices[0].message.content)
cont = fetch_contents(rel_content[:3])
notes = get_notes(cont)

In [95]:
# Show the notes text produced by the manual pipeline run.
notes.choices[0].message.content

'- A programming language is actually just software, called a compiler, that reads human-readable code and translates it into computer-readable machine code.\n- Compilers have several steps of processing, including reading source code, sorting the characters into words, determining operations, and generating assembly code or binary.\n- Interpreters are similar to compilers, but they skip code generation and directly execute the code.\n- Assembly language is a low-level language that is still readable by humans, while machine code is the actual instructions executed by the CPU.\n- Assemblers translate assembly language code into machine code, and linkers combine multiple object files into an executable file.\n- The task of a compiler is divided into analysis (parsing) and synthesis (code generation).\n- Intermediate language or bytecode is often generated during parsing and can be used to generate different target languages.\n- Parsing typically involves phases such as tokenization, gra

In [5]:
def generate_academic_notes(topic, top_k=3):
    """Produce academic notes for ``topic`` from the most relevant web pages.

    Pipeline: generate a search query with the chat model, search for it,
    keep the text (non-video) results, rank them against the query, and ask
    the chat model to write notes from the ``top_k`` best pages.

    Args:
        topic: Subject to research.
        top_k: Number of top-ranked pages to feed into the notes (default 3).

    Returns:
        The notes text.

    Raises:
        ValueError: If the search returns no text results, so that no notes
            are generated from an empty prompt.
    """
    query_response = generate_query(topic)
    # The model often wraps its query in literal double quotes; strip them so
    # both the search and the title ranking see the plain query text.
    query_text = query_response.choices[0].message.content.strip().strip('"').strip()

    search_response = search_topic(query_text)
    text_content, _video_content = categorize_content(search_response.get_contents().contents)
    if not text_content:
        raise ValueError(f"No text results found for query: {query_text!r}")

    # Rank against the query string; the full response object was passed before.
    relevant_content = get_most_relevant_content(text_content, query_text)

    content = fetch_contents(relevant_content[:top_k])
    notes_response = get_notes(content)
    return notes_response.choices[0].message.content

In [6]:
# End-to-end run: from here on `notes` is the notes string, not a response object.
notes = generate_academic_notes("Working of a compiler")

In [7]:
# Display the generated notes text.
notes

"Title: Understanding Compilers: Translating High-Level Languages into Machine Code\n\nIntroduction:\n- Computers can only understand machine code, a low-level programming language consisting of sequences of ones and zeros.\n- Machine code is often referred to as binary code, as it is represented using a base 2 number system.\n- High-level programming languages, such as Python, Ruby, C#, and Java, have been developed to resemble human language.\n\nCompilers and Interpreters:\n- Computers do not understand high-level programming languages directly, so translators are needed.\n- Compilers and interpreters are programs that translate code written in high-level languages into machine code.\n- Compiling involves checking the code for syntax errors and generating a file containing compiled code.\n- If a code doesn't compile, it means that errors were found during the translation process.\n- Interpreters work differently from compilers but also translate source code into machine code.\n\nFunc

In [10]:
# NOTE(review): wildcard import of a local `util` module in the middle of the notebook.
# Any names it exports replace same-named definitions from earlier cells -- presumably
# the helper functions above were moved into util.py; confirm, and prefer explicit
# imports in the first cell so it is clear which definitions the cells below use.
from util import *

In [11]:
# End-to-end run on a second topic; overwrites `notes` from the compiler run.
notes = generate_academic_notes("Newton's laws of motion")

In [13]:
# Print the full raw response to inspect its structure. Note in the output that the
# generated query text arrives wrapped in literal double quotes.
print(generate_query("Newton's laws of motion"))

{
  "id": "chatcmpl-84hfhxp1bySFeWQEwT77hDJtoJ4oJ",
  "object": "chat.completion",
  "created": 1696131877,
  "model": "gpt-3.5-turbo-0613",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "\"Newton's laws of motion: overview and application\""
      },
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 36,
    "completion_tokens": 11,
    "total_tokens": 47
  }
}
