In [1]:
import openai
import shutil
import re
import json
import os
import pickle
import nltk
from dotenv import load_dotenv
# Download the NLTK stopwords corpus so stopwords.words() below can read it.
nltk.download('stopwords')
from nltk.corpus import stopwords
# English stopwords as a set. In the visible part of this notebook it is only
# referenced by commented-out title-casing code inside generate_json.
stop_words = set(stopwords.words('english'))

# Load variables from .env.local into the environment, then give the OpenAI
# client the key stored under OPENAI_API_KEY (None if the variable is unset).
load_dotenv('.env.local')
openai.api_key = os.getenv("OPENAI_API_KEY")

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/alaney2/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [2]:
def generate_prompt(topic):
    """Build the chat prompt asking for a topic's JSON entry and wiki page.

    Args:
        topic: Topic identifier with words separated by underscores,
            e.g. "q_learning".

    Returns:
        The prompt string: a "Topic: ..." header line followed by the fixed
        instruction text.
    """
    # "q_learning" -> "Q Learning": split on underscores, capitalize each word.
    display_topic = ' '.join(word.capitalize() for word in topic.split('_'))
    header = f'Topic: {display_topic}\n'
    # NOTE(review): the sentence "specifically relevant to the given, rather than"
    # below looks like it is missing the word "topic". Left untouched here because
    # editing the prompt changes what the model is asked.
    instructions = '''
    You are a world-renowned AI and ML expert.
    Provide a JSON object containing the topic, a list of 0-8 prerequisite topics, and a list of 0-8 further readings related to AI, ML, and DL.
    Ensure that the prerequisites and further readings are specifically relevant to the given, rather than broad topics like calculus or statistics.
    When generating topics, prefer the singular form of the topic, such as "convolutional_neural_network" instead of "convolutional_neural_networks" but use the plural form when it makes more sense to ("policy_gradient_methods").
    The name of the JSON object must match exactly with the given topic.
    Ensure that the title field is properly capitalized and spaced and has the right punctuation (such as Q-Learning).
    Also ensure that the topic, prerequisites, and further readings are in snake_case. Do not put single quotes anywhere in the JSON object.
    Use a similar format to the example provided below and ensure that the JSON object is valid.:

    Example:
    {
        "topic_example": {
            "title": "Topic Example",
            "prerequisites": ["page_a", "page_b", "page_d"],
            "further_readings": ["page_c", "page_f", "page_z", "page_s"]
        }
    }

    Next, write a detailed wiki page about the given topic in Markdown format. Always write from a third-person perspective and remain unopinionated.
    Ensure that this wiki page is explicitly in code format. 
    Do not include a "Contents" section. 
    Do not include a "Further Readings" nor a "Prerequisites" section if they just include related topics.
    Use a neutral, unbiased tone without exclamation marks. 
    Ensure that the heading is the same as the title in the JSON object.
    Follow Markdown syntax for headings and formatting, and use LaTeX for equations, with inline equations in pairs of $ and multiline equations in $$.
    Ensure the entire output is less than 3600 tokens long and does not include an extra line at the end of the Markdown.
    '''
    return header + instructions


def generate_completion(prompt, model="gpt-3.5-turbo", temperature=0.2):
    """Send `prompt` as a single user message to the OpenAI chat completion API.

    Args:
        prompt: Text sent as the content of the one "user" message.
        model: Chat model name. Defaults to "gpt-3.5-turbo"; pass e.g. "gpt-4"
            instead of editing this function.
        temperature: Sampling temperature forwarded to the API (default 0.2).

    Returns:
        A tuple (finish_reason, message, completion): the first choice's
        finish reason, the first choice's message text, and the raw response
        object as returned by the API call.
    """
    completion = openai.ChatCompletion.create(
        model=model,
        messages=[
            {"role": "user", "content": prompt}
        ],
        temperature=temperature,
    )
    # Only the first choice is inspected.
    first_choice = completion.choices[0]
    finish_reason = first_choice['finish_reason']
    message = first_choice.message.content
    return finish_reason, message, completion


def generate_json(message, topic):
    """Extract the JSON object from a model reply and merge it into wiki-connections.json.

    The first `{ "<key>": { ... } }` object found in `message` is parsed (after
    removing trailing commas) and merged into the top-level mapping stored in
    'wiki-connections.json'; an existing entry with the same key is replaced.

    Args:
        message: Full text of the model reply.
        topic: Topic identifier. Currently unused; kept so the call signature
            matches the other generate_* helpers.

    Returns:
        True if an object was extracted, parsed and written; False (after
        printing an error) if no JSON-looking object was found or it could not
        be parsed.
    """
    message = message.strip()
    match = re.search(r'(?s){\s*\"[^"]+\":\s*{.*?}\s*}', message, re.DOTALL)
    if not match:
        print("Error: Could not extract JSON from message.")
        return False

    # Drop trailing commas before a closing ] or }, which json.loads rejects.
    json_string = re.sub(r',\s*([\]}])', r'\1', match.group())
    try:
        json_object = json.loads(json_string)
    except json.JSONDecodeError as error:
        # Report failure through the return value, like the "no match" case,
        # instead of letting the exception escape.
        print("Error: Could not parse JSON from message:", error)
        return False

    connections_path = 'wiki-connections.json'
    try:
        with open(connections_path, 'r') as file:
            existing_data = json.load(file)
    except FileNotFoundError:
        # First run: start from an empty mapping rather than crashing.
        existing_data = {}

    existing_data.update(json_object)
    with open(connections_path, 'w') as file:
        json.dump(existing_data, file, indent=4)
    return True


def generate_markdown(message, topic):
    """Write the Markdown part of a model reply to data/<topic>.md.

    Everything from the first '#' character in `message` to the end is treated
    as the page body and written with a single trailing newline.

    Args:
        message: Full text of the model reply.
        topic: Topic identifier, used as the file stem.

    Returns:
        True if the file was written; False (after printing an error) if the
        file already exists or the message contains no '#'.
    """
    destination_folder = 'data'
    md_path = os.path.join(destination_folder, topic + '.md')
    if os.path.exists(md_path):
        print("Error: Markdown file already exists.")
        return False

    message = message.strip()
    markdown_start_pos = message.find('#')
    if markdown_start_pos == -1:
        # Without this guard the slice below would start at index -1 and the
        # "page" would be just the last character of the message.
        print("Error: Could not find a Markdown heading in message.")
        return False
    markdown_content = message[markdown_start_pos:].strip() + '\n'

    # Write straight into the destination folder. Moving a file onto a folder
    # name that does not exist would rename the file to that name instead.
    os.makedirs(destination_folder, exist_ok=True)
    # The generated page component reads this file as utf8, so write it as UTF-8.
    with open(md_path, 'w', encoding='utf-8') as file:
        file.write(markdown_content)
    return True

def generate_js(topic):
    """Write pages/<topic>.js, a page module that renders data/<topic>.md.

    Args:
        topic: Topic identifier, used as the stem of both the JS file and the
            Markdown filename embedded in it.

    Returns:
        True if the file was written; False (after printing an error) if
        pages/<topic>.js already exists.
    """
    destination_folder = 'pages'
    js_path = os.path.join(destination_folder, topic + '.js')
    if os.path.exists(js_path):
        print("Error: JS file already exists.")
        return False

    md_filename = topic + '.md'
    # Doubled braces are literal braces in the emitted JavaScript; only
    # {md_filename} is interpolated.
    js_string = f'''
    import React from 'react';
    import path from 'path';
    import fs from 'fs';
    import PageContent from '@/components/PageContent/PageContent';

    const filename = '{md_filename}';

    export default function MarkdownPage({{ markdownContent }}) {{
    return <PageContent content={{markdownContent}} filename={{filename}} />;
    }}

    export async function getStaticProps() {{
    const filePath = path.join(process.cwd(), 'data', filename);
    const markdownContent = fs.readFileSync(filePath, 'utf8');
    return {{
        props: {{
        markdownContent,
        }},
    }};
    }}
    '''

    # Write straight into the destination folder. Moving a file onto a folder
    # name that does not exist would rename the file to that name instead.
    os.makedirs(destination_folder, exist_ok=True)
    with open(js_path, 'w', encoding='utf-8') as file:
        file.write(js_string)
    return True


def extract_markdown(message):
    """Return the slice of `message` from its first code fence through its last.

    Both fence markers (three backticks) are included in the result. The
    indices come straight from str.find / str.rfind, so a message without any
    fence is sliced using -1 for both positions.
    """
    fence = '```'
    first_fence = message.find(fence)
    last_fence = message.rfind(fence)
    return message[first_fence:last_fence + len(fence)]


def save_visited_pages(visited_pages, file_name='visited_pages.pickle'):
    """Pickle `visited_pages` to `file_name`, replacing any existing file.

    The highest pickle protocol available to the running interpreter is used.
    """
    with open(file_name, 'wb') as sink:
        pickle.dump(visited_pages, sink, pickle.HIGHEST_PROTOCOL)


def load_visited_pages(file_name='visited_pages.pickle'):
    """Unpickle and return the object stored in `file_name`.

    Returns an empty set when the file does not exist.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only point this at
    # files this notebook wrote itself.
    try:
        with open(file_name, 'rb') as source:
            return pickle.load(source)
    except FileNotFoundError:
        return set()


In [3]:
# Add hand-picked entries (currently only the empty string) to the persisted
# visited set and write it back.
visited_pages = load_visited_pages()
visited_pages |= {''}
save_visited_pages(visited_pages)

In [4]:
def _enqueue_unseen_topics(data, visited_pages, queue, current_topic=None, stop_at_first_hit=False):
    """Append topics referenced in `data` that still need a page to `queue` (in place).

    For every entry in `data`, its 'prerequisites' and then its
    'further_readings' are scanned. A referenced topic whose Markdown file
    already exists under data/ is added to `visited_pages`; otherwise it is
    queued unless it is already visited, already queued, equal to
    `current_topic`, or already a key of `data`.

    Args:
        data: Mapping loaded from wiki-connections.json.
        visited_pages: Set of handled topics; may gain entries.
        queue: List of pending topics; may gain entries.
        current_topic: Topic being generated right now, never re-queued.
        stop_at_first_hit: If True, stop after the first entry of `data` that
            leaves the queue non-empty.
    """
    for key in data:
        for field in ('prerequisites', 'further_readings'):
            for new_topic in data[key][field]:
                if os.path.exists('data/' + new_topic + '.md'):
                    visited_pages.add(new_topic)
                    continue
                if (
                    new_topic not in visited_pages
                    and new_topic not in queue
                    and new_topic != current_topic
                    and new_topic not in data
                ):
                    queue.append(new_topic)
        if stop_at_first_hit and queue:
            break


visited_pages = load_visited_pages()
queue = []
visited_pages.add('voxel-based_method')

# Seed the queue from the first entry that references a topic without a page.
if not queue:
    with open('wiki-connections.json', 'r') as file:
        data = json.load(file)
    _enqueue_unseen_topics(data, visited_pages, queue, stop_at_first_hit=True)
print(queue)

while queue:
    topic = queue.pop(0)
    topic = topic.lower()
    topic = topic.replace("'", "")

    if topic in visited_pages:
        continue

    # Re-read the connections file each round: generate_json updates it.
    with open('wiki-connections.json', 'r') as file:
        data = json.load(file)
    if topic in data:
        continue
    # Refill once when the queue has run dry. This is a single scan on purpose:
    # looping until the queue is non-empty never terminates when no unseen
    # topic is left.
    if not queue:
        _enqueue_unseen_topics(data, visited_pages, queue, current_topic=topic)

    print('NOW GENERATING:', topic)
    prompt = generate_prompt(topic)
    finish_reason, message, completion = generate_completion(prompt)
    print("FINISH_REASON:", finish_reason)
    print(message)

    if finish_reason != 'stop':
        print("Error: Did not finish generating.")
        # NOTE(review): raising instead of exit(1); inside a Jupyter kernel
        # exit() is not a dependable way to halt the running cell. Confirm
        # against the IPython docs if the old behaviour mattered.
        raise RuntimeError("Did not finish generating: " + topic)

    has_generated_json = generate_json(message, topic)
    has_generated_markdown = generate_markdown(message, topic)
    has_generated_js = generate_js(topic)

    # The topic is recorded as visited even when a step failed, so a rerun
    # does not pick it up again automatically.
    visited_pages.add(topic)
    save_visited_pages(visited_pages)

    if not has_generated_json or not has_generated_markdown or not has_generated_js:
        raise RuntimeError("Failed to write all outputs for: " + topic)

    print('DONE GENERATING:', topic)


['imitation_learning_in_rl', 'off_policy_rl']
NOW GENERATING: imitation_learning_in_rl
FINISH_REASON: stop
{
    "imitation_learning_in_rl": {
        "title": "Imitation Learning in RL",
        "prerequisites": ["reinforcement_learning", "supervised_learning", "neural_networks"],
        "further_readings": ["inverse_reinforcement_learning", "imitation_learning_in_cv", "imitation_learning_in_nlp"]
    }
}

# Imitation Learning in RL

Imitation learning is a type of machine learning where an agent learns to perform a task by imitating the behavior of an expert. In reinforcement learning (RL), imitation learning is used to train an agent to perform a task by learning from the actions of an expert. This is done by providing the agent with a set of expert demonstrations, which it can use to learn a policy that mimics the expert's behavior.

## Reinforcement Learning

Reinforcement learning is a type of machine learning where an agent learns to perform a task by interacting with an enviro

KeyboardInterrupt: 