In [1]:
import openai
import shutil
import re
import json
import os
import pickle
from dotenv import load_dotenv

# Load variables from the local env file, then hand the OpenAI key (None if unset) to the client.
load_dotenv('.env.local')
openai.api_key = os.environ.get("OPENAI_API_KEY")

In [4]:
def generate_prompt(topic):
    """Build the chat prompt for one wiki topic.

    The snake_case ``topic`` is turned into a display form (split on
    underscores, each part capitalized, joined with spaces) and placed on a
    leading ``Topic:`` line, followed by the fixed instruction text that asks
    for a JSON connection object and a Markdown wiki page.

    Returns the complete prompt string.
    """
    display_topic = ' '.join(word.capitalize() for word in topic.split('_'))

    # Fixed instruction block; the text is sent to the model verbatim.
    instructions = '''
    You are a world-renowned AI and ML expert.
    Provide a JSON object containing the topic, a list of 1-12 prerequisite topics, and a list of 1-12 further readings related to AI, ML, and DL. 
    Ensure that the prerequisites and further readings are specifically relevant to the given, rather than broad topics like calculus or statistics.
    Ensure that the title field is properly capitalized and spaced and has the right punctuation (such as Q-Learning).
    Also ensure that the topic, prerequisites, and further readings are in snake_case.
    Use a similar format to the example provided below.:

    Example:
    {
        "generative_adversarial_network": {
            "title": "Generative Adversarial Network",
            "prerequisites": ["expectation_maximization_algorithm", "probability_distributions", "convolutional_neural_networks", "backpropagation", "stochastic_gradient_descent", "loss_functions", "optimization_algorithms", "deep_learning_frameworks", "regularization_techniques", "unsupervised_learning"],
            "further_readings": ["conditional_gans", "cycle_gans", "stylegan_and_stylegan2", "wasserstein_gans", "domain_adaptation", "image_to_image_translation", "semi_supervised_learning", "adversarial_training", "adversarial_attacks_and_defenses", "transfer_learning"]
        }
    }
    Next, write a detailed wiki page about the given topic in Markdown format. Always write from a third-person perspective and remain unopinionated.
    Ensure that this wiki page is explicitly in code format. 
    Do not include a "Contents" section. 
    Do not include a "Further Readings" nor a "Prerequisites" section if they just include related topics.
    Use a neutral, unbiased tone without exclamation marks. 
    Ensure that the heading is the same as the title in the JSON object.
    Follow Markdown syntax for headings and formatting, and use LaTeX for equations, with inline equations in pairs of $ and multiline equations in $$.
    Ensure the entire output is less than 3600 tokens long and does not include an extra line at the end of the Markdown.
    '''
    return f'Topic: {display_topic}\n' + instructions


def generate_completion(prompt):
    """Send ``prompt`` as a single user message to the chat completion API.

    Uses the ``gpt-3.5-turbo`` model at temperature 0.7.

    Returns a tuple ``(finish_reason, message, completion)``: the finish
    reason and text content of the first choice, plus the raw completion
    object returned by the API.
    """
    conversation = [
        {"role": "user", "content": prompt}
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
        temperature=0.7,
    )
    # Only the first returned choice is inspected.
    first_choice = completion.choices[0]
    finish_reason = first_choice['finish_reason']
    message = first_choice.message.content
    return finish_reason, message, completion


def _lowercase_except_title(node):
    """Recursively lower-case dict keys and string values, leaving any "title" value untouched."""
    if isinstance(node, dict):
        normalized = {}
        for key, value in node.items():
            key = key.lower()
            # The title is the human-readable heading; its capitalization must survive.
            normalized[key] = value if key == 'title' else _lowercase_except_title(value)
        return normalized
    if isinstance(node, list):
        return [_lowercase_except_title(item) for item in node]
    if isinstance(node, str):
        return node.lower()
    return node


def generate_json(message, topic):
    """Extract the topic's JSON object from ``message`` and merge it into wiki-connections.json.

    The first ``{"<key>": {...}}`` object found in the message is parsed.
    Keys and topic names are normalized to lower case, while the ``title``
    value keeps the capitalization produced by the model. The parsed object
    is merged into ``wiki-connections.json`` in the current directory; the
    file is created if it does not exist yet.

    Args:
        message: Raw model output expected to contain the JSON object.
        topic: Lower-case snake_case topic that must be a top-level key of
            the extracted object.

    Raises:
        SystemExit: With status 1 (after printing an error) if no JSON object
            can be located or ``topic`` is not one of its top-level keys.
        json.JSONDecodeError: If the located text is not valid JSON.
    """
    message = message.strip()
    match = re.search(r'\{\s*"[^"]+":\s*\{.*?\}\s*\}', message, re.DOTALL)

    if not match:
        print("Error: Could not extract JSON from message.")
        # SystemExit is raised directly; the `exit` helper is only meant for interactive shells.
        raise SystemExit(1)

    # Normalize after parsing: lower-casing the raw text would also flatten the title.
    json_object = _lowercase_except_title(json.loads(match.group()))

    # Require the topic as an actual key; a substring match would also accept
    # a topic that only appears inside a prerequisites/further_readings list.
    if topic not in json_object:
        print("Error: Could not find topic in JSON.")
        raise SystemExit(1)

    try:
        with open('wiki-connections.json', 'r') as file:
            existing_data = json.load(file)
    except FileNotFoundError:
        # First run: start from an empty connection map.
        existing_data = {}

    existing_data.update(json_object)

    with open('wiki-connections.json', 'w') as file:
        json.dump(existing_data, file, indent=4)


def generate_markdown(message, topic):
    """Write the Markdown part of ``message`` to ``data/<topic>.md``.

    The page is taken to start at the first ``#`` in the stripped message.
    If the message contains no ``#`` at all, the whole stripped message is
    written instead. The content always ends with exactly one newline.

    The ``data`` folder is created when missing, and an existing page for
    the same topic is overwritten, so a topic can be regenerated.

    Args:
        message: Raw model output containing the Markdown page.
        topic: Topic name used as the file stem.
    """
    message = message.strip()
    markdown_start_pos = message.find('#')
    if markdown_start_pos == -1:
        # find() returns -1 when absent; slicing from -1 would keep only the last character.
        markdown_start_pos = 0
    markdown_content = message[markdown_start_pos:].strip() + '\n'

    destination_folder = 'data'
    os.makedirs(destination_folder, exist_ok=True)
    md_path = os.path.join(destination_folder, topic + '.md')
    # UTF-8 matches how the generated page module reads the file back.
    with open(md_path, 'w', encoding='utf-8') as file:
        file.write(markdown_content)


def generate_js(topic):
    """Write the page module ``pages/<topic>.js`` that renders ``data/<topic>.md``.

    The generated module reads the Markdown file in ``getStaticProps`` and
    passes its content and filename to the ``PageContent`` component.

    The ``pages`` folder is created when missing, and an existing module for
    the same topic is overwritten, so a topic can be regenerated.

    Args:
        topic: Topic name used as the stem of both the ``.js`` and ``.md`` files.
    """
    md_filename = topic + '.md'
    # Doubled braces are literal JS braces; only {md_filename} is interpolated.
    js_string = f'''
    import React from 'react';
    import path from 'path';
    import fs from 'fs';
    import PageContent from '@/components/PageContent/PageContent';

    const filename = '{md_filename}';

    export default function MarkdownPage({{ markdownContent }}) {{
    return <PageContent content={{markdownContent}} filename={{filename}} />;
    }}

    export async function getStaticProps() {{
    const filePath = path.join(process.cwd(), 'data', filename);
    const markdownContent = fs.readFileSync(filePath, 'utf8');
    return {{
        props: {{
        markdownContent,
        }},
    }};
    }}
    '''
    destination_folder = 'pages'
    os.makedirs(destination_folder, exist_ok=True)
    js_path = os.path.join(destination_folder, topic + '.js')
    # Written in place: no stray file in the working directory and no failure on re-runs.
    with open(js_path, 'w', encoding='utf-8') as file:
        file.write(js_string)


def extract_markdown(message):
    """Return the slice of ``message`` from its first to its last ``` fence.

    Both fences are included in the result. No check is made that a fence
    was actually found, so the raw ``find``/``rfind`` results (-1 when
    absent) are used as slice bounds unchanged.
    """
    fence = '```'
    first_fence = message.find(fence)
    last_fence = message.rfind(fence)
    return message[first_fence:last_fence + len(fence)]


def save_visited_pages(visited_pages, file_name='visited_pages.pickle'):
    """Pickle ``visited_pages`` to ``file_name`` using the highest pickle protocol.

    The target file is opened in binary write mode, replacing any previous
    content.
    """
    with open(file_name, 'wb') as sink:
        pickle.dump(visited_pages, sink, pickle.HIGHEST_PROTOCOL)


def load_visited_pages(file_name='visited_pages.pickle'):
    """Load the pickled collection of visited pages from ``file_name``.

    Returns the unpickled object, or an empty set when the file does not
    exist.
    """
    # NOTE: unpickling can execute arbitrary code; only load files this notebook wrote.
    try:
        with open(file_name, 'rb') as source:
            return pickle.load(source)
    except FileNotFoundError:
        return set()


In [3]:
# Breadth-first crawl: generate a page for each topic, then enqueue the topics it links to.
# NOTE: only visited_pages is persisted; the queue is rebuilt from the seed on every run.
visited_pages = load_visited_pages()
queue = ['imitation_learning']

while queue:
    topic = queue.pop(0)
    topic = topic.lower()
    if topic in visited_pages:
        continue

    print('NOW GENERATING:', topic)
    prompt = generate_prompt(topic)
    finish_reason, message, completion = generate_completion(prompt)
    print(finish_reason)
    print(message)

    # Any finish reason other than 'stop' skips the topic without marking it visited.
    if finish_reason != 'stop':
        continue

    generate_json(message, topic)
    generate_markdown(message, topic)
    generate_js(topic)

    visited_pages.add(topic)
    save_visited_pages(visited_pages)

    with open('wiki-connections.json', 'r') as file:
        wiki_connections = json.load(file)

    # The model may omit a list (or the topic key); treat that as "no links"
    # rather than failing after the page has already been written.
    topic_entry = wiki_connections.get(topic, {})
    for link_kind in ('prerequisites', 'further_readings'):
        linked_topics = topic_entry.get(link_kind)
        # Only lists are enqueued; += with a string would add single characters.
        if isinstance(linked_topics, list):
            queue += linked_topics

    print('DONE GENERATING:', topic)


NOW GENERATING: policy_gradient_methods
stop
{
    "policy_gradient_methods": {
        "title": "Policy Gradient Methods",
        "prerequisites": ["reinforcement_learning", "markov_decision_process", "value_iteration", "q_learning", "monte_carlo_tree_search", "neural_networks", "backpropagation", "stochastic_gradient_descent", "optimization_algorithms", "gradient_descent", "convolutional_neural_networks"],
        "further_readings": ["actor_critic_methods", "proximal_policy_optimization", "trust_region_policy_optimization", "asynchronous_advantage_actor_critic", "deep_deterministic_policy_gradient", "dual_learning", "imitation_learning", "reinforcement_learning_with_unsupervised_auxiliary_tasks", "hierarchical_reinforcement_learning", "multi_agent_reinforcement_learning"]
    }
}

# Policy Gradient Methods

Policy Gradient Methods are a class of reinforcement learning techniques used to train an agent to learn an optimal policy for a given task. In contrast to value-based methods, 

KeyError: 'title'