In [102]:
!python setup.py
!python topic_sorting.py

100%|███████████████████████████████████████| 143/143 [00:00<00:00, 2312.57it/s]


In [103]:
import pandas as pd
import numpy as np
import re
import os
from tqdm.auto import tqdm
import csv

# topics is a list of strings, each string is a topic which comes from the list of file names in the "specific_topics" folder
TOPICS = os.listdir("specific_topics") # get the list of file names in the "specific_topics" folder
TOPICS = [topic.replace(".md", "") for topic in TOPICS] # remove the ".md" from the file names
TOPICS = [topic.replace("_", " ") for topic in TOPICS] # replace the underscores with spaces


In [104]:
######## Fresh Start ########
# Starting with a csv already created and populated with prompts
# we want to distribute those prompts into markdown files that are named by the topic. To do this we need to categorize the prompts by topic.
# We can make a new column in the csv file that is the topic, and then we can use that to populate the markdown files.
# We can also use the topic column to make sure that the prompts are only added to the markdown files that are named after the topic.
# Step 1: Create a new column in the csv file that is the topic
# open the csv file
prompts_df = pd.read_csv("prompts.csv")

# Step 2: Populate the markdown files with the prompts from the csv file
prompts_df.head()

Unnamed: 0,topic,prompt,contributor,link
0,Using ChatGPT Desktop App,**Desktop App is an unofficial [open source pr...,,
1,Act as a Linux Terminal,i want you to act as a linux terminal. I will ...,https://github.com/f,
2,Act as an English Translator and Improver,"I want you to act as an English translator, sp...",https://github.com/f,
3,Act as `position` Interviewer,I want you to act as an interviewer. I will be...,https://github.com/f,
4,Act as a JavaScript Console,I want you to act as a javascript console. I w...,,


In [105]:
df = prompts_df.copy()
df.head()

Unnamed: 0,topic,prompt,contributor,link
0,Using ChatGPT Desktop App,**Desktop App is an unofficial [open source pr...,,
1,Act as a Linux Terminal,i want you to act as a linux terminal. I will ...,https://github.com/f,
2,Act as an English Translator and Improver,"I want you to act as an English translator, sp...",https://github.com/f,
3,Act as `position` Interviewer,I want you to act as an interviewer. I will be...,https://github.com/f,
4,Act as a JavaScript Console,I want you to act as a javascript console. I w...,,


In [106]:
df['full_text'] = df['topic'] + " " + df['prompt']
df['full_text'] = df['full_text'].str.lower()
df.head()

Unnamed: 0,topic,prompt,contributor,link,full_text
0,Using ChatGPT Desktop App,**Desktop App is an unofficial [open source pr...,,,using chatgpt desktop app **desktop app is an ...
1,Act as a Linux Terminal,i want you to act as a linux terminal. I will ...,https://github.com/f,,act as a linux terminal i want you to act as a...
2,Act as an English Translator and Improver,"I want you to act as an English translator, sp...",https://github.com/f,,act as an english translator and improver i wa...
3,Act as `position` Interviewer,I want you to act as an interviewer. I will be...,https://github.com/f,,act as `position` interviewer i want you to ac...
4,Act as a JavaScript Console,I want you to act as a javascript console. I w...,,,act as a javascript console i want you to act ...


In [107]:
import pandas as pd

# extract everything after "Act as a" and assign it to the new column "category"
df['actor_name'] = df['topic'].str.extract(r'Act as[ a| ](.*)')
# remove any instances of 'a' or 'an' from the category column and replace them with an empty string in their place keeping the rest.
df['actor_name'] = df['actor_name'].str.replace(r' a | an ', ' ')
# remove double spaces
df['actor_name'] = df['actor_name'].str.replace(r'  ', ' ')

# replace any NaN values with 'General' in the category column
df['actor_name'] = df['actor_name'].fillna('General')
# make actor_name lowercase
df['actor_name'] = df['actor_name'].str.lower()
# Fill in NaN contributor values with 'None'
df['contributor'] = df['contributor'].fillna('None')

df.head()


  df['actor_name'] = df['actor_name'].str.replace(r' a | an ', ' ')


Unnamed: 0,topic,prompt,contributor,link,full_text,actor_name
0,Using ChatGPT Desktop App,**Desktop App is an unofficial [open source pr...,,,using chatgpt desktop app **desktop app is an ...,general
1,Act as a Linux Terminal,i want you to act as a linux terminal. I will ...,https://github.com/f,,act as a linux terminal i want you to act as a...,a linux terminal
2,Act as an English Translator and Improver,"I want you to act as an English translator, sp...",https://github.com/f,,act as an english translator and improver i wa...,an english translator and improver
3,Act as `position` Interviewer,I want you to act as an interviewer. I will be...,https://github.com/f,,act as `position` interviewer i want you to ac...,`position` interviewer
4,Act as a JavaScript Console,I want you to act as a javascript console. I w...,,,act as a javascript console i want you to act ...,a javascript console


In [108]:
# clear out the markdown files leaving the header line only
for topic in TOPICS:
    with open(f"specific_topics/{topic}.md", "w") as file:
        file.write(f"## Prompts for {topic}\n")



In [110]:
# scan actor_name column for words that correspond to the filenames in the "specific_topics" folder
# if a word is found, open that file and append the prompt to the file then close the file

for topic in tqdm(TOPICS, total = len(TOPICS), desc = "Writing prompts to files", colour="green"):
    for index, row in df.iterrows():
        if topic in row['actor_name']:
            with open(f"specific_topics/{topic}.md", "a") as f:
                f.write("## Prompt\n")
                write_string = "```output\n" + row['prompt'] + "\n```\n"
                f.write(write_string)
                # if there is a contributor, add the contributor to the file
                if row['contributor'] != "None":
                    f.write(f"Contributor: {row['contributor']}\n")
                f.write("\n")


Writing prompts to files: 100%|[32m██████████[0m| 143/143 [00:00<00:00, 282.59it/s]


In [111]:
print(TOPICS)

['swift', 'machine learning', 'cryptography', 'cloud computing', 'parallel computing', 'front end development', 'arvr', 'functional programming', 'kubernetes', 'data analysis', 'english', 'kotlin', 'big data', 'blockchain', 'desktop development', 'book', 'full stack development', 'data structures', 'mobile development', 'go', 'back end development', 'compilers', 'computer graphics', 'soa', 'python', 'monolithic', 'waterfall', 'concurrent programming', 'data science', 'pointers in computer science', 'natural language processing', 'togaf', 'networking', 'procedural programming', 'c', 'data engineering', 'event-driven programming', 'android', 'professor', 'object-oriented programming', 'generator', 'data science', 'vba', 'computer architecture', 'regex', 'iot', 'natural language processing', 'diet', 'physicist', 'scala', 'rust', 'data structures', 'deep learning', 'pmp', 'data visualization', 'parallel computing', 'cloud computing', 'excel formulas', 'creator', 'kanban', 'specific topics 