In [76]:
!python setup.py
!python topic_sorting.py

100%|███████████████████████████████████████| 143/143 [00:00<00:00, 2010.71it/s]


In [77]:
import pandas as pd
import numpy as np
import re
import os
from tqdm.auto import tqdm
import csv

# topics is a list of strings, each string is a topic which comes from the list of file names in the "specific_topics" folder
TOPICS = os.listdir("specific_topics") # get the list of file names in the "specific_topics" folder
TOPICS = [topic.replace(".md", "") for topic in TOPICS] # remove the ".md" from the file names
TOPICS = [topic.replace("_", " ") for topic in TOPICS] # replace the underscores with spaces


In [78]:
######## Fresh Start ########
# Starting with a csv already created and populated with prompts
# we want to distribute those prompts into markdown files that are named by the topic. To do this we need to categorize the prompts by topic.
# We can make a new column in the csv file that is the topic, and then we can use that to populate the markdown files.
# We can also use the topic column to make sure that the prompts are only added to the markdown files that are named after the topic.
# Step 1: Create a new column in the csv file that is the topic
# open the csv file
prompts_df = pd.read_csv("prompts.csv")

# Step 2: Populate the markdown files with the prompts from the csv file
prompts_df.head()

Unnamed: 0,topic,prompt,contributor,link
0,Using ChatGPT Desktop App,**Desktop App is an unofficial [open source pr...,,
1,Act as a Linux Terminal,i want you to act as a linux terminal. I will ...,https://github.com/f,
2,Act as an English Translator and Improver,"I want you to act as an English translator, sp...",https://github.com/f,
3,Act as `position` Interviewer,I want you to act as an interviewer. I will be...,https://github.com/f,
4,Act as a JavaScript Console,I want you to act as a javascript console. I w...,,


In [79]:
df = prompts_df.copy()
df.head()

Unnamed: 0,topic,prompt,contributor,link
0,Using ChatGPT Desktop App,**Desktop App is an unofficial [open source pr...,,
1,Act as a Linux Terminal,i want you to act as a linux terminal. I will ...,https://github.com/f,
2,Act as an English Translator and Improver,"I want you to act as an English translator, sp...",https://github.com/f,
3,Act as `position` Interviewer,I want you to act as an interviewer. I will be...,https://github.com/f,
4,Act as a JavaScript Console,I want you to act as a javascript console. I w...,,


In [80]:
df['full_text'] = df['topic'] + " " + df['prompt']
df['full_text'] = df['full_text'].str.lower()
df.head()

Unnamed: 0,topic,prompt,contributor,link,full_text
0,Using ChatGPT Desktop App,**Desktop App is an unofficial [open source pr...,,,using chatgpt desktop app **desktop app is an ...
1,Act as a Linux Terminal,i want you to act as a linux terminal. I will ...,https://github.com/f,,act as a linux terminal i want you to act as a...
2,Act as an English Translator and Improver,"I want you to act as an English translator, sp...",https://github.com/f,,act as an english translator and improver i wa...
3,Act as `position` Interviewer,I want you to act as an interviewer. I will be...,https://github.com/f,,act as `position` interviewer i want you to ac...
4,Act as a JavaScript Console,I want you to act as a javascript console. I w...,,,act as a javascript console i want you to act ...


In [81]:
import pandas as pd

# extract everything after "Act as a" and assign it to the new column "category"
df['actor_name'] = df['topic'].str.extract(r'Act as[ a| ](.*)')
# remove any instances of 'a' or 'an' from the category column and replace them with an empty string in their place keeping the rest.
df['actor_name'] = df['actor_name'].str.replace(r' a | an ', ' ')
# remove double spaces
df['actor_name'] = df['actor_name'].str.replace(r'  ', ' ')

# replace any NaN values with 'General' in the category column
df['actor_name'] = df['actor_name'].fillna('General')
# make actor_name lowercase
df['actor_name'] = df['actor_name'].str.lower()
# Fill in NaN contributor values with 'None'
df['contributor'] = df['contributor'].fillna('None')

df.head()


  df['actor_name'] = df['actor_name'].str.replace(r' a | an ', ' ')


Unnamed: 0,topic,prompt,contributor,link,full_text,actor_name
0,Using ChatGPT Desktop App,**Desktop App is an unofficial [open source pr...,,,using chatgpt desktop app **desktop app is an ...,general
1,Act as a Linux Terminal,i want you to act as a linux terminal. I will ...,https://github.com/f,,act as a linux terminal i want you to act as a...,a linux terminal
2,Act as an English Translator and Improver,"I want you to act as an English translator, sp...",https://github.com/f,,act as an english translator and improver i wa...,an english translator and improver
3,Act as `position` Interviewer,I want you to act as an interviewer. I will be...,https://github.com/f,,act as `position` interviewer i want you to ac...,`position` interviewer
4,Act as a JavaScript Console,I want you to act as a javascript console. I w...,,,act as a javascript console i want you to act ...,a javascript console


In [82]:
# clear out the markdown files leaving the header line only
for topic in TOPICS:
    with open(f"specific_topics/{topic}.md", "w") as file:
        file.write(f"## Prompts for {topic}\n")



In [83]:
# scan actor_name column for words that correspond to the filenames in the "specific_topics" folder
# if a word is found, open that file and append the prompt to the file then close the file

for topic in tqdm(TOPICS, total = len(TOPICS), desc = "Writing prompts to files", colour="green"):
    for index, row in df.iterrows():
        if topic in row['actor_name']:
            with open(f"specific_topics/{topic}.md", "a") as f:
                f.write("## Prompt\n")
                f.write(f"{row['prompt']}\n")
                # if there is a contributor, add the contributor to the file
                if row['contributor'] != "None":
                    f.write(f"Contributor: {row['contributor']}\n")
                f.write("\n")


Writing prompts to files: 100%|[32m██████████[0m| 106/106 [00:00<00:00, 274.31it/s]


In [84]:
print(TOPICS)

['swift', 'machine learning', 'cryptography', 'cloud computing', 'parallel computing', 'front end development', 'arvr', 'functional programming', 'kubernetes', 'data analysis', 'english', 'kotlin', 'blockchain', 'desktop development', 'book', 'full stack development', 'data structures', 'mobile development', 'go', 'back end development', 'compilers', 'computer graphics', 'soa', 'python', 'monolithic', 'waterfall', 'concurrent programming', 'natural language processing', 'togaf', 'networking', 'procedural programming', 'c', 'data engineering', 'event-driven programming', 'android', 'generator', 'data science', 'vba', 'computer architecture', 'regex', 'iot', 'diet', 'scala', 'rust', 'pmp', 'data visualization', 'creator', 'kanban', 'scrum', 'php', 'sql', 'windows', 'javascript', 'linux', 'testing', 'visio', 'lean', 'qlikview', 'embedded systems', 'algorithms', 'macos', 'advice', 'coach', 'objective-c', 'firmware', 'r', 'itil', 'java', 'post', 'microservices', 'excel formulas', 'c++', 'go

In [85]:
import os
import re

def extract_category(text):
    # Define a list of possible categories
    categories = ["software developer", "web developer", "system administrator"]
    # Define a list of specific topics to look for
    specific_topics = ["Golang", "Angular", "JWT"]
    # Initialize a dictionary to store the scores for each category
    scores = {category: 0 for category in categories}
    
    # Check for specific topics in the text
    for topic in specific_topics:
        if topic in text:
            scores["software developer"] += 1
    
    # Check for keywords associated with each category
    if re.search(r"web app", text):
        scores["web developer"] += 1
    if re.search(r"system|admin", text):
        scores["system administrator"] += 1
    
    # Check for specific file names in the specific_topics directory
    for topic in specific_topics:
        if os.path.exists("specific_topics/" + topic):
            scores["software developer"] += 1
    
    # Return the category with the highest score
    return max(scores, key=scores.get)


In [86]:
import os
import re

def extract_category(text):
    # Get the list of file names in the specific_topics directory as categories
    categories = os.listdir("specific_topics")
    # Initialize a dictionary to store the scores for each category
    scores = {category: 0 for category in categories}
    
    # Search for the phrase "I want you to act like a" in the text
    match = re.search(r"I want you to act like a (\w+)", text)
    if match:
        # Extract the next word or set of words after the phrase as the category
        category = match.group(1)
        # check if the category is in the list of file names in the specific_topics directory
        if category in categories:
            category = category.replace(".md", "") # remove the ".md" from the file names
            category = category.replace("_", " ") # replace the underscores with spaces
            category = category.lower() # make the category lowercase
            scores[category] += 1
    else:
        # make the text lowercase
        text = text.lower()
        # check the text for keywords associated with each category and increment the score for that category
        for category in categories:
            category = category.replace(".md", "") # remove the ".md" from the file names
            category = category.replace("_", " ") # replace the underscores with spaces
            category = category.lower() # make the category lowercase
            if re.search(r"\b{}\b".format(category), text):
                # if there is no score yet for this category, add one
                try:
                    scores[category] += 1
                # if there is already a score for this category, increment it by one
                except Exception as e:
                    print(e)
    # Return the category with the highest score
    return max(scores, key=scores.get)


In [87]:
test_paragraph = "I want you to act like a mathematician. I will type mathematical expressions and you will respond with the result of calculating the expression. I want you to answer only with the final amount and nothing else. Do not write explanations. When I need to tell you something in English, I'll do it by putting the text inside square brackets {like this}. My first expression is: 4+5"

extract_category(test_paragraph)

'swift.md'