In [66]:
!python setup.py
!python topic_sorting.py

100%|█████████████████████████████████████████| 89/89 [00:00<00:00, 1144.02it/s]


In [67]:
import pandas as pd
import numpy as np
import re
import os
from tqdm.auto import tqdm
import csv

# topics is a list of strings, each string is a topic which comes from the list of file names in the "specific_topics" folder
TOPICS = os.listdir("specific_topics") # get the list of file names in the "specific_topics" folder
TOPICS = [topic.replace(".md", "") for topic in TOPICS] # remove the ".md" from the file names
TOPICS = [topic.replace("_", " ") for topic in TOPICS] # replace the underscores with spaces


  from .autonotebook import tqdm as notebook_tqdm


In [68]:
######## Fresh Start ########
# Starting with a csv already created and populated with prompts
# we want to distribute those prompts into markdown files that are named by the topic. To do this we need to categorize the prompts by topic.
# We can make a new column in the csv file that is the topic, and then we can use that to populate the markdown files.
# We can also use the topic column to make sure that the prompts are only added to the markdown files that are named after the topic.
# Step 1: Create a new column in the csv file that is the topic
# open the csv file
prompts_df = pd.read_csv("prompts.csv")

# Step 2: Populate the markdown files with the prompts from the csv file
prompts_df.head()

Unnamed: 0,topic,prompt,contributor,link
0,Act As A Tea-Taster,Want somebody experienced enough to distinguis...,,
1,Act as an Interior Decorator,I want you to act as an interior decorator. Te...,,
2,Act As A Florist,Calling out for assistance from knowledgeable ...,,
3,Act as a Self-Help Book,I want you to act as a self-help book. You wil...,,
4,Act as a Gnomist,I want you to act as a gnomist. You will provi...,,


In [69]:
df = prompts_df.copy()
df.head()

Unnamed: 0,topic,prompt,contributor,link
0,Act As A Tea-Taster,Want somebody experienced enough to distinguis...,,
1,Act as an Interior Decorator,I want you to act as an interior decorator. Te...,,
2,Act As A Florist,Calling out for assistance from knowledgeable ...,,
3,Act as a Self-Help Book,I want you to act as a self-help book. You wil...,,
4,Act as a Gnomist,I want you to act as a gnomist. You will provi...,,


In [70]:
df['full_text'] = df['topic'] + " " + df['prompt']
df['full_text'] = df['full_text'].str.lower()
df.head()

Unnamed: 0,topic,prompt,contributor,link,full_text
0,Act As A Tea-Taster,Want somebody experienced enough to distinguis...,,,act as a tea-taster want somebody experienced ...
1,Act as an Interior Decorator,I want you to act as an interior decorator. Te...,,,act as an interior decorator i want you to act...
2,Act As A Florist,Calling out for assistance from knowledgeable ...,,,act as a florist calling out for assistance fr...
3,Act as a Self-Help Book,I want you to act as a self-help book. You wil...,,,act as a self-help book i want you to act as a...
4,Act as a Gnomist,I want you to act as a gnomist. You will provi...,,,act as a gnomist i want you to act as a gnomis...


In [71]:
import pandas as pd

# extract everything after "Act as a" and assign it to the new column "category"
df['actor_name'] = df['topic'].str.extract(r'Act as[ a| ](.*)')
# remove any instances of 'a' or 'an' from the category column and replace them with an empty string in their place keeping the rest.
df['actor_name'] = df['actor_name'].str.replace(r' a | an ', ' ')
# remove double spaces
df['actor_name'] = df['actor_name'].str.replace(r'  ', ' ')

# replace any NaN values with 'General' in the category column
df['actor_name'] = df['actor_name'].fillna('General')
# make actor_name lowercase
df['actor_name'] = df['actor_name'].str.lower()
# Fill in NaN contributor values with 'None'
df['contributor'] = df['contributor'].fillna('None')

df.actor_name.value_counts()


  df['actor_name'] = df['actor_name'].str.replace(r' a | an ', ' ')


general                              2
a machine learning engineer          1
a legal advisor                      1
a personal chef                      1
a virtual doctor                     1
                                    ..
a lunatic                            1
an it architect                      1
an academician                       1
a developer relations consultant:    1
a life coach                         1
Name: actor_name, Length: 82, dtype: int64

In [72]:
# clear out the markdown files leaving the header line only
for topic in TOPICS:
    with open(f"specific_topics/{topic}.md", "w") as file:
        file.write(f"## Prompts for {topic}\n")



In [73]:
# scan actor_name column for words that correspond to the filenames in the "specific_topics" folder
# if a word is found, open that file and append the prompt to the file then close the file

for topic in tqdm(TOPICS, total = len(TOPICS), desc = "Writing prompts to files", colour="green"):
    for index, row in df.iterrows():
        if topic in row['actor_name']:
            with open(f"specific_topics/{topic}.md", "a") as f:
                f.write("## Prompt\n")
                write_string = "```output\n" + row['prompt'] + "\n```\n"
                f.write(write_string)
                # if there is a contributor, add the contributor to the file
                if row['contributor'] != "None":
                    f.write(f"Contributor: {row['contributor']}\n")
                f.write("\n")


Writing prompts to files: 100%|[32m██████████[0m| 143/143 [00:00<00:00, 375.15it/s]


In [74]:
# Now parse every word in each prompt for the words in the "industries" folder and do the same thing as above for those words in the markdown files in the "industries" folder
for industry in tqdm(os.listdir("industries"), total = len(os.listdir("industries")), desc = "Writing prompts to files", colour="green"):
    for index, row in df.iterrows():
        if industry in row['full_text']:
            with open(f"industries/{industry}.md", "a") as f:
                f.write("## Prompt\n")
                write_string = "```output\n" + row['prompt'] + "\n```\n"
                f.write(write_string)
                # if there is a contributor, add the contributor to the file
                if row['contributor'] != "None":
                    f.write(f"Contributor: {row['contributor']}\n")
                f.write("\n")

Writing prompts to files: 100%|[32m██████████[0m| 67/67 [00:00<00:00, 363.36it/s]


In [75]:
# now add each row of the df to markdown files in the "actors" folder based on the actor_name column where the actor_name is the name of the file
for actor in tqdm(df['actor_name'].unique(), total = len(df['actor_name'].unique()), desc = "Writing prompts to files", colour="green"):
    for index, row in df.iterrows():
        if actor in row['actor_name']:
            with open(f"actors/{actor}.md", "a") as f:
                f.write("## Prompt\n")
                write_string = "```output\n" + row['prompt'] + "\n```\n"
                f.write(write_string)
                # if there is a contributor, add the contributor to the file
                if row['contributor'] != "None":
                    f.write(f"Contributor: {row['contributor']}\n")
                f.write("\n")


Writing prompts to files: 100%|[32m██████████[0m| 82/82 [00:00<00:00, 304.28it/s]


In [76]:
print(TOPICS)

['swift', 'machine learning', 'cryptography', 'cloud computing', 'parallel computing', 'front end development', 'arvr', 'functional programming', 'kubernetes', 'data analysis', 'english', 'kotlin', 'big data', 'blockchain', 'desktop development', 'book', 'full stack development', 'data structures', 'mobile development', 'go', 'back end development', 'compilers', 'computer graphics', 'soa', 'python', 'monolithic', 'waterfall', 'concurrent programming', 'data science', 'pointers in computer science', 'natural language processing', 'togaf', 'networking', 'procedural programming', 'c', 'data engineering', 'event-driven programming', 'android', 'professor', 'object-oriented programming', 'generator', 'data science', 'vba', 'computer architecture', 'regex', 'iot', 'natural language processing', 'diet', 'physicist', 'scala', 'rust', 'data structures', 'deep learning', 'pmp', 'data visualization', 'parallel computing', 'cloud computing', 'excel formulas', 'creator', 'kanban', 'specific topics 

In [77]:
# Now for every file in `specific_topics` folder, add a badge to the readme. The badge will be a link to the file.
!pip install markdown #  for markdown parsing
!pip install pybadges # for badges
!pip install black  # for code formatting
!pip install mistune # for markdown parsing
!pip install pygments # for syntax highlighting



In [78]:
import random
import markdown
import black


def combine_readme(top, middle, bottom):
    # Create dynamic badges for middle section
    badges = []
    for i in range(3):
        badge_color = random.choice(["green", "orange", "red"])
        badge_text = f"Badge {i + 1}"
        badge_link = f"/specific_topics/badge_{i + 1}.md"
        badge = f"[![{badge_text}](https://img.shields.io/badge/{badge_text}-{badge_color})]({badge_link})"
        badges.append(badge)

    # Concatenate top, middle, and bottom sections
    combined_readme = top + "\n\n" + "\n".join(badges) + "\n\n" + bottom

    return formatted_readme


# Read in the top, middle, and bottom sections of the README
with open("docs/section_1.md", "r") as file:
    top = file.read()

with open("docs/section_2.md", "r") as file:
    middle = file.read()
    # Format with black
    middle = black.format_str(middle, mode=black.FileMode())

with open("docs/section_3.md", "r") as file:
    bottom = file.read()
    # Format with black
    bottom = black.format_str(bottom, mode=black.FileMode())

# Combine the sections
combined_readme = combine_readme(top, middle, bottom)

# Write the combined README to the root directory
with open("README.md", "w") as file:
    file.write(combined_readme)

# Now for every file in `specific_topics` folder, add a badge to the readme. The badge will be a link to the file.
for file in os.listdir("specific_topics"):
    # you can use the file name to create the badge text
    badge_text = file.replace(".md", "")
    # you can use the file name to create the badge link
    badge_link = f"/specific_topics/{file}"
    # you can use the file name to create the badge color
    badge_color = random.choice(["green", "orange", "red"])

InvalidInput: Cannot parse: 1:1: [![swift](https://img.shields.io/badge/-swift-grey)](./specific_topics/swift.md)

In [None]:
import random
import mistune
from bs4 import BeautifulSoup

def combine_readme(top, middle, bottom):
    # Convert markdown to html
    middle = mistune.markdown(middle)
    # Now you can use beautifulsoup to format the html
    soup = BeautifulSoup(middle, 'html.parser')
    # Perform any desired formatting on the html using beautifulsoup
    # ...
    # Convert the modified html back to markdown
    middle = soup.prettify()
    # Create dynamic badges for middle section
    badges = []
    for i in range(3):
        badge_color = random.choice(["green", "orange", "red"])
        badge_text = f"Badge {i + 1}"
        badge_link = f"/specific_topics/badge_{i + 1}.md"
        badge = f"[![{badge_text}](https://img.shields.io/badge/{badge_text}-{badge_color})]({badge_link})"
        badges.append(badge)

    # Concatenate top, middle, and bottom sections
    combined_readme = top + "\n\n" + "\n".join(badges) + "\n\n" + bottom



In [80]:
import random
import markdown
import black
from tqdm import tqdm

def combine_readme(top, middle, bottom):
    # Create dynamic badges for middle section
    badges = []
    # the files in the specific_topics folder are the topics
    badge_count = len(os.listdir("specific_topics"))
    for i in tqdm(range(badge_count)):
        badge_color = random.choice(["green", "orange", "red", "blue", "yellow", "pink", "purple", "grey", "blue", "blueviolet","brown", "darkgrey", "lightgreen", "darkgreen", "lightblue", "darkblue", "lightyellow", "darkyellow", "lightpink", "darkpink", "lightpurple", "darkpurple", "lightbrown", "darkbrown", "lightblack", "darkblack", "lightwhite", "darkwhite", "lightred", "darkred", "lightorange", "darkorange"])
        badge_text = f"Badge_{i + 1}"
        # get the name of the file
        badge_filename = os.listdir("specific_topics")[i] # get the name of the file
        badge_text = badge_filename.replace(".md", "") # remove the .md extension
        badge_text_two = badge_text.replace(" ", "_") # replace spaces with underscores
        badge_link = f"./specific_topics/{badge_text_two}.md"
        while ' ' in badge_text:
            badge_text = badge_text.replace(' ', '_')
        badge = f"[![{badge_text}](https://img.shields.io/badge/{badge_text}-{badge_color})]({badge_link})"
        badges.append(badge)
    # Concatenate top, middle, and bottom sections
    combined_readme = top + "\n\n" + "\n".join(badges) + "\n\n" + bottom
    return combined_readme


# Read in the top, middle, and bottom sections of the README
with open("docs/section_1.md", "r") as file:
    top = file.read()
    # Write the section to the top of the readme
    with open("README.md", "w") as file:
        file.write(top)

with open("docs/section_2.md", "r") as file:
    middle = file.read()
    # Convert to HTML
    middle = mistune.markdown(middle)
    # write the section to the bottom of the readme
    with open("README.md", "a") as file:
        file.write(middle)
    

with open("docs/section_3.md", "r") as file:
    bottom = file.read()
    # Format with black
    #bottom = black.format_str(bottom, mode=black.FileMode())
    # write the section to the bottom of the readme
    with open("README.md", "a") as file:
        file.write(bottom)
    
# Combine the sections
combined_readme = combine_readme(top, middle, bottom)

# Write the combined README to the root directory
with open("README.md", "w") as file:
    file.write(combined_readme)


100%|██████████| 143/143 [00:00<00:00, 7775.59it/s]
