# Thought Organizer

In [58]:
import configparser
from typing import List
from io import StringIO

from openai import OpenAI
import pandas as pd

## Tags

### Helpers

In [63]:
def load_cards(filename: str) -> pd.DataFrame:
    """Read the CSV file at ``filename`` and return its content as a DataFrame.

    The file is parsed with the pandas defaults (comma separator, first row
    used as header).
    """
    cards = pd.read_csv(filename)
    return cards

def csv_to_dataframe(csv: str, sep: str = ";"):
    """Parse CSV text held in memory into a DataFrame.

    Args:
        csv: The CSV document as a single string.
        sep: Field separator used in ``csv``.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for the given text.
    """
    # read_csv expects a path or a file-like object, so wrap the text in a buffer.
    with StringIO(csv) as buffer:
        return pd.read_csv(buffer, sep=sep)

def select_decks(
    cards: pd.DataFrame,
    deck_column: str = "decks",
    decks: List[str] = None,
) -> pd.DataFrame:
    """Keep only the cards that belong to one of the given decks.

    Args:
        cards: All cards, one per row.
        deck_column: Name of the column holding each card's deck.
        decks: Deck names to keep. ``None`` disables filtering.

    Returns:
        ``cards`` itself when ``decks`` is ``None``; otherwise the rows whose
        ``deck_column`` value is contained in ``decks``.

    Raises:
        ValueError: If ``decks`` is neither ``None`` nor a ``list``.
    """
    if decks is None:
        return cards
    if isinstance(decks, list):
        in_requested_deck = cards[deck_column].isin(decks)
        return cards[in_requested_deck]
    raise ValueError(f"Wrong type for decks: {type(decks)}. Expected List.")

def generate_tags(
    client: OpenAI,
    cards: pd.DataFrame,
    model: str = "gpt-3.5-turbo-16k",
) -> str:
    """Ask an OpenAI chat model to propose revised tags for ``cards``.

    The cards are serialised as a semicolon-separated CSV (index omitted) and
    sent, together with the tagging instructions, as a single user message.

    Args:
        client: OpenAI client used to create the chat completion.
        cards: Cards to tag. The prompt tells the model to base its suggestions
            on the columns ``front``, ``back`` and ``tags``.
        model: Name of the chat model to query.

    Returns:
        The message content of the first choice in the response. The prompt
        asks for the input CSV extended with a ``new_tags`` column, but the
        reply is returned as-is, without validation.
    """
    MAX_NO_TAGS = 3
    TEXT_COLUMNS = "front,back,tags"
    NEW_COLUMN = "new_tags"
    FIRST_CHOICE = 0

    # The prompt is assembled line by line rather than as an indented
    # triple-quoted f-string: the source indentation would otherwise be sent to
    # the model and, because the CSV spans several lines, would end up in front
    # of the CSV header row only.
    instructions = "\n".join(
        [
            "Below is a csv where a row represents the front and back of a virtual quiz card.",
            "Each card has a set of tags associated with it, and some other metadata.",
            "",
            f"Please revise the existing tags, based on the text in the columns {TEXT_COLUMNS}.",
            "",
            "Strive for generic tags, using standard and well established terms.",
            "Try to keep a low number of unique tags.",
            f"Each card can have a maximum of {MAX_NO_TAGS} tags.",
            "Merge similar tags.",
            "",
            f"The output should be the same csv, with an additional column '{NEW_COLUMN}'.",
            f"'{NEW_COLUMN}' should contain your suggestions.",
            "The response should not contain any text except the csv.",
        ]
    )
    cards_csv = cards.to_csv(index=False, sep=";")
    prompt = f"{instructions}\n\n---\n\n{cards_csv}"

    response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "user", "content": prompt},
        ],
    )
    return response.choices[FIRST_CHOICE].message.content

def process_cards(
    input_filename: str,
    output_filename: str,
    client: OpenAI = None,
) -> None:
    """Load cards, request revised tags for them and save the result as CSV.

    Args:
        input_filename: Path of the CSV file containing the cards.
        output_filename: Path the tagged cards are written to.
        client: OpenAI client to use. When omitted, a default ``OpenAI()``
            client is created, which takes its API key from the
            ``OPENAI_API_KEY`` environment variable.
    """
    if client is None:
        client = OpenAI()

    df_cards = load_cards(input_filename)
    # generate_tags requires the client as its first argument and returns the
    # model's reply as semicolon-separated CSV text.
    csv_with_tags = generate_tags(client, df_cards)
    df_with_tags = csv_to_dataframe(csv_with_tags)
    df_with_tags.to_csv(output_filename, index=False)
    print(f"File saved to {output_filename}")

## Main

In [64]:
# Settings (API key, file locations) are read from config.ini.
config = configparser.ConfigParser()
config.read("config.ini")

# Client used for the requests to the OpenAI API.
openai_client = OpenAI(api_key=config["OpenAI"]["API_KEY"])

# Path of the CSV file the cards are loaded from.
input_filename: str = config["Files"]["INPUT_FILE"]

In [65]:
# Load every card and preview the first five rows.
df = load_cards(filename=input_filename)
df.head()

Unnamed: 0,front,back,tags,sources,decks,id
0,What are three good practices to design for *t...,* Modular design\n* Use of interfaces\n* Depen...,testability,https://www.codereliant.io/making-software-rel...,Software Architecture,OW2tfrufw32pG18zT0mv
1,What strategies can help us aspire toward euda...,* Know your values and aspire to live a value-...,eudaimonic happiness,https://positivepsychology.com/hedonic-vs-euda...,Wellbeing,d7M4BV1RcboL8AztnljO
2,What is *testability*?,How easily a software can be tested.,testability,https://www.codereliant.io/making-software-rel...,Software Architecture,ubHXLVPhdzOWaKLCWv82
3,"What are ""tidyings""?",Tidyings are a subset of refactorings. \n\nThe...,"definition,tidyings","Tidy First?\nBeck, Kent",Tidy First?,sFSx9aj5sgprh8SltBHB
4,What is the order of magnitude time wise when ...,Tidying is a minutes-to-an-hour kind of activi...,decision-making,"Tidy First?\nBeck, Kent",Tidy First?,vVwpzxqKFEdj67Lm8np6


In [66]:
# Restrict the cards to the "Tidy First?" deck.
df_selected_decks = select_decks(cards=df, decks=["Tidy First?"])

In [51]:
# Request revised tags for the selected cards; the reply is CSV text.
csv_string = generate_tags(client=openai_client, cards=df_selected_decks)

In [None]:
# Parse the semicolon-separated reply into a DataFrame.
df_new_tags = csv_to_dataframe(csv=csv_string)