# Produce proficiency guideline descriptions.

In [26]:
def parse_markdown(file_path):
    """Parse a Markdown file into a mapping of heading text to section text.

    Lines starting with ``# `` or ``## `` (after stripping surrounding
    whitespace) open a new section keyed by the heading text. Both heading
    depths are treated the same way while reading; the hierarchy is derived
    afterwards from the heading *names*: a multi-word heading whose first
    word is itself a heading (e.g. ``Advanced High`` under ``Advanced``) is
    considered a sublevel of that heading.

    Every other line is stripped and joined with single spaces into the body
    of the current section, so a blank line shows up as a double space.

    Post-processing:
      * each sublevel's text is prefixed with its main level's text;
      * main levels that have at least one sublevel are removed.

    Text that appears before the first heading belongs to no section and is
    discarded.

    Args:
        file_path: Path of the Markdown file to read (decoded as UTF-8).

    Returns:
        dict mapping heading text to the flattened section text. If the same
        heading occurs more than once, the last occurrence wins.
    """
    sections = {}
    current_title = None
    body_lines = []

    # Explicit encoding so the result does not depend on the platform locale.
    with open(file_path, 'r', encoding='utf-8') as file:
        for raw_line in file:
            line = raw_line.strip()

            if line.startswith("# "):
                heading = line[2:]
            elif line.startswith("## "):
                heading = line[3:]
            else:
                # Ordinary line: accumulate it for the current section.
                body_lines.append(line)
                continue

            # A heading closes the section that was being collected.
            if current_title is not None:
                sections[current_title] = " ".join(body_lines).strip()
            # Always start from an empty body, even for the very first
            # heading, so text preceding it cannot leak into that section.
            body_lines = []
            current_title = heading

    # Store the section that was still open at end of file.
    if current_title is not None:
        sections[current_title] = " ".join(body_lines).strip()

    # Prefix each sublevel with its main level's text, remembering which
    # main levels were used. A main-level key contains no space, so it is
    # never rewritten itself and can safely be read during this loop.
    parents_with_sublevels = set()
    for key in sections:
        parent, separator, _ = key.partition(" ")
        if separator and parent in sections:
            sections[key] = sections[parent] + " " + sections[key]
            parents_with_sublevels.add(parent)

    # Main levels that have sublevels are represented by those sublevels.
    for parent in parents_with_sublevels:
        del sections[parent]

    return sections


# Example usage: parse the guidelines Markdown file (path is relative to the
# notebook's working directory) into a {heading: text} dict.
file_path = '../proficiency-guidelines.md'
parsed_dict = parse_markdown(file_path)

# Bare expression so the notebook displays the dict as the cell output.
parsed_dict

{'Distinguished': 'Speakers at the Distinguished level are able to use language skillfully and with accuracy, efficiency, and effectiveness. They are educated and articulate users of the language, capable of reflecting on a wide range of global issues and highly abstract concepts in a culturally appropriate manner. Distinguished-level speakers can use persuasive and hypothetical discourse for representational purposes, allowing them to advocate a point of view that is not necessarily their own. They can tailor language to a variety of audiences by adapting their speech and register in ways that are culturally authentic.  Speakers at the Distinguished level produce highly sophisticated and tightly organized extended discourse. At the same time, they can speak succinctly, often using cultural and historical references to allow them to say less and mean more. At this level, oral discourse typically resembles written discourse. A non-native accent, a lack of a native-like economy of expres

In [31]:
import os

# Write every parsed guideline description to its own text file, named after
# the proficiency level: ../data/guideline-descriptions/<level>.txt
output_dir = "../data/guideline-descriptions"

# exist_ok=True creates the directory (and missing parents) when needed and
# is a no-op when it already exists, avoiding a separate existence check.
os.makedirs(output_dir, exist_ok=True)

for key, description in parsed_dict.items():
    # Explicit encoding so the files are written identically on every
    # platform, whatever the locale's default encoding is.
    with open(f"{output_dir}/{key}.txt", 'w', encoding='utf-8') as file:
        file.write(description)