In [1]:
import nbformat
from nbconvert import MarkdownExporter
import re
def convert_ipynb_to_md(ipynb_file, md_file):
    """Export a Jupyter notebook to markdown and strip auto-generated PNG links.

    Parameters
    ----------
    ipynb_file : str
        Path of the notebook to read (UTF-8).
    md_file : str
        Path of the markdown file to write (UTF-8). An existing file is overwritten.
    """
    with open(ipynb_file, 'r', encoding='utf-8') as f:
        nb = nbformat.read(f, nbformat.NO_CONVERT)

    exporter = MarkdownExporter()
    # Only the markdown text is used; the second return value is ignored.
    body, _resources = exporter.from_notebook_node(nb)

    # Remove any pngs added simply from output of notebook (images should be
    # explicitly pulled from figs subdirectory).
    # `[^)]*` keeps each match inside a single "![png](...)" link. A greedy `.*`
    # would also swallow any text sitting between two such links on one line.
    pattern = r'!\[png\]\(output[^)]*\.png\)'
    body = re.sub(pattern, '', body)

    with open(md_file, 'w', encoding='utf-8') as f:
        f.write(body)




The code below converts any exercises in a markdown file to Jekyll format. For the conversion to work, exercises need to be formatted as follows in markdown files or Jupyter notebooks...
```
#### EXERCISE_START: Name Of Exercise
line1 of exercise body (instructions for exercise)
line2 of exercise body (instructions for exercise)
...
##### CODE_START
print('hello')
print('goodbye')
##### CODE_END
##### OUTPUT_START
hello
goodbye
##### OUTPUT_END

lineN of exercise body (instructions for exercise)
#### EXERCISE_SOLUTION 
line1 of solution 
line2 of solution 
...
lineN of solution
#### EXERCISE_END 
```

Note
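- the beginning of an exercise, its solution, and its end are each indicated by four hashtags (`####`) followed by the tag name; code and output blocks inside an exercise are delimited by five hashtags (`#####`) followed by `CODE_START`/`CODE_END` or `OUTPUT_START`/`OUTPUT_END`
- make sure to capitalize all letters of EXERCISE_START, EXERCISE_SOLUTION, and EXERCISE_END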
- **new problem discovered**: Careful when using numbered lists in an exercise. You need an empty newline following a numbered list for the markdown file to render correctly. In jupyter notebooks, leave a blank newline after a numbered list and everything should format correctly in the markdown.

Code formatting in exercises...
> ~~~
> it may include some code
> ~~~
> {: .language-python}

Output formatting in exercises...
> ~~~
> it may include some output
> ~~~
> {: .output}

In [2]:
# (marker, class line emitted after the closing fence). `None` marks an opening marker.
_FENCE_MARKERS = (
    ("##### CODE_START", None),
    ("##### CODE_END", "{: .language-python}"),
    ("##### OUTPUT_START", None),
    ("##### OUTPUT_END", "{: .output}"),
)

# Blockquote prefix for each section of the document.
_SECTION_PREFIX = {None: "", "body": "> ", "solution": "> > "}


def _convert_exercise_lines(lines):
    """Translate exercise markers in an iterable of text lines.

    Returns a tuple ``(converted_lines, exercise_names, edits_made)`` where
    ``converted_lines`` is a list of lines without trailing newlines,
    ``exercise_names`` lists the exercise titles in order of appearance, and
    ``edits_made`` is True when at least one EXERCISE_* marker was found.
    """
    converted_lines = []
    exercise_names = []
    edits_made = False
    section = None     # None (outside any exercise), "body" or "solution"
    in_fence = False   # True between a *_START and the matching *_END marker

    for line in lines:
        if line.startswith("#### EXERCISE_START:"):
            edits_made = True
            section, in_fence = "body", False
            exercise_name = line.strip().replace("#### EXERCISE_START:", "").strip()
            exercise_names.append(exercise_name)
            converted_lines.append(f"> ## {exercise_name}")
            continue

        if line.startswith("#### EXERCISE_SOLUTION"):
            edits_made = True
            section, in_fence = "solution", False
            converted_lines.append("> > ## Solution")
            converted_lines.append("> >")
            continue

        if line.startswith("#### EXERCISE_END"):
            edits_made = True
            # Only close a solution block if one was actually opened.
            if section == "solution":
                converted_lines.append("> {:.solution}")
            converted_lines.append("{:.challenge}")
            converted_lines.append("")
            # Always leave the exercise, even when it had no solution section,
            # so that the following text is not quoted by mistake.
            section, in_fence = None, False
            continue

        prefix = _SECTION_PREFIX[section]
        for marker, class_line in _FENCE_MARKERS:
            if line.startswith(marker):
                # Anything written after the marker on the same line is kept.
                fence = line.strip().replace(marker, "~~~").strip()
                converted_lines.append(f"{prefix}{fence}")
                if class_line is None:
                    in_fence = True
                else:
                    converted_lines.append(f"{prefix}{class_line}")
                    in_fence = False
                break
        else:
            if section is None or in_fence:
                # Keep leading whitespace: outside exercises the text is passed
                # through, and inside code/output fences indentation is significant.
                converted_lines.append(f"{prefix}{line.rstrip()}")
            else:
                converted_lines.append(f"{prefix}{line.strip()}")

    return converted_lines, exercise_names, edits_made


def convert_exercises(input_file, save_output):
    """Convert exercise markers in a markdown file to Jekyll blockquote markup.

    Parameters
    ----------
    input_file : str
        Path of the markdown file to read (UTF-8).
    save_output : bool
        If True and at least one exercise marker was found, overwrite
        ``input_file`` with the converted text. If False, print the name of
        each exercise followed by a preview of the converted file instead.

    Notes
    -----
    When no EXERCISE_* marker is found, the file is left untouched and a
    message is printed. Exercises without an EXERCISE_SOLUTION section are
    closed with ``{:.challenge}`` only.
    """
    with open(input_file, 'r', encoding='utf-8') as file:
        converted_lines, exercise_names, edits_made = _convert_exercise_lines(file)

    if not save_output:
        for exercise_name in exercise_names:
            print('FORMATTING EXERCISE:',  exercise_name)

    if edits_made and save_output:
        with open(input_file, 'w', encoding='utf-8') as file:
            file.write('\n'.join(converted_lines))
    elif edits_made:
        print()
        print("Preview of file conversion...")
        for line in converted_lines:
            print(line)
    else:
        print('No exercises (pre-formatted) were detected in this markdown file.')
        


In [3]:
# 1) Specify the episodes to convert (notebook file names without the '.ipynb' extension).
#    To convert only a subset, comment out the entries that are not needed.
episodes = [
    '02-Intro-to-Ames-Housing-Dataset',
    '03-Regression-intro',
    '04-Relevant-predictors-and-coef-bias',
    '05-Regression-assumptions',
    '06-Hypothesis-testing',
    # Names that only appeared in commented-out code in earlier versions of this cell:
    # '06-High-dim-data-prep',
    # '08-Regularization',
    # '07-c-Multivariate_Regression_with_PCA',
    # '07-d-Feature_Selection_and_PCA-How_It_Works_Example',
]
episodes

['02-Intro-to-Ames-Housing-Dataset',
 '03-Regression-intro',
 '04-Relevant-predictors-and-coef-bias',
 '05-Regression-assumptions',
 '06-Hypothesis-testing']

In [4]:
# 2) define output path - the `_episodes` folder one level above the current working directory
import os
curr_dir = os.getcwd()
# os.path.join uses the separator of the host OS instead of hard-coded backslashes.
# The trailing '' keeps a trailing separator, so file names can be appended
# directly to out_path.
out_path = os.path.join(curr_dir, '..', '_episodes', '')
out_path

'C:\\Users\\Endemann\\Documents\\GitHub\\high-dim-data-lesson\\code\\..\\_episodes\\'

In [5]:
# 3) For every selected episode: export the notebook to markdown in the output
#    folder, then rewrite the exercise markers of that markdown file in place.
for episode_name in episodes:
    input_ipynb_file = f"{episode_name}.ipynb"
    output_md_file = f"{out_path}{episode_name}.md"

    # notebook -> markdown
    convert_ipynb_to_md(input_ipynb_file, output_md_file)
    # exercise markers -> jekyll format (overwrites the markdown file)
    convert_exercises(output_md_file, save_output=True)
    

No exercises (pre-formatted) were detected in this markdown file.
