In [6]:
import sys
import os

# Folder that holds the post markdown files, relative to the working directory.
# The trailing slash matters: later cells build paths by plain string
# concatenation (e.g. post_dir + md_file).
post_dir = '_posts/'

In [7]:
# Move image files from the posts' asset folder to the site-wide image folder.
asset_src_dir = post_dir + 'assets/'
asset_dst_dir = 'assets/img/'

# Same extensions that the image-link editor cell of this notebook looks for,
# so every image it can re-link has actually been moved.
asset_exts = ('.png', '.jpg', '.gif', '.jpeg', '.svg', '.bmp')

# Nothing to move when the source folder is absent (instead of crashing).
filelist = []
if os.path.isdir(asset_src_dir):
    filelist = [f for f in os.listdir(asset_src_dir) if f.endswith(asset_exts)]

if filelist:
    # os.rename fails when the destination folder is missing, so create it first.
    os.makedirs(asset_dst_dir, exist_ok=True)

for f in filelist:
    os.rename(asset_src_dir + f, asset_dst_dir + f)

In [8]:
# Normalise the post file names:
#   1. prefix each markdown file with its creation date
#      (example: 2023-01-01-Title.md), except files with a date in the name;
#   2. change blank spaces to underscores in each file name.

import re
from datetime import datetime


def creation_timestamp(path):
    """Return the creation time of ``path`` as a POSIX timestamp.

    ``st_birthtime`` is not provided on every platform (it is missing on
    Linux, for example), so reading it unconditionally raises AttributeError
    there. When it is unavailable, fall back to ``st_ctime`` on Windows
    (where it holds the creation time) and to ``st_mtime`` elsewhere.
    """
    info = os.stat(path)
    birthtime = getattr(info, 'st_birthtime', None)
    if birthtime is not None:
        return birthtime
    return info.st_ctime if os.name == 'nt' else info.st_mtime


def rename_post(old_name, new_name):
    """Rename a file inside ``post_dir`` without overwriting another file.

    Does nothing when the name is unchanged. When ``new_name`` already
    exists, a notice is printed and both files are left untouched.
    """
    if new_name == old_name:
        return
    if os.path.exists(post_dir + new_name):
        print('Skipped ' + old_name + ': ' + new_name + ' already exists')
        return
    os.rename(post_dir + old_name, post_dir + new_name)


# Step 1: add the date prefix where the name does not contain a date yet.
md_files = [f for f in os.listdir(post_dir) if f.endswith('.md')]
for md_file in md_files:
    if not re.search(r'\d{4}-\d{2}-\d{2}', md_file):
        created = datetime.fromtimestamp(creation_timestamp(post_dir + md_file))
        rename_post(md_file, created.strftime('%Y-%m-%d-') + md_file)

# Step 2: list again (step 1 changed the names) and replace blank spaces.
md_files = [f for f in os.listdir(post_dir) if f.endswith('.md')]
for md_file in md_files:
    rename_post(md_file, md_file.replace(' ', '_'))
    

In [9]:
# Insert a YAML header in each file that does not have one yet.
# Example:
# ---
# title: "Title"
# tags:
# - tag1
# - tag2
# category: 'Category'
# use_math: true
# ---


def title_from_filename(file_name):
    """Derive the post title from a markdown file name.

    Only the final extension is removed, so dots inside the name survive
    (``2023-01-01-v1.2_notes.md`` -> ``v1.2 notes``). Underscores become
    blank spaces and the ``YYYY-MM-DD-`` date prefix is dropped.
    """
    stem = os.path.splitext(file_name)[0]
    title = stem.replace('_', ' ')
    return re.sub(r'\d{4}-\d{2}-\d{2}-', '', title)


def yaml_header(title):
    """Return the header lines for ``title`` as a list of strings.

    Backslashes and double quotes are escaped because the title is written
    inside a double-quoted YAML scalar.
    """
    escaped = title.replace('\\', '\\\\').replace('"', '\\"')
    return ['---\n',
            'title: "' + escaped + '"\n',
            'tags:\n',
            '- tag1\n',
            '- tag2\n',
            'category: \'Category\'\n',
            'use_math: true\n',
            '---\n']


md_files = os.listdir(post_dir)
md_files = [f for f in md_files if f.endswith('.md')]

for md_file in md_files:
    md_path = post_dir + md_file

    # Read first and close the handle before the file is reopened for writing.
    with open(md_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    # Empty files are left alone; so are files that already start with '---'
    # (trailing whitespace after the dashes is tolerated).
    if not lines or lines[0].rstrip() == '---':
        continue

    with open(md_path, 'w', encoding='utf-8') as f:
        f.writelines(yaml_header(title_from_filename(md_file)) + lines)

In [10]:
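# # Disabled cell: reset the YAML title (second line) of each markdown file in the
# # current working directory to its file name without the date prefix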
# md_files = os.listdir(os.getcwd())
# md_files = [f for f in md_files if f.endswith('.md')]
# for md_file in md_files:
#     with open(md_file, 'r') as f:
#         lines = f.readlines()
#         if lines:
#             if lines[0] == '---\n':
#                 title = md_file.split('.')[0].replace('_', ' ')
#                 # remove date in title
#                 title = re.sub(r'\d{4}-\d{2}-\d{2}-', '', title)
#                 lines[1] = 'title: "' + title + '"\n'
#                 with open(md_file, 'w') as f:
#                     f.writelines(lines)

In [11]:
# Image file link editor for markdowns:
# point every image link in the posts at the matching file under assets/img/.

import os
post_dir = '_posts/'
target_dir = 'assets/img/'

# A line is only inspected when it contains one of these extensions.
img_extensions = ['.jpg', '.png', '.gif', '.jpeg', '.svg', '.bmp']


def index_images(image_dir):
    """Map every file name found under ``image_dir`` to its link path.

    The link path is '/' + the path below the working directory, written with
    forward slashes. The directory tree is walked once; when the same file
    name exists in several sub-folders, the first one met by os.walk wins.
    A missing ``image_dir`` yields an empty index.
    """
    index = {}
    for root, dirs, files in os.walk(image_dir):
        for file_name in files:
            if file_name not in index:
                link = '/' + os.path.join(root, file_name)
                index[file_name] = link.replace(os.sep, '/')
    return index


def extract_image_ref(line):
    """Return the image reference written in ``line``, or None.

    Supported styles: <img src="image.png">, ![[image.png]] (at the start of
    the line) and ![img](image.png). None is returned when the line merely
    mentions an image file without any of these syntaxes.
    """
    if 'img src' in line:
        parts = line.split('src="')
        return parts[1].split('"')[0] if len(parts) > 1 else None
    if line.startswith('![['):
        return line.split('[[')[1].split(']]')[0]
    if '(' in line:
        return line.split('(')[1].split(')')[0]
    return None


def relink_line(line, image_index):
    """Return ``line`` with its image reference replaced by the indexed path.

    The line is returned unchanged when it has no image extension, contains
    'plt' (matplotlib plot code), has no recognisable link, or refers to a
    file that is not in ``image_index``.
    """
    if 'plt' in line or not any(ext in line for ext in img_extensions):
        return line

    img_name = extract_image_ref(line)
    if not img_name:
        return line

    # Look the file up by its bare name; '%20' in a link stands for a blank space.
    img_name_only = img_name.split('/')[-1].replace('%20', ' ')
    img_path = image_index.get(img_name_only)
    if img_path is None:
        return line

    if line.startswith('![['):
        # Replace only the embed itself so the newline (and any trailing
        # text) is kept and the next line is not merged into this one.
        return line.replace('![[' + img_name + ']]', '![](' + img_path + ')', 1)
    return line.replace(img_name, img_path)


if os.path.isdir(post_dir):
    mdfiles = [f for f in os.listdir(post_dir) if f.endswith('.md')]
    image_index = index_images(target_dir)
else:
    print(post_dir + ' not found: no image links edited')
    mdfiles = []
    image_index = {}

for mdfile in mdfiles:
    md_path = os.path.join(post_dir, mdfile)
    with open(md_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()

    new_lines = [relink_line(line, image_index) for line in lines]

    # Write each file at most once, and only when a link actually changed.
    if new_lines != lines:
        with open(md_path, 'w', encoding='utf-8') as f:
            f.writelines(new_lines)


In [13]:
# # Disabled cell: remove the table of contents that directly follows the YAML header in each post

# post_dir = '_posts/'

# md_files = os.listdir(post_dir)
# md_files = [f for f in md_files if f.endswith('.md')]

# for md_file in md_files:
#     with open(os.path.join(post_dir, md_file), 'r') as f:
#         lines = f.readlines()
#     for i, line in enumerate(lines):
#         if line.startswith('---') and i != 0:
#             idx = i

#     YAML = lines[0:idx+1]
    
#     if lines[idx+1].startswith('- ') and '#' in lines[idx+1]:
#         cnt = 0
#         for i, line in enumerate(lines[idx+1:]):
#             if line.startswith('#'):
#                 cnt += 1

#         md = lines[idx+1+cnt:]

#         with open(os.path.join(post_dir, md_file), 'w') as f:
#             f.writelines(YAML)
#             f.writelines(md)

#             print('Done: {}'.format(md_file))
            

In [14]:
# # Disabled cell: work around Jekyll Liquid syntax errors by wrapping the post body
# # (everything after the YAML header) in {% raw %} ... {% endraw %}

# post_dir = '_posts/'

# md_files = os.listdir(post_dir)
# md_files = [f for f in md_files if f.endswith('.md')]

# for md_file in md_files:
#     with open(os.path.join(post_dir, md_file)) as f:
#         lines = f.readlines()

#     for i, line in enumerate(lines):
#         if line.startswith('---') and i != 0:
#             idx = i

#     YAML = lines[0:idx+1]
#     md = lines[idx+1:]

#     with open(os.path.join(post_dir, md_file), 'w') as f:
#         f.writelines(YAML)
#         f.write('{% raw %}\n')
#         f.writelines(md)
#         f.write('\n')
#         f.write('{% endraw %}')

In [15]:
# Fix the syntax error caused by stand-alone LaTeX delimiters:
# a line containing only '$$' must have a blank line before and after it.

post_dir = '_posts/'


def pad_display_math(lines):
    """Return a copy of ``lines`` with blank lines around every '$$' line.

    A blank line is inserted before a '$$' line unless the previous line is
    already blank, and after it unless the next line is already blank.
    Nothing is inserted before the first line of the file or after its last
    line, so a file that starts or ends with '$$' is handled without an
    out-of-range lookup. The input list is not modified, and running the
    function on its own output changes nothing.
    """
    padded = []
    for idx, line in enumerate(lines):
        # '$$' without a newline can only be the very last line of a file.
        is_delimiter = line in ('$$\n', '$$')

        if is_delimiter and padded and padded[-1] != '\n':
            padded.append('\n')

        padded.append(line)

        if is_delimiter and idx + 1 < len(lines) and lines[idx + 1] != '\n':
            padded.append('\n')
    return padded


if os.path.isdir(post_dir):
    md_files = [f for f in os.listdir(post_dir) if f.endswith('.md')]
else:
    print(post_dir + ' not found: no LaTeX delimiters fixed')
    md_files = []

for md_file in md_files:
    md_path = os.path.join(post_dir, md_file)
    with open(md_path, encoding='utf-8') as f:
        lines = f.readlines()

    padded_lines = pad_display_math(lines)

    # Leave files untouched when no blank line had to be added.
    if padded_lines != lines:
        with open(md_path, 'w', encoding='utf-8') as f:
            f.writelines(padded_lines)