# Move Tags to Frontmatter
A script that does the the following:
1. Grabs all markdown files in the current directory and its subdirectories.
2. For each markdown file:
   - Move all occurrences of tags in the body `#<Word>` to the front of the file as a YAML frontmatter `tags: <Word>` .

## Usage
1. Move this .ipynb file to the desired directory (all subdirectories will also be affected).
2. Run the whole notebook.


In [10]:
!pip3 install python-frontmatter

[33mDEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621[0m[33m
[33mDEPRECATION: Configuring installation scheme with distutils config files is deprecated and will no longer work in the near future. If you are using a Homebrew or Linuxbrew Python, please see discussion at https://github.com/Homebrew/homebrew-core/issues/76621[0m[33m
[0m

In [18]:
import frontmatter
import os

exclude_directories = ['./Template', './.trash/']
# Get the paths of all markdown files in the current directory and its subdirectories, excluding the directories in exclude_directories
all_markdown_files = [os.path.join(dirpath, f) for dirpath, dirnames, files in os.walk('.') for f in files if f.endswith('.md')]
# Filter out the files in exclude_directories from all_markdown_files
markdown_files = [file for file in all_markdown_files if not any(file.startswith(exclude_dir) for exclude_dir in exclude_directories)]
def get_markdown_files():
	return [file for file in all_markdown_files if not any(file.startswith(exclude_dir) for exclude_dir in exclude_directories)]

In [12]:
import re
"""
A function that takes the path of a markdown file as input and returns a list of tags in the file's content (below the frontmatter)

            Parameters:
                    path (str): A path to a markdown file

            Returns:
                    tags (list[str]): A list of tags in the file's content as a list of strings
"""
def get_inline_tags(path: str) -> list[str]:
        frontmatter_data = frontmatter.load(path)
        # Remove all content wrapped with double brackets [[]], single brackets [], and parentheses () since #<Word> in double brackets imply a link to a section of a document
        file_contents_not_in_parentheses = re.sub(r'\(.*?\)', '', frontmatter_data.content)
        file_contents_not_in_square_brackets = re.sub(r'\[.*?\]', '', file_contents_not_in_parentheses)
        # Chars allowed in tags are a-z, A-Z, 0-9, dash, and underscore (cannot start with a number)
        chars_allowed_in_tags = r'#([a-zA-Z/\-_][a-zA-Z0-9/\-_]*)'
        # Regex match all tags in the content of the markdown file
        tags = re.findall(chars_allowed_in_tags, file_contents_not_in_square_brackets)
        # Remove duplicates from the list of tags
        tags = list(set(tags))
        # Sort the list of tags alphabetically in descending order
        tags.sort(reverse=True)
        return tags

In [None]:
for file in markdown_files:
	inline_tags = get_inline_tags(file)
	if len(inline_tags) > 0:
		print(inline_tags)
		# Add the inline_tags to the frontmatter of the markdown file
		frontmatter_data = frontmatter.load(file)
		# If the frontmatter already has tags
		if 'tags' in frontmatter_data.metadata:
			# If the tags are a list, add the inline_tags to the existing tags
			if isinstance(frontmatter_data.metadata['tags'], list):
				frontmatter_data.metadata['tags'] += inline_tags
			# Else it is a string, and so we'll convert it to a list and add the inline_tags
			else:
				frontmatter_data.metadata['tags'] = [frontmatter_data.metadata['tags']] + inline_tags
		else:
			frontmatter_data.metadata['tags'] = inline_tags
			
		# Remove the inline_tags from the content of the markdown file
		for tag in inline_tags:
			frontmatter_data.content = frontmatter_data.content.replace(f"#{tag}", '')
		frontmatter.dump(frontmatter_data, file)
		print(f'Added tags to {file}')
	else:
		print(f'No inline tags found in {file}')

In [None]:
# Sort tags
for file in markdown_files:
	frontmatter_data = frontmatter.load(file)
	# If 'tags' exists in the frontmatter of the markdown file and is a list
	if frontmatter_data.metadata.get('tags') is not None and isinstance(frontmatter_data.metadata['tags'], list):
		# Sort and remove duplicates from the list of tags
		frontmatter_data.metadata['tags'].sort()
		frontmatter.dump(frontmatter_data, file)
		

In [None]:
# Set the 'date' field in the frontmatter of the markdown file to the date in the first  heading, if it exists
for file in markdown_files:
	frontmatter_data = frontmatter.load(file)
	content = frontmatter_data.content
	# If there is a # [[YYYY-MM-DD]] in the content of the markdown file
	if re.search(r'# \[\[\d{4}-\d{2}-\d{2}\]\]', content):
		# Get the date in the first heading
		date = re.search(r'# \[\[\d{4}-\d{2}-\d{2}\]\]', content).group(0)
		print(date)
		# Remove the date from the content of the markdown file
		content = content.replace(date, '# ', 1)
		# Set the 'date' field in the frontmatter of the markdown file to the date in the first heading
		frontmatter_data.metadata['date'] = date[4:-2]
		# Replace all spaces in [[]] with _ in regex
		content = re.sub(r'\[\[\s*(\S*)\s*\]\]', r'[[\1]]', content)


		frontmatter.dump(frontmatter_data, file)



In [None]:
# Move files tagged "Type/__" to the folder "Notes/__/{file_name}"
for file in markdown_files:
	# If the frontmatter of the markdown file has a 'Type/Youtube_Notes' in the 'tags' field
	frontmatter_data = frontmatter.load(file)
	if frontmatter_data.metadata.get('tags') is not None and 'Type/Book' in frontmatter_data.metadata['tags']:
		# Move the file in the folder 'Notes/Videos'
		file_name = os.path.basename(file)
		os.rename(file, f'Notes/Books/{file_name}')
		print(f'Moved {file} to Notes/Books')

	

In [None]:
# For file in "Notes/Books", grab all Wikilinks and move them into "Notes/Books" as well
for file in os.listdir('Notes/Books'):
	if file.endswith('.md'):
		frontmatter_data = frontmatter.load(f'Notes/Books/{file}')
		content = frontmatter_data.content
		# Match contents of wikilink, even if it has an alias
		wikilink_regex = r'\[\[(.*?)\]\]'
		# If there is a Wikilink in the content of the markdown file
		if re.search(r'\[\[.*?\]\]', content):
			# Get the Wikilink
			wikilink = re.search(r'\[\[.*?\]\]', content).group(0)

			# Move the Wikilink to the folder 'Notes/Books'
			os.rename(f'Notes/Books/{file}', f'Notes/Books/{wikilink[3:-2]}.md')
			print(f'Moved {wikilink[3:-2]} to Notes/Books')

In [9]:
# Get a list of all files with more than one tag beginning with "Type" (more than one Type)
def get_files_with_multiple_types(markdown_files):
	for file in markdown_files:
		frontmatter_data = frontmatter.load(file)
		# If 'tags' exists in the frontmatter of the markdown file and is a list
		if frontmatter_data.metadata.get('tags') is not None and isinstance(frontmatter_data.metadata['tags'], list):
			# Get list of tags that start with "Type"
			tags_starting_with_type = [tag for tag in frontmatter_data.metadata['tags'] if tag.startswith('Type')]
			# If tags has more than one tag that starts with "Type"
			if len(tags_starting_with_type) > 1:
				# Move the file in the folder '0. Note Has Multiple Types'
				file_name = os.path.basename(file)
				os.rename(file, f'0. Note Has Multiple Types/{file_name}')
				print(f'Moved {file} to 0. Note Has Multiple Types')
refreshMarkdownFiles()
get_files_with_multiple_types(markdown_files)

Moved ./0. Note Has Multiple Types/History Remembers Those Who Believe in the Future.md to 0. Note Has Multiple Types
Moved ./0. Note Has Multiple Types/Cathedral Effect.md to 0. Note Has Multiple Types
Moved ./0. Note Has Multiple Types/Do Not Be the Child Your Parents Wants. Be the Child You Would Want Your Own Child to Be..md to 0. Note Has Multiple Types
Moved ./0. Note Has Multiple Types/I Would Like to Be My Child's Advisor.md to 0. Note Has Multiple Types
Moved ./0. Note Has Multiple Types/Why Keeping the Room Cool Helps Sleep.md to 0. Note Has Multiple Types
Moved ./0. Note Has Multiple Types/Why I Prefer Data Engineering Roles.md to 0. Note Has Multiple Types
Moved ./0. Note Has Multiple Types/Efficiency is Doing Things Right. Effectiveness is Doing the Right Things..md to 0. Note Has Multiple Types
Moved ./0. Note Has Multiple Types/Bluebooking for Happiness.md to 0. Note Has Multiple Types
Moved ./0. Note Has Multiple Types/AVAs in the Palms, Bottom of Feet, and Upper Half o

In [15]:
# For file in markdown_files, if the file has a f'Type/{source}' in the 'tags' field, remove the f'Type/{source}' tag and add a "source" field with the source
def remove_type_add_source(source):
	for file in get_markdown_files():
		frontmatter_data = frontmatter.load(file)
		if frontmatter_data.metadata.get('tags') is not None and f'Type/{source}' in frontmatter_data.metadata['tags']:
			frontmatter_data.metadata['source'] = source
			print(f'Added source {source} to {file}')
			frontmatter_data.metadata['tags'].remove(f'Type/{source}')
			frontmatter.dump(frontmatter_data, file)
			print(f'Removed source {source} from {file}')
remove_type_add_source('Derived')

FileNotFoundError: [Errno 2] No such file or directory: './Journal/Personal Life/2022-02-09 Dinner with Aarya, Then Ahmed.md'

In [19]:
# For file in markdown_files, if the file has a source in the "source" field, remove the categories field and add a f'Type/{category}' in the 'tags' field
def turn_source_to_tag():
	for file in get_markdown_files():
		frontmatter_data = frontmatter.load(file)
		if frontmatter_data.metadata.get('source') is not None:
			source = frontmatter_data.metadata['source']
			if frontmatter_data.metadata.get('tags') is None:
				frontmatter_data.metadata['tags'] = []
			# If exists and it is a string
			if frontmatter_data.metadata.get('tags') is not None and isinstance(frontmatter_data.metadata['tags'], str):
				frontmatter_data.metadata['tags'] = [frontmatter_data.metadata['tags']]
			frontmatter_data.metadata['tags'].append(f"Type/{source}")
			print(f'Added source {source} to {file}')
			# Remove the 'source' field
			del frontmatter_data.metadata['source']
			frontmatter.dump(frontmatter_data, file)
			print(f'Removed source {source} from {file}')
turn_source_to_tag()

Added source Original to ./Cards/Quotes/My Job Isn't to Implement My Solutions. My Job is to Solve Your Problems..md
Removed source Original from ./Cards/Quotes/My Job Isn't to Implement My Solutions. My Job is to Solve Your Problems..md
Added source Derived to ./Cards/Quotes/"Give Feedforward, Not feedback.".md
Removed source Derived from ./Cards/Quotes/"Give Feedforward, Not feedback.".md
Added source Derived to ./Cards/Fun Facts/Mozart's widow destroyed 90% of his early drafts to reinforce the notion that he was a prodigy #Fun_Facts.md
Removed source Derived from ./Cards/Fun Facts/Mozart's widow destroyed 90% of his early drafts to reinforce the notion that he was a prodigy #Fun_Facts.md
Added source Derived to ./Cards/Fun Facts/Baker Miller Pink Reduces Agression in Men.md
Removed source Derived from ./Cards/Fun Facts/Baker Miller Pink Reduces Agression in Men.md
Added source Derived to ./Cards/Fun Facts/3 Laws of Thermodynamics.md
Removed source Derived from ./Cards/Fun Facts/3 La