# UMGC Catalog
## Step 2: Parse Text to Create Course Dictionary

### Read in raw text file

In [1]:
import sys
import re

## Regular Expressions

# Course Start and End patterns
course_pattern = re.compile(r'^([A-Z]{3}[A-Z]? \d{3}[A-Z]?) ([A-Z].+?) \((\d(?:–\d)?)\)$')
course_start_pattern = re.compile(r'^([A-Z]{3}[A-Z]? \d{3}[A-Z]?) ([A-Z].+?)$')
course_end_pattern = re.compile(r'^(.+?) \((\d(?:–\d)?)\)$')

# Prerequisites
prerequisite_pattern = re.compile(r'Prerequisites?:\s(.*?)(?=\.\s|\.\))', re.DOTALL)
    
# Recommended
recommended_pattern = re.compile(r'Recommended:\s([^\.]+)\.\s')
    
# Substitutions
substitution_pattern = re.compile(r'may\s+receive\s+credit\s+for\s+only\s+one\s+of\s+the\s+following\s+courses: ')

# Warnings 
warning_pattern = re.compile(r'^\(([^\)]+)\)')

## Functions for creating courses

def create_new_course(name, title, credit):
    course = {
        'name': name.strip(),
        'title': title.strip(),
        'credit': credit.strip(),
        'description': '',
        'prerequisites': '',
        'recommended': '',
        'warnings': '',
        'substitutions': '',
        'pre': '',
        'pre_credits': '',
        'pre_notes': ''
    }
    return course

def update_description(course, sub=substitution_pattern, pre=prerequisite_pattern, 
                       warn=warning_pattern, recd=recommended_pattern):
    description = re.sub(r'\n', '', course['description'])
    # Substitutions
    submatch = sub.search(description)
    if submatch:
        start, end = submatch.span()
        course['substitutions'] = description[end:-1].strip()
    # Prerequisites
    prematch = pre.search(description)
    if prematch:
        course['prerequisites'] = prematch.group(1)
    # Recommended
    recmatch = recd.search(description)
    if recmatch:
        course['recommended'] = recmatch.group(1)
    # Warnings
    warnmatch = warn.search(description)
    if warnmatch:
        course['warnings'] = warnmatch.group(1)
    
    return course

def parse_course_info(text):
    
    # Logic Overview
    #
    # A course starts by matching a pattern, either
    #
    #     OneLine = STAT 221 Introduction to Statistics (3)
    # or
    #     TwoLine = STAT 536 A Really Long Description that 
    #               Takes Up More than One Line (1-3)
    #
    # - `course_match` will match OneLine and a new course is created
    # - `course_match_start` will match the first line of TwoLine 
    #   and make the variable `start_course = True`
    # - `course_match_end` will match the second line of TwoLine and
    #   a new course is created. This is checked only if the variable 
    #   `start_course = True`
    # - `course_match` and `course_match_end` will turn on the indicator
    #   variable `description_on = True`
    # - a blank line will set `description_on = False` and indicates
    #   the end of the course.
    # - after a course is ended, it is moved to `prior_course` and the
    #   description is parsed to populate warnings, prerequisites, and 
    #   recommended in the course
    
    courses = []
    prior_course = None
    current_course = None
    start_course = False
    add_description = False

    for line in text.split('\n'):
        
        course_match = course_pattern.search(line)
        course_match_start = course_start_pattern.search(line)
        course_match_end = course_end_pattern.search(line)

        if start_course:
            # Add the second half of a two-line course title
            if course_match_end:
                course_title2, course_credit = course_match_end.groups()
                course_title = course_title1.strip() + ' ' + course_title2.strip()
                current_course = create_new_course(course_name, course_title, course_credit)
                courses.append(current_course)
                start_course = False
                add_description = True

        elif course_match:
            # If we find a course line, extract information and start a new course
            course_name, course_title, course_credit = course_match.groups()
            current_course = create_new_course(course_name, course_title, course_credit)
            courses.append(current_course)
            start_course = False
            add_description = True

        elif course_match_start:
            course_name, course_title1 = course_match_start.groups()
            start_course = True                    
            
        elif current_course is not None:
            # If we are in the middle of a course, add the line to its description
            if add_description:
                # When the description ends, parse it to fill in prerequisites, 
                # recommended, warnings, and substitutions
                if line == '':
                    add_description = False
                    current_course = update_description(course=current_course)
                else:
                    current_course['description'] += line + '\n'

    return courses

In [2]:
infile = 'tmp_pdf2txt.txt'
with open(infile, 'r') as file:
    # Read the entire content of the file into a string
    text = file.read()

courses = parse_course_info(text)

# Create a dictionary with name as the key
all_classes = {course['name']: course for course in courses}

len(all_classes)

970

In [3]:
# filter out graduate courses
grad_pattern = re.compile(r'([A-Z]{3}[A-Z]? [5-9]\d{2}[A-Z]?)')
classes = {key: value for key, value in all_classes.items() if not grad_pattern.match(key)}

len(classes)

672

### Iterate over courses

In [4]:
## Regular expression patterns to extract information from the catalog

class_template = r'([A-Z]{3}[A-Z]? \d{3}[A-Z]?)'
A = class_template

# to filter out graduate courses
grad_template = r'([A-Z]{3}[A-Z]? [5-9]\d{2}[A-Z]?)'
G = grad_template

In [5]:
#######################################################################
## Adding the field 'done' to exclude while editing
## enables us to catch edge cases

def initialize_done(classes):
    # Initialize 'done' items in classes dictionary
    # We will iterate over items that are not done in the future
    
    for class_name, class_info in classes.items():
        prerequisites_text = class_info['prerequisites']
        if prerequisites_text == '':
            class_info['done'] = 1
        else:
            class_info['done'] = 0
    return classes
#######################################################################

def count_done():
    return len([entry for entry in classes.values() if entry['done'] == 1])

# Mark done all classes without prerequisites
classes = initialize_done(classes)
count_done()

255

### Singletons

In [6]:
#######################################################################
singleton = re.compile(f'^{A}$')

def mark_all_singles(classes, pattern=singleton):
    # Mark classes with prerequisites done so we don't iterate over them anymore
    for class_name, class_info in classes.items():
        if class_info['done'] == 0:
            prerequisites_text = class_info['prerequisites']
            match = pattern.search(prerequisites_text)
            if match:
                class_info['pre']  = prerequisites_text
                class_info['done'] = 1
    return classes
#######################################################################

# Mark done classes with single prerequisite
classes = mark_all_singles(classes)
count_done()

434

### Simple Multiple Patterns

In [7]:
#######################################################################
course_or_list2 = re.compile(f'^{A} or {A}$')
course_or_list2a = re.compile(f'^{A} \(or {A}\)$')
course_or_list3 = re.compile(f'^{A}, {A}, or {A}$')
course_or_list4 = re.compile(f'^{A}, {A}, {A}, or {A}$')
course_or_list5 = re.compile(f'^{A}, {A}, {A}, {A}, or {A}$')

course_and_list2 = re.compile(f'^{A} and {A}$')
course_and_list3 = re.compile(f'^{A}, {A}, and {A}$')
course_and_list4 = re.compile(f'^{A}, {A}, {A}, and {A}$')
course_and_list5 = re.compile(f'^{A}, {A}, {A}, {A}, and {A}$')

def update_all_prerequisites(classes, pattern, type='or'):   
    for class_name, class_info in classes.items():
        if class_info['done'] == 0:
            prerequisites_text = class_info['prerequisites']
            match = pattern.search(prerequisites_text)
            if match:
                groups = match.groups()
                if type == 'or': 
                    replaced_text = f'({ " | ".join(groups) })'
                elif type == 'and':
                    replaced_text = f'({ " & ".join(groups) })'
                class_info['pre'] = replaced_text
                class_info['done'] = 1
                
    return classes
#######################################################################

# Mark done classes with known patterns
classes = update_all_prerequisites(classes, pattern = course_or_list2, type='or')
classes = update_all_prerequisites(classes, pattern = course_or_list2a, type='or')
classes = update_all_prerequisites(classes, pattern = course_or_list3, type='or')
classes = update_all_prerequisites(classes, pattern = course_or_list4, type='or')
classes = update_all_prerequisites(classes, pattern = course_or_list5, type='or')

classes = update_all_prerequisites(classes, pattern = course_and_list2, type='and')
classes = update_all_prerequisites(classes, pattern = course_and_list3, type='and')
classes = update_all_prerequisites(classes, pattern = course_and_list4, type='and')
classes = update_all_prerequisites(classes, pattern = course_and_list5, type='and')

count_done()

513

### Writing Class Patterns

In [8]:
#######################################################################
writing_patterns = [
    r'^WRTG 112 or equivalent',
    r'^WRTG 112 or equiva-lent',
    r'^A writing course',
    r'^Any writing course',
    r'^Any WRTG course'
]
course_writing = re.compile('|'.join(writing_patterns))

def update_writing(classes, pattern=course_writing):   
    for class_name, class_info in classes.items():
        if class_info['done'] == 0:
            prerequisites_text = class_info['prerequisites']
            match = pattern.search(prerequisites_text)
            if match:
                replaced_text = 'WRTG 112*'
                class_info['pre'] = replaced_text
                class_info['pre_notes'] = 'or equivalent'
                class_info['done'] = 1           
    return classes
#######################################################################

classes = update_writing(classes)
count_done()

554

### Foreign Language Patterns

In [9]:
#######################################################################
#language_patterns = [
#    f'^{A} or appropriate score on a place',
#    f'^{A} or appropri-ate score on a place'
#]
#course_language = re.compile('|'.join(language_patterns))

language_pattern1 = re.compile(f'^{A} or appropriate score on a place')
language_pattern2 = re.compile(f'^{A} or appropri-ate score on a place')

def update_language(classes, pattern):   
    for class_name, class_info in classes.items():
        if class_info['done'] == 0:
            prerequisites_text = class_info['prerequisites']
            match = pattern.search(prerequisites_text)
            if match:
                replaced_text = match.group(1) + '*'
                class_info['pre'] = replaced_text
                class_info['pre_notes'] = 'placement test'
                class_info['done'] = 1               
    return classes
#######################################################################

classes = update_language(classes,language_pattern1)
classes = update_language(classes,language_pattern2)
count_done()

576

### Prior Approval Patterns

### Complex And/Or Patterns

In [10]:
def debug_remaining(done=0, classes=classes):
    for class_name, class_info in classes.items():
        if class_info['done'] == done:
            print (class_name + ':', class_info['prerequisites'])

In [11]:
def update_general_pattern(classes, pattern, replacement_function):   
    for class_name, class_info in classes.items():
        if class_info['done'] == 0:
            prerequisites_text = class_info['prerequisites']
            match = pattern.search(prerequisites_text)
            if match:
                replaced_text = replacement_function(match)
                class_info['pre'] = replaced_text
                class_info['done'] = 1               
    return classes

In [12]:
#######################################################################
prior_pattern = re.compile('(9 credits in the discipline and prior program approval)')

def update_prior_program(classes, pattern=prior_pattern):   
    for class_name, class_info in classes.items():
        if class_info['done'] == 0:
            prerequisites_text = class_info['prerequisites']
            match = pattern.search(prerequisites_text)
            if match:
                class_info['pre_credits'] = 9
                class_info['pre_notes'] = 'prior program approval'
                class_info['done'] = 1         
    return classes
#######################################################################

classes = update_prior_program(classes)
count_done()

628

In [13]:
#######################################################################
course_and_or_list3 = re.compile(f'^{A} and {A} \(or {A}\)$')
def replacement_function(match):
    return '(' + match.group(1) + ' & (' + match.group(2) + ' | ' + match.group(3) + '))'

classes = update_general_pattern(classes, course_and_or_list3, replacement_function)
count_done()

630

In [14]:
#######################################################################
course_or_and_list5 = re.compile(f'^{A}, {A}, {A} \(or {A}\), and {A}$')
## Not working, missing something
def replacement_function(match):
    return '(' + match.group(1) + ' & ' + match.group(2) + ' & ' + match.group(5) + ' (' + match.group(3) + ' | ' + match.group(4) + '))'

classes = update_general_pattern(classes, course_or_and_list5, replacement_function)
count_done()

632

In [15]:
#######################################################################
course_and_or_list3a = re.compile(f'^{A} \(or {A}\) and {A}$')
def replacement_function(match):
    return '((' + match.group(1) + ' | ' + match.group(2) + ') & ' + match.group(3) + ')'

classes = update_general_pattern(classes, course_and_or_list3a, replacement_function)
count_done()

634

In [16]:
#######################################################################
course_spch = re.compile(r'Any SPCH course or COMM 300')
def replacement_function(match):
    return '(SPCH 100+ | COMM 300)'

classes = update_general_pattern(classes, course_spch, replacement_function)
count_done()

638

In [17]:
#######################################################################
course_or_pattern1 = re.compile(f'^{A} \(or {A}\) or {A}$')
course_or_pattern2 = re.compile(f'^{A} or {A} \(or {A}\)$')

def replacement_function(match):
    return '(' + match.group(1) + ' | ' + match.group(2) + ' | ' + match.group(3) + ')'

classes = update_general_pattern(classes, course_or_pattern1, replacement_function)
classes = update_general_pattern(classes, course_or_pattern2, replacement_function)
count_done()

641

In [18]:
#######################################################################
course_nsci = re.compile(r'MATH 105, STAT 200, or a higher MATH or STAT course')
def replacement_function(match):
    return '(MATH 105 | STAT 200 | MATH 300+ | STAT 300+)'

classes = update_general_pattern(classes, course_nsci, replacement_function)
count_done()

646

In [19]:
#######################################################################
course_span = re.compile(r'Any 300-level SPAN course or appropriate score on a placement test')

def update_spanish(classes, pattern=course_span):   
    for class_name, class_info in classes.items():
        if class_info['done'] == 0:
            prerequisites_text = class_info['prerequisites']
            match = pattern.search(prerequisites_text)
            if match:
                class_info['pre'] = 'SPAN 300+'
                class_info['pre_notes'] = 'placement test'
                class_info['done'] = 1         
    return classes
#######################################################################

classes = update_spanish(classes)
count_done()

648

In [20]:
#######################################################################
course_or_and_pattern4a = re.compile(f'^{A} \(or {A}\), {A}, or {A}$')
course_or_and_pattern4b = re.compile(f'^{A} \(or {A}\) and {A} \(or {A}\)$')

def replacement_function(match):
    return '((' + match.group(1) + ' | ' + match.group(2) + ') & (' + match.group(3) + ' | ' + match.group(4) + '))'

classes = update_general_pattern(classes, course_or_and_pattern4a, replacement_function)
classes = update_general_pattern(classes, course_or_and_pattern4b, replacement_function)
count_done()

650

In [21]:
## Individual edge cases
classes['APTC 495'].update({
	'pre_credits': 27, 
	'done': 1})
classes['BEHS 495'].update({ 
	'pre': 'BEHS 300', 
	'pre_notes': 'completion of all requirements for the social science major', 
	'done': 1})
classes['BIOL 230'].update({ 
	'pre': 'BIOL 103', 
	'pre_notes': 'or other introductory biology course with laboratory',
	'done': 1})
classes['BIOL 357'].update({ 
	'pre': '(BIOL 325 | BIOL 300+)',
	'done': 1})
classes['COMM 495'].update({ 
	'pre': '(COMM 300 & COMM 302)', 
	'pre_credits': 9,
	'pre_notes': '9 credits (COMM 300+ | SPCH 300+ | JOUR 300+)',
	'done': 1})
classes['CMIT 320'].update({ 
	'pre': 'CMIT 265',
	'pre_notes': 'or CompTIA Network+ certification',
	'done': 1})
classes['CMIT 424'].update({ 
	'pre': '(CMIT 202 & CMIT 320 & CCJS 321)',
	'pre_notes': 'CMIT 202 (or CompTIA A+ certification), CMIT 320 (or CompTIA Security+ certification)',
	'done': 1})
classes['CMIT 495'].update({ 
	'pre_credits': 27,
	'pre_notes': 'CMIT coursework', 
	'done': 1})
classes['CMSC 498'].update({ 
	'pre_notes': 'Vary according to topic', 
	'done': 1})
classes['CMST 495'].update({ 
	'pre_credits': 24,
	'pre_notes': 'within the major', 
	'done': 1})
classes['CSIA 300'].update({ 
	'pre': '(CMIS 100+ | CMIT 100+ | CMSC 100+ | CMST 100+ | CSIA 100+ | DATA 100+ | IFSM 100+ | SDEV 100+)', 
	'done': 1})
classes['CSIA 310'].update({ 
	'pre': '(IFSM 201 & WRTG 112*)', 
	'done': 1})
classes['DATA 230'].update({ 
	'pre': '(STAT 200 & (MATH 115 | MATH 108))',
	'pre_notes': 'or higher', 
	'done': 1})
classes['ENGL 495'].update({ 
	'pre': '(ENGL 240 & ENGL 303)',
	'pre_credits': 9,
	'pre_notes': 'ENGL 300+', 
	'done': 1})
classes['ENHS 495'].update({ 
	'pre': '(ENHS 305 & ENHS 330 & ENHS 340)',
	'pre_credits': 30,
	'pre_notes': 'ENHS courses',
	'done': 1})
classes['HIST 289'].update({ 
	'pre': 'HIST 1xx',
	'done': 1})
classes['HIST 495'].update({ 
	'pre': '(HIST 289 & HIST 309)', 
	'pre_credits': 21,
	'pre_notes': 'HIST courses',
	'done': 1})
classes['HMLS 495'].update({ 
	'pre_credits': 15,
	'pre_notes': 'FSCN 300+, EMGT 300+, HMLS 300+, or PSAD 300+',
	'done': 1})
classes['HUMN 495'].update({ 
	'pre': '(HUMN 100 & ARTH 300+ & ENGL 300+ & HUMN 300+ & PHIL 300+)', 
	'done': 1})
classes['IFSM 461'].update({ 
	'pre': '(IFSM 311 & (IFSM 330 | CMIS 320)', 
	'done': 1})
classes['PHYS 121'].update({ 
	'pre': '(MATH 108 | MATH 115)', 
	'pre_notes': 'or knowledge of college-level trigonometry', 
	'done': 1})
classes['PSYC 495'].update({ 
	'pre': '(PSYC 100 & PSYC 300)', 
	'pre_notes': 'completion of all require-ments for the psychology major', 
	'done': 1})


In [22]:
debug_remaining()

In [24]:
import pandas as pd
df = pd.DataFrame(classes).transpose()
#df = df.transpose()

In [25]:
df.head()

Unnamed: 0,name,title,credit,description,prerequisites,recommended,warnings,substitutions,pre,pre_credits,pre_notes,done
ACCT 220,ACCT 220,Principles of Accounting I,3,An introduction to the basic theory and techni...,,,,ACCT 220 or BMGT 220,,,,1
ACCT 221,ACCT 221,Principles of Accounting II,3,Prerequisite: ACCT 220. Further study of conte...,ACCT 220,,,ACCT 221 or BMGT 221,ACCT 220,,,1
ACCT 301,ACCT 301,Accounting for Nonaccounting Managers,3,(May not be applied toward a major in accounti...,,,May not be applied toward a major in accounting.,"ACCT 301, MGMT 301, or MGST 301",,,,1
ACCT 310,ACCT 310,Intermediate Accounting I,3,(Students should be cautious about enrolling i...,ACCT 221,,Students should be cautious about enrolling in...,ACCT 310 or BMGT 310,ACCT 221,,,1
ACCT 311,ACCT 311,Intermediate Accounting II,3,(A continuation of ACCT 310. Students should b...,ACCT 310,,A continuation of ACCT 310. Students should be...,ACCT 311 or BMGT 311,ACCT 310,,,1


In [None]:
classes['PSYC 495']

## Step 3: Save the Dictionary to SQLite3

In [26]:
import sqlite3

In [27]:
conn = sqlite3.connect('UMGC.db')

In [28]:
c = conn.cursor()
c.execute('''
    CREATE TABLE classes (
        name TEXT,
        title TEXT,
        credit TEXT,
        description TEXT,
        prerequisites TEXT,
        recommended TEXT,
        warnings TEXT,
        substitutions TEXT,
        pre TEXT,
        pre_credits TEXT,
        pre_notes TEXT,
        done INTEGER
    )
''')


<sqlite3.Cursor at 0x10f6ca7c0>

In [29]:
# Insert data into the table

for class_name, class_info in classes.items():
    c.execute('''
        INSERT INTO classes VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
    ''', (
        class_info['name'],
        class_info['title'],
        class_info['credit'],
        class_info['description'],
        class_info['prerequisites'],
        class_info['recommended'],
        class_info['warnings'],
        class_info['substitutions'],
        class_info['pre'],
        class_info['pre_credits'],
        class_info['pre_notes'],
        int(class_info['done'])
    ))


In [30]:
conn.commit()
conn.close()

In [31]:

data_json = [
    {
        "seq": 1,
        "name": "PACE 111B",
        "credits": 3,
        "color": "green",
        "textcolor": "white",
        "prerequisite": "",
        "period": 1
    },
    {
        "seq": 2,
        "name": "LIBS 150",
        "credits": 1,
        "color": "green",
        "textcolor": "white",
        "prerequisite": "",
        "period": 1
    },
    {
        "seq": 3,
        "name": "WRTG 111",
        "credits": 3,
        "color": "green",
        "textcolor": "white",
        "prerequisite": "",
        "period": 1
    },
    {
        "seq": 4,
        "name": "WRTG 112",
        "credits": 3,
        "color": "green",
        "textcolor": "white",
        "prerequisite": "",
        "period": 2
    },
    {
        "seq": 5,
        "name": "NUTR 100",
        "credits": 3,
        "color": "green",
        "textcolor": "white",
        "prerequisite": "",
        "period": 2
    },
    {
        "seq": 6,
        "name": "BMGT 110",
        "credits": 3,
        "color": "blue",
        "textcolor": "white",
        "prerequisite": "",
        "period": 2
    },
    {
        "seq": 7,
        "name": "SPCH 100",
        "credits": 3,
        "color": "green",
        "textcolor": "white",
        "prerequisite": "",
        "period": 3
    },
    {
        "seq": 8,
        "name": "STAT 200",
        "credits": 3,
        "color": "red",
        "textcolor": "white",
        "prerequisite": "",
        "period": 3
    },
    {
        "seq": 9,
        "name": "IFSM 300",
        "credits": 3,
        "color": "red",
        "textcolor": "white",
        "prerequisite": "",
        "period": 3
    },
    {
        "seq": 10,
        "name": "ACCT 220",
        "credits": 3,
        "color": "blue",
        "textcolor": "white",
        "prerequisite": "",
        "period": 4
    },
    {
        "seq": 11,
        "name": "HUMN 100",
        "credits": 3,
        "color": "green",
        "textcolor": "white",
        "prerequisite": "",
        "period": 4
    },
    {
        "seq": 12,
        "name": "BIOL 103",
        "credits": 4,
        "color": "green",
        "textcolor": "white",
        "prerequisite": "",
        "period": 5
    },
    {
        "seq": 13,
        "name": "ECON 201",
        "credits": 3,
        "color": "red",
        "textcolor": "white",
        "prerequisite": "",
        "period": 4
    },
    {
        "seq": 14,
        "name": "ARTH 334",
        "credits": 3,
        "color": "green",
        "textcolor": "white",
        "prerequisite": "",
        "period": 5
    },
    {
        "seq": 15,
        "name": "ELECTIVE",
        "credits": 3,
        "color": "yellow",
        "textcolor": "black",
        "prerequisite": "",
        "period": 6
    },
    {
        "seq": 16,
        "name": "ECON 203",
        "credits": 3,
        "color": "red",
        "textcolor": "white",
        "prerequisite": "",
        "period": 6
    },
    {
        "seq": 17,
        "name": "ACCT 221",
        "credits": 3,
        "color": "blue",
        "textcolor": "white",
        "prerequisite": "ACCT 220",
        "period": 6
    },
    {
        "seq": 18,
        "name": "ELECTIVE",
        "credits": 3,
        "color": "yellow",
        "textcolor": "black",
        "prerequisite": "",
        "period": 7
    },
    {
        "seq": 19,
        "name": "BMGT 364",
        "credits": 3,
        "color": "blue",
        "textcolor": "white",
        "prerequisite": "",
        "period": 7
    },
    {
        "seq": 20,
        "name": "ELECTIVE",
        "credits": 3,
        "color": "yellow",
        "textcolor": "black",
        "prerequisite": "",
        "period": 7
    },
    {
        "seq": 21,
        "name": "BMGT 365",
        "credits": 3,
        "color": "blue",
        "textcolor": "white",
        "prerequisite": "BMGT 364",
        "period": 8
    },
    {
        "seq": 22,
        "name": "ELECTIVE",
        "credits": 3,
        "color": "yellow",
        "textcolor": "black",
        "prerequisite": "",
        "period": 8
    },
    {
        "seq": 23,
        "name": "MRKT 310",
        "credits": 3,
        "color": "blue",
        "textcolor": "white",
        "prerequisite": "",
        "period": 8
    },
    {
        "seq": 24,
        "name": "WRTG 394",
        "credits": 3,
        "color": "green",
        "textcolor": "white",
        "prerequisite": "WRTG 112",
        "period": 9
    },
    {
        "seq": 25,
        "name": "ELECTIVE",
        "credits": 3,
        "color": "yellow",
        "textcolor": "black",
        "prerequisite": "",
        "period": 9
    },
    {
        "seq": 26,
        "name": "BMGT 380",
        "credits": 3,
        "color": "blue",
        "textcolor": "white",
        "prerequisite": "",
        "period": 9
    },
    {
        "seq": 27,
        "name": "ELECTIVE",
        "credits": 3,
        "color": "yellow",
        "textcolor": "black",
        "prerequisite": "",
        "period": 10
    },
    {
        "seq": 28,
        "name": "ELECTIVE",
        "credits": 3,
        "color": "yellow",
        "textcolor": "black",
        "prerequisite": "",
        "period": 10
    },
    {
        "seq": 29,
        "name": "HRMN 300",
        "credits": 3,
        "color": "blue",
        "textcolor": "white",
        "prerequisite": "",
        "period": 10
    },
    {
        "seq": 30,
        "name": "ELECTIVE",
        "credits": 3,
        "color": "yellow",
        "textcolor": "black",
        "prerequisite": "",
        "period": 11
    },
    {
        "seq": 31,
        "name": "ELECTIVE",
        "credits": 3,
        "color": "yellow",
        "textcolor": "black",
        "prerequisite": "",
        "period": 11
    },
    {
        "seq": 32,
        "name": "FINC 330",
        "credits": 3,
        "color": "blue",
        "textcolor": "white",
        "prerequisite": "ACCT 221 & STAT 200",
        "period": 11
    },
    {
        "seq": 33,
        "name": "ELECTIVE",
        "credits": 3,
        "color": "yellow",
        "textcolor": "black",
        "prerequisite": "",
        "period": 12
    },
    {
        "seq": 34,
        "name": "ELECTIVE",
        "credits": 3,
        "color": "yellow",
        "textcolor": "black",
        "prerequisite": "",
        "period": 12
    },
    {
        "seq": 35,
        "name": "BMGT 496",
        "credits": 3,
        "color": "blue",
        "textcolor": "white",
        "prerequisite": "",
        "period": 12
    },
    {
        "seq": 36,
        "name": "ELECTIVE",
        "credits": 3,
        "color": "yellow",
        "textcolor": "black",
        "prerequisite": "",
        "period": 13
    },
    {
        "seq": 37,
        "name": "ELECTIVE",
        "credits": 3,
        "color": "yellow",
        "textcolor": "black",
        "prerequisite": "",
        "period": 13
    },
    {
        "seq": 38,
        "name": "ELECTIVE",
        "credits": 3,
        "color": "yellow",
        "textcolor": "black",
        "prerequisite": "",
        "period": 13
    },
    {
        "seq": 39,
        "name": "ELECTIVE",
        "credits": 3,
        "color": "yellow",
        "textcolor": "black",
        "prerequisite": "",
        "period": 14
    },
    {
        "seq": 40,
        "name": "BMGT 495",
        "credits": 3,
        "color": "blue",
        "textcolor": "white",
        "prerequisite": "BMGT 365 & MRKT 310 & FINC 330",
        "period": 14
    },
    {
        "seq": 41,
        "name": "CAPSTONE",
        "credits": 1,
        "color": "yellow",
        "textcolor": "black",
        "prerequisite": "FINC 330",
        "period": 14
    }
]


In [32]:
df = pd.DataFrame(data_json)

In [33]:
df.head()

Unnamed: 0,seq,name,credits,color,textcolor,prerequisite,period
0,1,PACE 111B,3,green,white,,1
1,2,LIBS 150,1,green,white,,1
2,3,WRTG 111,3,green,white,,1
3,4,WRTG 112,3,green,white,,2
4,5,NUTR 100,3,green,white,,2


In [34]:
json_data = df.to_json(orient='records')


In [35]:
json_data

'[{"seq":1,"name":"PACE 111B","credits":3,"color":"green","textcolor":"white","prerequisite":"","period":1},{"seq":2,"name":"LIBS 150","credits":1,"color":"green","textcolor":"white","prerequisite":"","period":1},{"seq":3,"name":"WRTG 111","credits":3,"color":"green","textcolor":"white","prerequisite":"","period":1},{"seq":4,"name":"WRTG 112","credits":3,"color":"green","textcolor":"white","prerequisite":"","period":2},{"seq":5,"name":"NUTR 100","credits":3,"color":"green","textcolor":"white","prerequisite":"","period":2},{"seq":6,"name":"BMGT 110","credits":3,"color":"blue","textcolor":"white","prerequisite":"","period":2},{"seq":7,"name":"SPCH 100","credits":3,"color":"green","textcolor":"white","prerequisite":"","period":3},{"seq":8,"name":"STAT 200","credits":3,"color":"red","textcolor":"white","prerequisite":"","period":3},{"seq":9,"name":"IFSM 300","credits":3,"color":"red","textcolor":"white","prerequisite":"","period":3},{"seq":10,"name":"ACCT 220","credits":3,"color":"blue","t

In [36]:
data_dict = df.to_dict(orient='records')


In [37]:
data_dict

[{'seq': 1,
  'name': 'PACE 111B',
  'credits': 3,
  'color': 'green',
  'textcolor': 'white',
  'prerequisite': '',
  'period': 1},
 {'seq': 2,
  'name': 'LIBS 150',
  'credits': 1,
  'color': 'green',
  'textcolor': 'white',
  'prerequisite': '',
  'period': 1},
 {'seq': 3,
  'name': 'WRTG 111',
  'credits': 3,
  'color': 'green',
  'textcolor': 'white',
  'prerequisite': '',
  'period': 1},
 {'seq': 4,
  'name': 'WRTG 112',
  'credits': 3,
  'color': 'green',
  'textcolor': 'white',
  'prerequisite': '',
  'period': 2},
 {'seq': 5,
  'name': 'NUTR 100',
  'credits': 3,
  'color': 'green',
  'textcolor': 'white',
  'prerequisite': '',
  'period': 2},
 {'seq': 6,
  'name': 'BMGT 110',
  'credits': 3,
  'color': 'blue',
  'textcolor': 'white',
  'prerequisite': '',
  'period': 2},
 {'seq': 7,
  'name': 'SPCH 100',
  'credits': 3,
  'color': 'green',
  'textcolor': 'white',
  'prerequisite': '',
  'period': 3},
 {'seq': 8,
  'name': 'STAT 200',
  'credits': 3,
  'color': 'red',
  'textc

In [38]:
html_code = '''
<!DOCTYPE html>
<html>
<head>
  <style>
    body {
      text-align: center;
    }
    
    svg {
      margin-top: 12px;
      border: 1px solid #aaa;
    }
  </style>

  <!-- Load d3.js -->
  <script src='https://d3js.org/d3.v5.js'></script>
</head>

<body>

  <script type="module">
//    const data = [{"seq":1,"name":"PACE 111B","credits":3,"color":"green","textcolor":"white","prerequisite":"","period":1},{"seq":2,"name":"LIBS 150","credits":1,"color":"green","textcolor":"white","prerequisite":"","period":1},{"seq":3,"name":"WRTG 111","credits":3,"color":"green","textcolor":"white","prerequisite":"","period":1},{"seq":4,"name":"WRTG 112","credits":3,"color":"green","textcolor":"white","prerequisite":"","period":2},{"seq":5,"name":"NUTR 100","credits":3,"color":"green","textcolor":"white","prerequisite":"","period":2},{"seq":6,"name":"BMGT 110","credits":3,"color":"blue","textcolor":"white","prerequisite":"","period":2},{"seq":7,"name":"SPCH 100","credits":3,"color":"green","textcolor":"white","prerequisite":"","period":3},{"seq":8,"name":"STAT 200","credits":3,"color":"red","textcolor":"white","prerequisite":"","period":3},{"seq":9,"name":"IFSM 300","credits":3,"color":"red","textcolor":"white","prerequisite":"","period":3},{"seq":10,"name":"ACCT 220","credits":3,"color":"blue","textcolor":"white","prerequisite":"","period":4},{"seq":11,"name":"HUMN 100","credits":3,"color":"green","textcolor":"white","prerequisite":"","period":4},{"seq":12,"name":"BIOL 103","credits":4,"color":"green","textcolor":"white","prerequisite":"","period":5},{"seq":13,"name":"ECON 201","credits":3,"color":"red","textcolor":"white","prerequisite":"","period":4},{"seq":14,"name":"ARTH 334","credits":3,"color":"green","textcolor":"white","prerequisite":"","period":5},{"seq":15,"name":"ELECTIVE","credits":3,"color":"yellow","textcolor":"black","prerequisite":"","period":6},{"seq":16,"name":"ECON 203","credits":3,"color":"red","textcolor":"white","prerequisite":"","period":6},{"seq":17,"name":"ACCT 221","credits":3,"color":"blue","textcolor":"white","prerequisite":"ACCT 220","period":6},{"seq":18,"name":"ELECTIVE","credits":3,"color":"yellow","textcolor":"black","prerequisite":"","period":7},{"seq":19,"name":"BMGT 364","credits":3,"color":"blue","textcolor":"white","prerequisite":"","period":7},{"seq":20,"name":"ELECTIVE","credits":3,"color":"yellow","textcolor":"black","prerequisite":"","period":7},{"seq":21,"name":"BMGT 365","credits":3,"color":"blue","textcolor":"white","prerequisite":"BMGT 364","period":8},{"seq":22,"name":"ELECTIVE","credits":3,"color":"yellow","textcolor":"black","prerequisite":"","period":8},{"seq":23,"name":"MRKT 310","credits":3,"color":"blue","textcolor":"white","prerequisite":"","period":8},{"seq":24,"name":"WRTG 394","credits":3,"color":"green","textcolor":"white","prerequisite":"WRTG 112","period":9},{"seq":25,"name":"ELECTIVE","credits":3,"color":"yellow","textcolor":"black","prerequisite":"","period":9},{"seq":26,"name":"BMGT 380","credits":3,"color":"blue","textcolor":"white","prerequisite":"","period":9},{"seq":27,"name":"ELECTIVE","credits":3,"color":"yellow","textcolor":"black","prerequisite":"","period":10},{"seq":28,"name":"ELECTIVE","credits":3,"color":"yellow","textcolor":"black","prerequisite":"","period":10},{"seq":29,"name":"HRMN 300","credits":3,"color":"blue","textcolor":"white","prerequisite":"","period":10},{"seq":30,"name":"ELECTIVE","credits":3,"color":"yellow","textcolor":"black","prerequisite":"","period":11},{"seq":31,"name":"ELECTIVE","credits":3,"color":"yellow","textcolor":"black","prerequisite":"","period":11},{"seq":32,"name":"FINC 330","credits":3,"color":"blue","textcolor":"white","prerequisite":"ACCT 221 & STAT 200","period":11},{"seq":33,"name":"ELECTIVE","credits":3,"color":"yellow","textcolor":"black","prerequisite":"","period":12},{"seq":34,"name":"ELECTIVE","credits":3,"color":"yellow","textcolor":"black","prerequisite":"","period":12},{"seq":35,"name":"BMGT 496","credits":3,"color":"blue","textcolor":"white","prerequisite":"","period":12},{"seq":36,"name":"ELECTIVE","credits":3,"color":"yellow","textcolor":"black","prerequisite":"","period":13},{"seq":37,"name":"ELECTIVE","credits":3,"color":"yellow","textcolor":"black","prerequisite":"","period":13},{"seq":38,"name":"ELECTIVE","credits":3,"color":"yellow","textcolor":"black","prerequisite":"","period":13},{"seq":39,"name":"ELECTIVE","credits":3,"color":"yellow","textcolor":"black","prerequisite":"","period":14},{"seq":40,"name":"BMGT 495","credits":3,"color":"blue","textcolor":"white","prerequisite":"BMGT 365 & MRKT 310 & FINC 330","period":14},{"seq":41,"name":"CAPSTONE","credits":1,"color":"yellow","textcolor":"black","prerequisite":"FINC 330","period":14}];
    const screenWidth = 800;
    const boxWidth = 110;
    const boxHeight = 40;
    const textOffsetX = 10; 
    const textOffsetY = 25;
    const sessionOffset = 60;
    const headerRow = 20;

    // Define x coordinates for rectangles
    var bin = [10];
    // if sessions, then 2.25*boxWidth else boxWidth
    for (let k=0; k <=30; k++) {
        bin.push(bin[k] + 20 + 2.25*boxWidth);
    }
    
    // Define y coordinates for rectangles
    const yGap = 4;
    const boxSpace = boxHeight + yGap;
    var row = [80];
    for (let k=0; k <=10; k++) {
        row.push(row[k] + boxSpace);
    }

    var semesterData = [];
    const seasons = ['WINTER', 'SPRING', 'SUMMER', 'FALL'];
    for (let year = 2024; year <= 2040; year++) {
      for (let season of seasons) {
        semesterData.push(`${season} ${year}`);
      }
    }
    
    function render(data) {
      // Start Nested Functions
      function drawColumn(period, data) {
        var filteredData = data.filter(item => item.period === period);
        var anyItem = false;
        for (let j = 0; j < data.length; j++) {
          let offset = (j % 3 - 1) * sessionOffset;
          let item = filteredData[j];
          if (item !== undefined) {
            anyItem = true;
            let fullname = `${item.name} (${item.credits})`;
            drawRectangle(bin[period]+offset, row[j], fullname, item.color, item.textcolor);
          }
        }
        if (anyItem) {
          drawHeader(bin[period], 20, semesterData[period-1], 'lightgray', 'black');
        }
      }
      function drawRectangle(x, y, name, color, textcolor, description='') {
        var g = zoomable.append("g");
        g.append("rect")
          .attr("x", x)
          .attr("y", y)
          .attr("width", boxWidth)
          .attr("height", boxHeight)
          .style("fill", color)
          .classed("movable", true); // Add the "movable" class
        g.append("text")
          .attr("x", x + textOffsetX)
          .attr("y", y + textOffsetY)
          .text(name)
          .attr("fill", textcolor)
          .style("font-size", "12px")
          .style("font-family", "Arial")
          .style("font-weight", "bold")
          .classed("movable", true); // Add the "movable" class
        // Add a tooltip
        g.append("description")
          .text(description);
      }
      function drawHeader(x, y, name, color, textcolor, description='') {
        var g = zoomable.append("g");
        g.append("rect")
          .attr("x", x - sessionOffset)
          .attr("y", y)
          .attr("width", boxWidth + 2 * sessionOffset)
          .attr("height", boxHeight)
          .style("fill", color)
          .classed("movable", true); // Add the "movable" class
        g.append("text")
          .attr("x", x - sessionOffset + 2*textOffsetX)
          .attr("y", y + textOffsetY)
          .text(name)
          .attr("fill", textcolor)
          .style("font-size", "14px")
          .style("font-family", "Arial")
          .style("font-weight", "bold")
          .classed("movable", true); // Add the "movable" class
        // Add a tooltip
        g.append("description")
          .text(description);
      }
      // End of Nested Functions
      zoomable.selectAll(".movable").remove();
      var maxPeriod = Math.max(...data.map(item => item.period));
      for (let j = 0; j <= maxPeriod; j++) {
        drawColumn(j, data);
      }
    }

    // Select the body
    var body = d3.select("body");

    // Zoom behavior
    var svg = body.append('svg')
      .attr('id', 'datavizArea')
      .attr('height', 300)
      .attr('width', 900);
    var zoomable = svg.append("g");
    var zoom = d3.zoom()
      .on("zoom", function() {
        zoomable.attr("transform", d3.event.transform);
      });
    svg.call(zoom)
        .call(zoom.transform, d3.zoomIdentity.scale(0.80).translate(-160, 0));

    render({data});
  
  </script>

</body>
</html>
'''

In [38]:
html_code = '''
<!DOCTYPE html>
<html>
<head>

    // Zoom behavior
    var svg = body.append('svg')
      .attr('id', 'datavizArea')
      .attr('height', 300)
      .attr('width', 900);
    var zoomable = svg.append("g");
    var zoom = d3.zoom()
      .on("zoom", function() {
        zoomable.attr("transform", d3.event.transform);
      });
    svg.call(zoom)
        .call(zoom.transform, d3.zoomIdentity.scale(0.80).translate(-160, 0));

    render({data});
  
  </script>

</body>
</html>
'''

In [40]:
html_code.format(data="TEST")

KeyError: '\n      text-align'

In [41]:
html_code = '''
<!DOCTYPE html>
<html>
<head>
  <style>
    body {{
      text-align: center;
    }}
    
    svg {{
      margin-top: 12px;
      border: 1px solid #aaa;
    }}
  </style>

  <!-- Load d3.js -->
  <script src='https://d3js.org/d3.v5.js'></script>
</head>

<body>

  <script type="module">
    const screenWidth = 800;
    const boxWidth = 110;
    const boxHeight = 40;
    const textOffsetX = 10; 
    const textOffsetY = 25;
    const sessionOffset = 60;
    const headerRow = 20;

    // Define x coordinates for rectangles
    var bin = [10];
    // if sessions, then 2.25*boxWidth else boxWidth
    for (let k=0; k <=30; k++) {{
        bin.push(bin[k] + 20 + 2.25*boxWidth);
    }}
    
    // Define y coordinates for rectangles
    const yGap = 4;
    const boxSpace = boxHeight + yGap;
    var row = [80];
    for (let k=0; k <=10; k++) {{
        row.push(row[k] + boxSpace);
    }}

    var semesterData = [];
    const seasons = ['WINTER', 'SPRING', 'SUMMER', 'FALL'];
    for (let year = 2024; year <= 2040; year++) {{
      for (let season of seasons) {{
        semesterData.push(`${{season}} ${{year}}`);
      }}
    }}
    
    function render(data) {{
      // Start Nested Functions
      function drawColumn(period, data) {{
        var filteredData = data.filter(item => item.period === period);
        var anyItem = false;
        for (let j = 0; j < data.length; j++) {{
          let offset = (j % 3 - 1) * sessionOffset;
          let item = filteredData[j];
          if (item !== undefined) {{
            anyItem = true;
            let fullname = `${{item.name}} (${{item.credits}})`;
            drawRectangle(bin[period]+offset, row[j], fullname, item.color, item.textcolor);
          }}
        }}
        if (anyItem) {{
          drawHeader(bin[period], 20, semesterData[period-1], 'lightgray', 'black');
        }}
      }}
      function drawRectangle(x, y, name, color, textcolor, description='') {{
        var g = zoomable.append("g");
        g.append("rect")
          .attr("x", x)
          .attr("y", y)
          .attr("width", boxWidth)
          .attr("height", boxHeight)
          .style("fill", color)
          .classed("movable", true); // Add the "movable" class
        g.append("text")
          .attr("x", x + textOffsetX)
          .attr("y", y + textOffsetY)
          .text(name)
          .attr("fill", textcolor)
          .style("font-size", "12px")
          .style("font-family", "Arial")
          .style("font-weight", "bold")
          .classed("movable", true); // Add the "movable" class
        // Add a tooltip
        g.append("description")
          .text(description);
      }}
      function drawHeader(x, y, name, color, textcolor, description='') {{
        var g = zoomable.append("g");
        g.append("rect")
          .attr("x", x - sessionOffset)
          .attr("y", y)
          .attr("width", boxWidth + 2 * sessionOffset)
          .attr("height", boxHeight)
          .style("fill", color)
          .classed("movable", true); // Add the "movable" class
        g.append("text")
          .attr("x", x - sessionOffset + 2*textOffsetX)
          .attr("y", y + textOffsetY)
          .text(name)
          .attr("fill", textcolor)
          .style("font-size", "14px")
          .style("font-family", "Arial")
          .style("font-weight", "bold")
          .classed("movable", true); // Add the "movable" class
        // Add a tooltip
        g.append("description")
          .text(description);
      }}
      // End of Nested Functions
      zoomable.selectAll(".movable").remove();
      var maxPeriod = Math.max(...data.map(item => item.period));
      for (let j = 0; j <= maxPeriod; j++) {{
        drawColumn(j, data);
      }}
    }}

    // Select the body
    var body = d3.select("body");

    // Zoom behavior
    var svg = body.append('svg')
      .attr('id', 'datavizArea')
      .attr('height', 300)
      .attr('width', 900);
    var zoomable = svg.append("g");
    var zoom = d3.zoom()
      .on("zoom", function() {{
        zoomable.attr("transform", d3.event.transform);
      }});
    svg.call(zoom)
        .call(zoom.transform, d3.zoomIdentity.scale(0.80).translate(-160, 0));

    render({data});
  
  </script>

</body>
</html>
'''


In [42]:
html_code.format(data="TEST")

'\n<!DOCTYPE html>\n<html>\n<head>\n  <style>\n    body {\n      text-align: center;\n    }\n    \n    svg {\n      margin-top: 12px;\n      border: 1px solid #aaa;\n    }\n  </style>\n\n  <!-- Load d3.js -->\n  <script src=\'https://d3js.org/d3.v5.js\'></script>\n</head>\n\n<body>\n\n  <script type="module">\n    const screenWidth = 800;\n    const boxWidth = 110;\n    const boxHeight = 40;\n    const textOffsetX = 10; \n    const textOffsetY = 25;\n    const sessionOffset = 60;\n    const headerRow = 20;\n\n    // Define x coordinates for rectangles\n    var bin = [10];\n    // if sessions, then 2.25*boxWidth else boxWidth\n    for (let k=0; k <=30; k++) {\n        bin.push(bin[k] + 20 + 2.25*boxWidth);\n    }\n    \n    // Define y coordinates for rectangles\n    const yGap = 4;\n    const boxSpace = boxHeight + yGap;\n    var row = [80];\n    for (let k=0; k <=10; k++) {\n        row.push(row[k] + boxSpace);\n    }\n\n    var semesterData = [];\n    const seasons = [\'WINTER\', \'S