In [9]:
from spire.doc import *
from spire.doc.common import *
import json
import re
import os
from jinja2 import Environment, FileSystemLoader

In [10]:
# Create the single, module-level Document object.
# extract_text() calls LoadFromFile()/GetText() on this shared instance
# instead of creating its own, so this cell must run before extract_text() is used.
document = Document()

In [11]:
def extract_text(textFilePath, outputPath="Output/DocumentText.txt"):
  """Extract the text of a Word document and save a cleaned-up copy as UTF-8.

  The document is loaded into the module-level `document` object, its text is
  normalised (tab runs become a single newline, whitespace following a newline
  is collapsed, carriage returns are removed) and the result is written to
  `outputPath`.

  Args:
    textFilePath: Path of the Word document to load.
    outputPath: Destination text file. Defaults to the path the notebook has
      always used, so existing one-argument calls behave as before.

  Returns:
    None. The cleaned text is only written to `outputPath`.
  """
  # Load the Word document into the shared module-level Document object
  document.LoadFromFile(textFilePath)

  # Extract the text of the document
  document_text = document.GetText()

  # Substitute every run of '\t' with a single '\n'
  document_text = re.sub(r'\t{1,}', '\n', document_text)
  # Collapse whitespace that follows a newline (drops blank lines and indentation)
  document_text = re.sub(r'\n\s+', '\n', document_text)
  # Remove any remaining '\r'
  document_text = document_text.replace('\r', '')

  # Create the destination folder first: on a fresh checkout "Output/" does not
  # exist yet and open() would fail with FileNotFoundError.
  output_dir = os.path.dirname(outputPath)
  if output_dir:
    os.makedirs(output_dir, exist_ok=True)

  with open(outputPath, "w", encoding="utf-8") as file:
    file.write(document_text)

In [12]:
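# JSON structure (informal sketch, not literal JSON). Note: storeJson1/storeJson
# write the units as a list under a single "Units" key rather than as separate
# Unit1..Unit5 keys; each list entry has the per-unit fields shown below.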

# [
#   CourseName-Num: ,
#   Total-Teaching-Hours: ,
#   Max-Marks: ,
#   Credits: ,
#   Course-Outcomes: [
#     CO1: ,
#     CO2: ,
#     CO3: ,
#   ],
#   Unit1: [
#     Teaching-hours: ,
#     Title: ,
#     Contents: [],
#     Lab-exercises: [],
#         ],
#   Unit2: [
#     Teaching-hours: ,
#     Title: ,
#     Contents: [],
#     Lab-exercises: [],
#         ],
#   Unit3: [
#     Teaching-hours: ,
#     Title: ,
#     Contents: [],
#     Lab-exercises: [],
#         ],
#   Unit4: [
#     Teaching-hours: ,
#     Title: ,
#     Contents: [],
#     Lab-exercises: [],
#         ],
#   Unit5: [
#     Teaching-hours: ,
#     Title: ,
#     Contents: [],
#     Lab-exercises: [],
#         ],
#   Text-books: [],
#   Web-resources: [],
# ]

In [13]:
def storeJson1(input_file, output_file):
    """Parse the extracted syllabus text into a JSON file using regexes only.

    This appears to be an earlier variant of `storeJson`; it keeps the same
    output keys but splits unit contents on sentence boundaries.

    Args:
        input_file: Path of the UTF-8 text file to parse.
        output_file: Path of the JSON file to write.

    Returns:
        None. The extracted data is written to `output_file`.

    Fields that cannot be found in the text are left as "" (or an empty list)
    instead of raising AttributeError.
    """
    # Read as UTF-8: the text may contain non-ASCII characters (the course-name
    # pattern below contains an en dash), so the platform default is not safe.
    with open(input_file, 'r', encoding='utf-8') as file:
        processed_file = file.read()

    def first_match(pattern, group=1, flags=0):
        """Return the stripped group of the first match, or "" when there is none."""
        match = re.search(pattern, processed_file, flags)
        return match.group(group).strip() if match else ""

    # Basic course details (each stays "" when the line is missing)
    extracted_data = {
        "CourseName-Num": first_match(r'^MCA\d{3}– [^\n]+', group=0, flags=re.MULTILINE),
        "Total-Teaching-Hours": first_match(r'Total Teaching Hours for Semester: (\d+)'),
        "Max-Marks": first_match(r'Max Marks: (\d+)'),
        "Credits": first_match(r'Credits: (\d+)'),
        "Course-Outcomes": [],
        "Units": [],
        "Text-books": [],
        "Web-resources": []
    }

    # Course outcomes are renumbered CO1, CO2, ... in order of appearance
    outcomes = re.findall(r'CO\d+: (.+)', processed_file)
    for idx, outcome in enumerate(outcomes, start=1):
        extracted_data["Course-Outcomes"].append({f"CO{idx}": outcome.strip()})

    # Unit details: number, teaching hours, title line, then the body up to the
    # next unit header or the references heading
    unit_pattern = r'Unit-(\d+)\nTeaching Hours: (\d+)\n([^\n]+)\n(.+?)(?=Unit-\d+|Text Books and Reference Books)'
    for _number, hours, title, body in re.findall(unit_pattern, processed_file, re.DOTALL):
        # Only the text before "Lab Exercises:" is content; otherwise the lab
        # section would be split into sentences and leak into "Contents".
        contents_text = body.split('Lab Exercises:', 1)[0]
        extracted_data["Units"].append({
            "Teaching-hours": hours,
            "Title": title.strip(),
            "Contents": [part.strip() for part in re.split(r'(?<=\.)\s+', contents_text.strip()) if part],
            "Lab-exercises": re.findall(r'Lab Exercises:\n(.+)', body, re.DOTALL)
        })

    # Text-books: lines of the form "[n] ..."
    extracted_data["Text-books"] = re.findall(r'\[\d+\] ([^\n]+)', processed_file)

    # Web resources: every http(s) URL in the text
    extracted_data["Web-resources"] = re.findall(r'https?://[^\s]+', processed_file)

    # Save extracted data to a JSON file
    with open(output_file, 'w', encoding='utf-8') as json_file:
        json.dump(extracted_data, json_file, indent=4)

In [23]:
import re
import json

def _first_group(pattern, text, group=1, flags=0):
    """Return the stripped `group` of the first match of `pattern` in `text`, or "" if none."""
    match = re.search(pattern, text, flags)
    return match.group(group).strip() if match else ""


def _extract_unit_details(unit_text):
    """Parse one "Unit-N ..." chunk into teaching hours, title, contents and lab exercises.

    The chunk is split into sections at every newline that is followed by a
    word character. Sections before a "Lab Exercises:" section go to
    "Contents", sections after it go to "Lab-exercises". Empty and purely
    numeric sections are dropped.
    """
    unit_data = {
        "Teaching-hours": "",
        "Title": "",
        "Contents": [],
        "Lab-exercises": []
    }

    # Spans of header text that is already reported in its own field (or is
    # just the "Unit-N" marker) and therefore must not be repeated in "Contents".
    header_spans = []

    # The bare "Unit-N" line that opens the chunk
    unit_header_match = re.match(r'Unit-\d+[ \t]*(?=\n|\Z)', unit_text)
    if unit_header_match:
        header_spans.append(unit_header_match.span())

    # Teaching hours
    teaching_hours_match = re.search(r'Teaching Hours: (\d+)', unit_text)
    if teaching_hours_match:
        unit_data["Teaching-hours"] = teaching_hours_match.group(1)
        header_spans.append(teaching_hours_match.span())

    # Title: the line directly after the teaching-hours line
    title_match = re.search(r'Teaching Hours: \d+\n(.+)', unit_text)
    if title_match:
        unit_data["Title"] = title_match.group(1).strip()
        header_spans.append(title_match.span())

    # Rebuild the unit body with only the matched header spans cut out, so any
    # text that was not captured above is still kept.
    body_parts = []
    cursor = 0
    for start, end in sorted(header_spans):
        if start > cursor:
            body_parts.append(unit_text[cursor:start])
        cursor = max(cursor, end)
    body_parts.append(unit_text[cursor:])
    body = ''.join(body_parts)

    # Everything after the "Lab Exercises:" marker belongs to the lab list
    in_lab_exercises = False
    for section in re.split(r'\n(?=\w)', body):
        section = section.strip()

        if "Lab Exercises:" in section:
            in_lab_exercises = True
            continue

        # Skip empty sections and purely numeric entries such as "1." or "2"
        if not section or section.replace('.', '').isdigit():
            continue

        # Turn literal backslash-n sequences into real line breaks
        section = section.replace('\\n', '\n')
        target_key = "Lab-exercises" if in_lab_exercises else "Contents"
        unit_data[target_key].append(section)

    return unit_data


def storeJson(input_file, output_file):
    """Parse the extracted syllabus text and save the course details as JSON.

    Args:
        input_file: Path of the UTF-8 text file to parse.
        output_file: Path of the JSON file to write (UTF-8, non-ASCII kept as-is).

    Returns:
        None. The data is written to `output_file` and a confirmation is printed.

    Fields that cannot be found in the text are left as "" (or an empty list)
    instead of raising AttributeError.
    """
    # Read as UTF-8: the text may contain non-ASCII characters (the course-name
    # pattern below contains an en dash), so the platform default is not safe.
    with open(input_file, 'r', encoding='utf-8') as file:
        processed_file = file.read()

    # Basic course details (each stays "" when the line is missing)
    extracted_data = {
        "CourseName-Num": _first_group(r'^MCA\d{3}– [^\n]+', processed_file, group=0, flags=re.MULTILINE),
        "Total-Teaching-Hours": _first_group(r'Total Teaching Hours for Semester: (\d+)', processed_file),
        "Max-Marks": _first_group(r'Max Marks: (\d+)', processed_file),
        "Credits": _first_group(r'Credits: (\d+)', processed_file),
        "Course-Outcomes": [],
        "Units": [],
        "Text-books": [],
        "Web-resources": []
    }

    # Course outcomes are renumbered CO1, CO2, ... in order of appearance
    outcomes = re.findall(r'CO\d+: (.+)', processed_file)
    for idx, outcome in enumerate(outcomes, start=1):
        extracted_data["Course-Outcomes"].append({f"CO{idx}": outcome.strip()})

    # Units: iterate over the matched chunks directly. Looking each unit up
    # again by its number would return the first chunk with that number every
    # time, duplicating it when a number repeats.
    unit_pattern = r'Unit-\d+[\s\S]+?(?=Unit-\d+|Text Books and Reference Books)'
    for unit_match in re.finditer(unit_pattern, processed_file):
        extracted_data["Units"].append(_extract_unit_details(unit_match.group(0)))

    # Text-books: lines of the form "[n] ..."
    extracted_data["Text-books"] = re.findall(r'\[\d+\] ([^\n]+)', processed_file)

    # Web resources: every http(s) URL in the text
    extracted_data["Web-resources"] = re.findall(r'https?://[^\s]+', processed_file)

    # Save as UTF-8 because ensure_ascii=False writes non-ASCII characters verbatim
    with open(output_file, 'w', encoding='utf-8') as json_file:
        json.dump(extracted_data, json_file, indent=4, ensure_ascii=False)

    print(f"Data extracted and saved to {output_file}")

# Usage
# Stand-alone run of storeJson() on the text file that extract_text() writes.
# Guarded because this cell sits above the cell that actually calls
# extract_text(): on a fresh "Restart & Run All" the text file may not exist yet,
# and an unguarded call would stop the notebook with FileNotFoundError.
input_file = 'Output/DocumentText.txt'
output_file = 'Output/CourseDetails.json'
if os.path.exists(input_file):
    storeJson(input_file, output_file)
else:
    print(f"Skipping storeJson: {input_file} not found; run extract_text() first")

In [14]:
# Close the shared module-level `document` object.
# NOTE(review): in top-to-bottom order this cell runs before the final cell calls
# extract_text(), which loads a file into the same `document` — confirm the
# object is still usable after Close(), or move this call to the end of the notebook.
document.Close()

# Things needed (externally)
- Evaluation Pattern (CIA and ETE)
- Trimester
- Faculty name and contacts
- Class policies and description
- CIA and ETE component descriptions (e.g., practical tests, group projects, MCQs, presentations)
- Duration of the course (Start and End Date)

In [15]:
def makeCoursePlan(filePath):
    """Render the course-plan HTML page from the JSON written by storeJson.

    Args:
        filePath: Path of the UTF-8 JSON file holding the keys "CourseName-Num",
            "Total-Teaching-Hours", "Max-Marks", "Credits", "Course-Outcomes",
            "Units", "Text-books" and "Web-resources".

    Returns:
        None. The page is written to Output/CourseDetails.html and a
        confirmation is printed.

    Raises:
        KeyError: If one of the keys listed above is missing from the JSON.

    Notes:
        - The "Course Plan Guide" table and the evaluation pattern are
          hard-coded in the template; they are not taken from the JSON.
        - The JSON has no course description, so {{ Course_Description }}
          renders empty unless a "Course-Description" key is present.
    """
    with open(filePath, 'r', encoding='utf-8') as file:
        data = json.load(file)

    output_folder = 'Output'
    os.makedirs(output_folder, exist_ok=True)

    # The per-unit keys contain hyphens ("Teaching-hours", "Lab-exercises"), so
    # the template must use subscript syntax; attribute-style access with
    # underscores resolves to nothing and renders empty.
    template_content = """
        <!DOCTYPE html>
        <html>
        <head>
            <title>{{ CourseName_Num }}</title>
            <style>
                body {
                    font-family: 'Book Antiqua', serif;
                    margin: 20px;
                    line-height: 1.6;
                }
                h1, h2, h3 {
                    color: #333;
                }
                .unit {
                    margin-bottom: 20px;
                }
                .center {
                    text-align: center;
                }
                .bold {
                    font-weight: bold;
                }
                table {
                    width: 100%;
                    border-collapse: collapse;
                    margin: 20px 0;
                }
                th, td {
                    border: 1px solid #ddd;
                    padding: 8px;
                    text-align: left;
                }
                th {
                    background-color: #f2f2f2;
                }
            </style>
        </head>
        <body>
            <!-- Header Section -->
            <div class="center">
                <img src="http://kp.christuniversity.in/KnowledgePro/Logo.jpg" alt="University Logo" width="300" height="100">
                <p class="bold" style="font-size: 16pt;">DEPARTMENT - COMPUTER SCIENCE</p>
                <p class="bold" style="font-size: 15pt;">
                    COURSE PACK<br>
                    for<br>
                    {{ CourseName_Num }}
                </p>
                <hr>
            </div>

            <!-- Course Details -->
            <div class="center">
                <p class="bold" style="font-size: 16pt;">{{ CourseName_Num }}</p>
            </div>
            <p><strong>Total Teaching Hours For Semester:</strong> {{ Total_Teaching_Hours }}</p>
            <p><strong>Lecture Hours per Week:</strong> 8 (4+4)</p>
            <p><strong>Maximum Marks:</strong> {{ Max_Marks }}</p>
            <p><strong>Credits:</strong> {{ Credits }}</p>

            <!-- Course Description and Objectives -->
            <h2>Course Description and Course Objectives</h2>
            <p>{{ Course_Description }}</p>

            <!-- Course Outcomes -->
            <h2>Course Outcomes</h2>
            <ul>
                {% for outcome in Course_Outcomes %}
                <li>{{ outcome.values() | list | first }}</li>
                {% endfor %}
            </ul>

            <!-- Units Section -->
            <h2>Units</h2>
            {% for unit in Units %}
            <div class="unit">
                <h3>{{ unit.Title }}</h3>
                <p><strong>Teaching Hours:</strong> {{ unit['Teaching-hours'] }}</p>
                <p><strong>Contents:</strong></p>
                <ul>
                    {% for content in unit.Contents %}
                    <li>{{ content }}</li>
                    {% endfor %}
                </ul>
                <p><strong>Lab Exercises:</strong></p>
                <ul>
                    {% for exercise in unit['Lab-exercises'] %}
                    <li>{{ exercise }}</li>
                    {% endfor %}
                </ul>
            </div>
            {% endfor %}

            <!-- Textbooks -->
            <h2>Essential References</h2>
            <ul>
                {% for book in Text_books %}
                <li>{{ book }}</li>
                {% endfor %}
            </ul>

            <!-- Web Resources -->
            <h2>Recommended References</h2>
            <ul>
                {% for resource in Web_resources %}
                <li><a href="{{ resource }}">{{ resource }}</a></li>
                {% endfor %}
            </ul>

            <!-- Table Section -->
            <h2>Course Plan Guide</h2>
            <table>
                <tr>
                    <th>Trimester</th>
                    <td>II</td>
                    <th>Class</th>
                    <td>MCA</td>
                </tr>
                <tr>
                    <th>Course Code</th>
                    <td>MCA272</td>
                    <th>Course Title</th>
                    <td>Programming in Java</td>
                </tr>
                <tr>
                    <th>Hours</th>
                    <td>90</td>
                    <th>Hours per week</th>
                    <td>8 (4+4)</td>
                </tr>
                <tr>
                    <th>Faculty name</th>
                    <td colspan="3">Dr. Suresh K / Dr. Cynthia T</td>
                </tr>
                <tr>
                    <th>Contact details</th>
                    <td colspan="3">
                        suresh.kalaimani@christuniversity.in<br>
                        Central Block, 8th floor, Room No - 810<br>
                        9003310571<br>
                        cynthia.t@christuniversity.in<br>
                        Central Block, 8th floor, Room No - 810<br>
                        9952239349
                    </td>
                </tr>
                <tr>
                    <th>Class policies and guidelines</th>
                    <td colspan="3">
                        Please ensure strict compliance with the class policies of the University/Department as outlined in the following link: <a href="https://christuniversity.in/general-regulations">https://christuniversity.in/general-regulations</a>.<br>
                        Students must adhere to the timetable and be present in their designated classrooms on time. Attendance will be taken within the first 5 minutes, and latecomers will not be permitted for attendance.<br>
                        All communications regarding the course will be posted in the Google Classroom/Moodle.<br>
                        Go through the course content/resources shared in Google Drive/Moodle before coming to the class.<br>
                        Prior permission must be sought for the use of Laptops in the classroom only if it is required.<br>
                        All the programs must be uploaded to the GitHub account and the GitHub account must be associated with your official Christ mail ID.
                    </td>
                </tr>
                <tr>
                    <th>Course Description</th>
                    <td colspan="3">
                        On completion of this course, a student will be familiar with object-oriented programming, its implementation using Java programming language and developing web applications and enterprise applications using Servlets, JSP, and Beans.
                    </td>
                </tr>
                <tr>
                    <th>Course Objectives</th>
                    <td colspan="3">
                        This course will help the learner to gain sound knowledge in object-oriented principles, GUI application design with databases, and enterprise application design with Servlets.
                    </td>
                </tr>
                <tr>
                    <th>Course Outcomes</th>
                    <td colspan="3">
                        CO1: Understanding and applying the principles of object-oriented programming in the construction of robust, maintainable programs.<br>
                        CO2: Analyze the various societal and environmental problems critically to develop solutions using the features of programming language.<br>
                        CO3: Develop sustainable and innovative solutions for real-time problems.
                    </td>
                </tr>
            </table>

            <!-- Footer -->
            <div class="center">
                <h2>Evaluation Pattern</h2>
                <p>CIA: 50% &nbsp;&nbsp; ESE: 50%</p>
            </div>
        </body>
        </html>
    """

    # autoescape=True: the values come from document text, which may contain
    # characters such as '<' or '&' (e.g. "List<String>") that would otherwise
    # be interpreted as markup and corrupt the page.
    env = Environment(loader=FileSystemLoader('.'), autoescape=True)
    template = env.from_string(template_content)
    rendered_html = template.render(
        CourseName_Num=data["CourseName-Num"],
        Total_Teaching_Hours=data["Total-Teaching-Hours"],
        Max_Marks=data["Max-Marks"],
        Credits=data["Credits"],
        # Not produced by storeJson today; passed explicitly so the placeholder
        # renders empty by design rather than through an undefined variable.
        Course_Description=data.get("Course-Description", ""),
        Course_Outcomes=data["Course-Outcomes"],
        Units=data["Units"],
        Text_books=data["Text-books"],
        Web_resources=data["Web-resources"]
    )

    output_path = f"{output_folder}/CourseDetails.html"
    with open(output_path, 'w', encoding='utf-8') as file:
        file.write(rendered_html)

    print(f"HTML file generated and saved to {output_path}")

In [16]:
# Input syllabus (.docx) to convert
doc_file = "CoursePlans/Syllabus_MCA272– Programming Using Java.docx"
# extract_text(doc_file) writes its output to Output/DocumentText.txt, which is why txt_file points there
txt_file = "Output/DocumentText.txt" #Path to the txt document from which the JSON will be made
json_file = "Output/CourseDetails.json" #Path of JSON from which the html course plan will be made

# Run the full pipeline in order: .docx -> plain text -> JSON -> HTML course plan
extract_text(doc_file)
storeJson(txt_file, json_file)
makeCoursePlan(json_file)

HTML file generated and saved to Output/CourseDetails.html
