In [18]:
from spire.doc import *
from spire.doc.common import *
import json
import re
import os
from jinja2 import Environment, FileSystemLoader

In [19]:
# Create a Document object
document = Document()

In [20]:
def extract_text():

    # Load a Word document
  document.LoadFromFile("CoursePlans/Syllabus_MCA272– Programming Using Java.docx")

  # Extract the text of the document
  document_text = document.GetText()

  #Substitutes '\t' with 1 '\n'
  document_text = re.sub(r'\t{1,}', '\n', document_text)
  #Trim the spaces after newline
  document_text = re.sub(r'\n\s+', '\n', document_text)
  #Substitutes '\r' with ''
  document_text = document_text.replace('\r', '')

  with open("Output/DocumentText.txt", "w", encoding="utf-8") as file:
    file.write(document_text)

In [21]:
# JSON Structure:

# [
#   CourseName-Num: ,
#   Total-Teaching-Hours: ,
#   Max-Marks: ,
#   Credits: ,
#   Course-Outcomes: [
#     CO1: ,
#   CO2: ,
#   CO3: ,
#   ],
#   Unit1: [
#     Teaching-hours: ,
#     Title: ,
#     Contents: [],
#     Lab-exercises: [],
#         ],
#   Unit2: [
#     Teaching-hours: ,
#     Title: ,
#     Contents: [],
#     Lab-exercises: [],
#         ],
#   Unit3: [
#     Teaching-hours: ,
#     Title: ,
#     Contents: [],
#     Lab-exercises: [],
#         ],
#   Unit4: [
#     Teaching-hours: ,
#     Title: ,
#     Contents: [],
#     Lab-exercises: [],
#         ],
#   Unit5: [
#     Teaching-hours: ,
#     Title: ,
#     Contents: [],
#     Lab-exercises: [],
#         ],
#   Text-books: [],
#   Web-resources: [],
# ]

In [22]:
def storeJson(input_file, output_file):
    # Read the content of the input file
    with open(input_file, 'r') as file:
        processed_file = file.read()

    # Initialize storage for extracted data
    extracted_data = {
        "CourseName-Num": "",
        "Total-Teaching-Hours": "",
        "Max-Marks": "",
        "Credits": "",
        "Course-Outcomes": [],
        "Units": [],
        "Text-books": [],
        "Web-resources": []
    }

    # Extract basic course details
    extracted_data["CourseName-Num"] = re.search(r'^MCA\d{3}– [^\n]+', processed_file, re.MULTILINE).group(0).strip()
    extracted_data["Total-Teaching-Hours"] = re.search(r'Total Teaching Hours for Semester: (\d+)', processed_file).group(1)
    extracted_data["Max-Marks"] = re.search(r'Max Marks: (\d+)', processed_file).group(1)
    extracted_data["Credits"] = re.search(r'Credits: (\d+)', processed_file).group(1)

    # Extract course outcomes
    outcomes = re.findall(r'CO\d+: (.+)', processed_file)
    for idx, outcome in enumerate(outcomes, start=1):
        extracted_data["Course-Outcomes"].append({f"CO{idx}": outcome.strip()})

    # Extract unit details
    unit_pattern = r'Unit-(\d+)\nTeaching Hours: (\d+)\n([^\n]+)\n(.+?)(?=Unit-\d+|Text Books and Reference Books)'
    units = re.findall(unit_pattern, processed_file, re.DOTALL)
    for unit in units:
        unit_data = {
            "Teaching-hours": unit[1],
            "Title": unit[2].strip(),
            "Contents": [content.strip() for content in re.split(r'(?<=\.)\s+', unit[3].strip()) if content],
            "Lab-exercises": re.findall(r'Lab Exercises:\n(.+)', unit[3], re.DOTALL)
        }
        extracted_data["Units"].append(unit_data)

    # Extract text-books
    textbooks = re.findall(r'\[\d+\] ([^\n]+)', processed_file)
    extracted_data["Text-books"] = textbooks

    # Extract web resources
    web_resources = re.findall(r'https?://[^\s]+', processed_file)
    extracted_data["Web-resources"] = web_resources

    # Save extracted data to a JSON file
    with open(output_file, 'w') as json_file:
        json.dump(extracted_data, json_file, indent=4)

In [23]:
input_file = "Output/DocumentText.txt"
output_file = "Output/CourseDetails.json"

# Write the extracted text into a text file
extract_text()
storeJson(input_file, output_file)


In [24]:
document.Close()

# Things needed (externally)
- Evaluation Pattern (CIA and ETE)
- Trimester
- Faculty name and contacts
- Class policies and description
- CIA and ETE component description (like Practical tests, Group project, mcqs, presentations, etc)
- Duration of the course (Start and End Date)

In [25]:
json_file = 'Output/CourseDetails.json'  # Replace with the path to your JSON file
with open(json_file, 'r', encoding='utf-8') as file:
    data = json.load(file)

output_folder = 'Output'
os.makedirs(output_folder, exist_ok=True)

template_content = """
<!DOCTYPE html>
<html>
<head>
    <title>{{ CourseName_Num }}</title>
    <style>
        body {
            font-family: 'Book Antiqua', serif;
            margin: 20px;
            line-height: 1.6;
        }
        h1, h2, h3 {
            color: #333;
        }
        .unit {
            margin-bottom: 20px;
        }
        .center {
            text-align: center;
        }
        .bold {
            font-weight: bold;
        }
    </style>
</head>
<body>
    <!-- Header Section -->
    <div class="center">
        <img src="http://kp.christuniversity.in/KnowledgePro/Logo.jpg" alt="University Logo" width="300" height="100">
        <p class="bold" style="font-size: 16pt;">DEPARTMENT - COMPUTER SCIENCE</p>
        <p class="bold" style="font-size: 15pt;">
            COURSE PACK<br>
            for<br>
            {{ CourseName_Num }}
        </p>
        <hr>
    </div>

    <!-- Course Details -->
    <div class="center">
        <p class="bold" style="font-size: 16pt;">{{ CourseName_Num }}</p>
    </div>
    <p><strong>Total Teaching Hours For Semester:</strong> {{ Total_Teaching_Hours }}</p>
    <p><strong>Lecture Hours per Week:</strong> 8 (4+4)</p>
    <p><strong>Maximum Marks:</strong> {{ Max_Marks }}</p>
    <p><strong>Credits:</strong> {{ Credits }}</p>

    <!-- Course Description and Objectives -->
    <h2>Course Description and Course Objectives</h2>
    <p>{{ Course_Description }}</p>

    <!-- Course Outcomes -->
    <h2>Course Outcomes</h2>
    <ul>
        {% for outcome in Course_Outcomes %}
        <li>{{ outcome.values() | list | first }}</li>
        {% endfor %}
    </ul>

    <!-- Units Section -->
    <h2>Units</h2>
    {% for unit in Units %}
    <div class="unit">
        <h3>{{ unit.Title }}</h3>
        <p><strong>Teaching Hours:</strong> {{ unit.Teaching_hours }}</p>
        <p><strong>Contents:</strong></p>
        <ul>
            {% for content in unit.Contents %}
            <li>{{ content }}</li>
            {% endfor %}
        </ul>
        <p><strong>Lab Exercises:</strong></p>
        <ul>
            {% for exercise in unit.Lab_exercises %}
            <li>{{ exercise }}</li>
            {% endfor %}
        </ul>
    </div>
    {% endfor %}

    <!-- Textbooks -->
    <h2>Essential References</h2>
    <ul>
        {% for book in Text_books %}
        <li>{{ book }}</li>
        {% endfor %}
    </ul>

    <!-- Web Resources -->
    <h2>Recommended References</h2>
    <ul>
        {% for resource in Web_resources %}
        <li><a href="{{ resource }}">{{ resource }}</a></li>
        {% endfor %}
    </ul>

    <!-- Footer -->
    <div class="center">
        <h2>Evaluation Pattern</h2>
        <p>CIA: 50% &nbsp;&nbsp; ESE: 50%</p>
    </div>
</body>
</html>
"""

env = Environment(loader=FileSystemLoader('.'))
template = env.from_string(template_content)
rendered_html = template.render(
    CourseName_Num=data["CourseName-Num"],
    Total_Teaching_Hours=data["Total-Teaching-Hours"],
    Max_Marks=data["Max-Marks"],
    Credits=data["Credits"],
    Course_Outcomes=data["Course-Outcomes"],
    Units=data["Units"],
    Text_books=data["Text-books"],
    Web_resources=data["Web-resources"]
)

# Step 5: Save the rendered HTML to a file
output_path = "Output/CourseDetails.html"
with open(output_path, 'w', encoding='utf-8') as file:
    file.write(rendered_html)

print(f"HTML file generated and saved to {output_file}")

HTML file generated and saved to Output/CourseDetails.json
