In [None]:


source = "/Users/junjie/SIT/TLM3001-DP-PhysioLLM/PatellofemoralPainSyndrome_2 (1).pdf"  # PDF path or URL
result2 = doc_converter.convert(source)
md_text = result2.document.export_to_markdown()


Fetching 9 files: 100%|██████████| 9/9 [00:00<00:00, 163414.44it/s]


In [9]:
import os
import re
import json
from langchain.schema import Document
from langchain_community.vectorstores import qdrant
from langchain_community.document_loaders.directory import DirectoryLoader
from langchain_community.document_loaders.pdf import PyPDFLoader
from langchain_huggingface import HuggingFaceEmbeddings
from pymupdf4llm import to_markdown
from dotenv import load_dotenv
import openai

def extract_condition_from_content(markdown_content: str, word_limit=200):
    """Extract the main condition from the content of the Markdown file using ChatGPT."""
    words = markdown_content

    print(f"Debug: Extracting condition from content with word limit of {word_limit}")

    prompt = f"""The following text is extracted from a medical PDF. Based on the text below, identify the main medical condition or topic discussed. Provide a concise name for the condition.

    Text: {words}

    Main Condition:"""

    try:

        print(f"Debug: Prompt for OpenAI API:\n{prompt[:300]}...")  # Print the first 300 characters of the prompt for clarity
        # Correct method for calling OpenAI's API
        response = openai.chat.completions.create(
            model="gpt-4o-mini",  
            messages=[{"role": "user", "content": prompt}]
        )

        print(f"Debug: OpenAI response received: {response}")

        # Access the response properly
        condition = response.choices[0].message.content.strip()
        print(f"Debug: Extracted condition: {condition}")
        return condition

    except Exception as e:
        print(f"Error extracting condition from content: {e}")
        return "Unknown Condition"


In [None]:

load_dotenv()

DOCUMENT_DIR = "documents"
MARKDOWN_DIR = "./markdown_files"
JSON_DIR = "./json_files"

os.makedirs(MARKDOWN_DIR, exist_ok=True)
os.makedirs(JSON_DIR, exist_ok=True)

def convert_pdf_to_json(pdf_path):
    """
    Convert a PDF document into JSON format via Markdown and OpenAI GPT API.
    """
    try:
        # Convert PDF to Markdown
        base_name = os.path.splitext(os.path.basename(pdf_path))[0]
        markdown_path = os.path.join(MARKDOWN_DIR, f"{base_name}.md")
        json_path = os.path.join(JSON_DIR, f"{base_name}.json")

        # Check if Markdown already exists
        if not os.path.exists(markdown_path):
            print(f"Debug: Extracting Markdown from {pdf_path}...")
            markdown_content = md_text
            with open(markdown_path, "w") as md_file:
                md_file.write(markdown_content)
        else:
            with open(markdown_path, "r") as md_file:
                markdown_content = md_file.read()

        # Extract condition name
        condition_name = extract_condition_from_content(markdown_content)

        # Convert Markdown to JSON
        if not os.path.exists(json_path):
            print(f"Debug: Converting Markdown to JSON for {base_name}...")

            prompt = f"""
Convert the following structured text into JSON format. Follow these specific instructions to handle split sections, footnotes, and create a clean, structured JSON output:

1. **Hierarchical Structure**: Treat each heading or subheading as a key, creating a nested dictionary for each level.
   - Only the lowest level of headings should contain actual content.

2. **Joining Split Content**: When a sentence or phrase is cut off due to page or line breaks:
   - Detect incomplete sentences or phrases that end abruptly, especially if they end mid-sentence.
   - Look at the start of the following section to determine if it completes the previous content, and merge them seamlessly.
   - Ensure there are no duplicate words when merging (e.g., if merging "conversation" and "conversation then," keep only one instance of "conversation").

3. **Footnotes Integration**: If a footnote is present (indicated by a superscript number following a term or phrase), insert the corresponding footnote text directly into the sentence in parentheses.
   - For instance, if a phrase like "cloud infrastructure2" is followed by footnote 2 with additional details ("2 Cloud Infrastructure: Refers to hardware and software resources..."), embed the footnote text as "(Refers to hardware and software resources...)" immediately after "cloud infrastructure" in the sentence.
   - Ensure the added footnote text blends naturally with the sentence structure, avoiding any disruption to readability.

4. **Handling Explicit References**: If an item includes an explicit reference to another section or list (e.g., "See Attendee List below" or "refer to the details provided later"), replace the reference with the actual content from the specified section.
   - Detect phrases like "see below," "refer to," "see list," or other similar expressions that indicate a link to content elsewhere in the document.
   - Embed the referenced content directly in place of the phrase, ensuring it is integrated seamlessly and contextually.
   - Only form relationships when there is a clear and explicit reference, and avoid linking content that is not specifically indicated as a reference.

5. **Content Accuracy**: Copy the content exactly as it appears without summarizing, rephrasing, or adding new words. The final JSON should contain verbatim text.

6. **Final Validation**: After forming the JSON structure, perform a final check to confirm that:
   - All sentences are complete and coherent, with no fragments or hanging phrases.
   - Split sections and footnotes are correctly merged into a single, continuous text block where necessary.

Here’s the text to convert:
{markdown_content}
    """
            print(f"Debug: Sending prompt to OpenAI...")
            response = openai.chat.completions.create(
                model="gpt-4",
                messages=[{"role": "user", "content": prompt}]
            )
            json_content = response.choices[0].message.content
            # Clean JSON response and save
            cleaned_json_content = json_content.strip("```json").strip("```")
            json_data = json.loads(cleaned_json_content)
            with open(json_path, "w") as json_file:
                json.dump(json_data, json_file, indent=4)
            print(f"JSON saved to {json_path}")
        else:
            print(f"JSON for {pdf_path} already exists. Skipping conversion.")

        return json_path, condition_name
    
    except json.JSONDecodeError as e:
        print(f"Failed to decode JSON response for {pdf_path}: {e}")
        return None, None
    except Exception as e:
        print(f"Error converting PDF to JSON for {pdf_path}: {e}")
        return None, None


In [None]:
# Main execution
if __name__ == "__main__":
    pdf_files = [os.path.join(DOCUMENT_DIR, file) for file in os.listdir(DOCUMENT_DIR) if file.endswith(".pdf")]
    for pdf_file in pdf_files:
        json_path, condition_name = convert_pdf_to_json(pdf_file)
        print(f"Processed: {pdf_file} -> {json_path}, Condition: {condition_name}")

In [4]:
def postprocess_json(json_data):
    """Post-process the nested JSON to extract content with hierarchical metadata and chunk_content."""
    processed_data = []

    def process_content(key, value, metadata):
        # If the value is a dictionary, process further (this is the content or nested structure)
        if isinstance(value, dict):
            for sub_key, sub_value in value.items():
                # Further process nested dictionaries or content
                new_metadata = {**metadata, "heading2": sub_key}
                if isinstance(sub_value, dict) and "Content" in sub_value:
                    process_content("Content", sub_value["Content"], new_metadata)
        else:
            # When we reach the actual content, we add it to the processed data
            chunk_content = "no" if isinstance(value, list) else "yes"
            processed_data.append({
                "content": value,
                "metadata": {**metadata, "heading1": key, "chunk_content": chunk_content}
            })

    def traverse_json(obj, parent_metadata=None):
        """Recursively traverse JSON and extract metadata/content."""
        if parent_metadata is None:
            parent_metadata = {}

        for key, value in obj.items():
            # If value is a dictionary, we need to recurse through it to get nested content
            if isinstance(value, dict):
                new_metadata = {**parent_metadata, "heading3": key}
                process_content(key, value, new_metadata)  # Process nested content and metadata
            else:
                # If value is not a dictionary, directly process it as content
                process_content(key, value, parent_metadata)

    # Start the traversal of the nested JSON
    traverse_json(json_data)
    
    return processed_data




# Step 1: Load JSON from a file in the directory
json_directory = "/Users/junjie/SIT/TLM3001-DP-PhysioLLM/demo_v1/json_files"  # Adjust to the directory where your JSON files are stored
json_files = [f for f in os.listdir(json_directory) if f.endswith(".json")]

# Load a specific JSON file (adjust the filename as needed)
file_to_load = json_files[0]  # Change this if you want to test with a specific file
json_path = os.path.join(json_directory, file_to_load)

with open(json_path, "r") as f:
    json_data = json.load(f)

# Step 2: Apply the postprocess_json function
postprocessed_data = postprocess_json(json_data)

# Step 3: Display the output
for entry in postprocessed_data:
    print(json.dumps(entry, indent=2))


{
  "content": "This guideline is intended to provide the clinician with a guideline of the non-operative course of care for Patellofemoral Pain Syndrome. Specific intervention should be based on the needs of the individual and should consider exam findings and clinical decision making. The timeframes for expected outcomes contained within this guideline may vary. If a clinician requires assistance in the progression of a patient, they should consult with the referring provider.",
  "metadata": {
    "heading3": "Rehabilitation Protocol for Patellofemoral Pain Syndrome",
    "heading2": "Introduction",
    "heading1": "Content",
    "chunk_content": "yes"
  }
}
{
  "content": "The interventions included within this protocol are not intended to be an inclusive list. Therapeutic interventions should be included and modified based on the progress of the patient and under the discretion of the clinician.",
  "metadata": {
    "heading3": "Rehabilitation Protocol for Patellofemoral Pain Syn

In [11]:
import json
import re

mock_response = """
Here's a structured JSON output based on the provided content from both markdown sources:
```json
{
  "Rehabilitation Protocol for Patellofemoral Pain Syndrome": {
    "Introduction": {
      "content": "This guideline is intended to provide the clinician with a guideline of the non-operative course of care for Patellofemoral Pain Syndrome. Specific intervention should be based on the needs of the individual and should consider exam findings and clinical decision making. The timeframes for expected outcomes contained within this guideline may vary. If a clinician requires assistance in the progression of a patient, they should consult with the referring provider. The interventions included within this protocol are not intended to be an inclusive list. Therapeutic interventions should be included and modified based on the progress of the patient and under the discretion of the clinician. Patellofemoral Pain Syndrome (PFPS) is a general category of anterior knee pain that is characterized as pain behind or around the patella, as a result of patella malalignment, altered patellofemoral (PF) joint forces and/or repetitive stress to the area. Also known as Runner's Knee, chondromalacia patella, retropatellar pain syndrome, anterior knee pain syndrome, patellar malalignment, and patellofemoral arthralgia. Patellofemoral syndrome can have a collection of signs and symptoms which may encompass body regions throughout the kinetic chain, from the lumbar spine to the feet."
    },
    "Diagnosis Considerations": {
      "content": [
        "Pain: typically reported anywhere circumferential to the anterior knee or retropatellar region.",
        "Common Aggravating Factors: prolonged sitting, squatting, climbing/descending stairs, running, and jumping.",
        "Increased tibiofemoral varum/valgum or tibial varum: normal subjects with hypermobility exhibit larger Q angles than normal subjects with normal mobility. Patients with greater amounts of medial rotation of the femur with respect to the tibia, typically produce larger amounts of contact area at the patellofemoral joint.",
        "Foot position/footwear. Excessive or late pronation during gait can increase tibial internal rotation, thus altering patellofemoral forces.",
        "Higher-level activities which include landing with excessive hip internal rotation and/or knee valgus may contribute to abnormal PF joint loading."
      ]
    },
    "Differential Diagnosis": {
      "content": [
        "Articular cartilage injury",
        "Bone tumor",
        "Chondromalacia patella",
        "Osgood-Schlatter disease",
        "Osteochondritis dessicans",
        "Patellar stress fracture",
        "Referred pain from low back or hip",
        "Patellofemoral arthritis",
        "Hoffa's Disease",
        "Pes Anserine Bursitis",
        "Iliotibial Band Friction Syndrome",
        "Prepatellar Bursitis",
        "Inflammatory joint disease",
        "Quadriceps/Patellar tendinopathy",
        "Loose Bodies",
        "Sinding-Larsen-Johansson Syndrome",
        "Meniscal pathology",
        "Symptomatic Bipartite Patella",
        "Neuromas",
        "Synovial plica"
      ]
    },
    "PHASE I: IMMEDIATE/ACUTE (0-2 WEEKS)": {
      "Rehabilitation Goals": {
        "content": [
          "Reduce any swelling, minimize pain",
          "Restore patellar, lower extremity mobility (including hip and ankle)",
          "Restore tolerance to full motion",
          "Minimize arthrogenic muscle inhibition and re-establish quadriceps, hip control",
          "Patient education"
        ]
      },
      "Interventions": {
        "content": [
          "Minimize aggravating factors as much as possible, such as descending stairs, prolonged sitting, running, jumping.",
          "Initial self-symptom management and joint protection.",
          "Independent with initial home exercise program.",
          "During this early phase, numerous manual interventions may be utilized to reduce the patient's pain, restriction to movement, and joint loading: Soft Tissue Mobilization/Instrument-Assisted Soft Tissue Mobilization, Patellar Taping (McConnell, Kinesiotaping), Ischemic compression/Bloodflow Restrictive Training, Dry Needling, Nerve mobilization, Joint mobilization/manipulation, Strengthening, Stretching Mobility, Stationary biking for tolerable mobility (minimal resistance), Stretching/Foam rolling including Hip flexors, Hamstrings, Quadriceps, Iliotibial band, Adductors, Hip extensors/rotators, Gastroc-soleus complex, Quadriceps isometrics at 0, 45, 90 degrees of flexion, Straight leg raise, Bridge/unilateral bridging, Sidelying clamshells, Sidelying hip abduction, Core/lumbopelvic stabilization (transverse abdominus, multifidus lifts, front/side planks)"
        ]
      },
      "Criteria to Progress": {
        "content": [
          "Full knee motion, compared to uninvolved side",
          "Appropriate quad contraction with superior patella glide and full active extension",
          "Able to perform straight leg raise without lag or pain",
          "Full tolerance to weightbearing with relative knee extension"
        ]
      }
    },
    "PHASE II: INTERMEDIATE/SUB-ACUTE (2-4 WEEKS)": {
      "Rehabilitation Goals": {
        "content": [
          "Progress to closed-chain/weightbearing activities without loading of knee flexion",
          "Maintain full ROM",
          "Tolerance to closed chain strengthening without loading of knee joint in flexion",
          "Independent with progressed home exercise program, all daily activities"
        ]
      },
      "Additional Interventions": {
        "content": [
          "Strengthening: Sumo walks, Monster walks, 4-way hip drills.",
          "Balance/proprioception: Single-leg stance, Clock taps, Ball toss.",
          "Correction of movement abnormalities with functional tasks."
        ]
      },
      "Criteria to Progress": {
        "content": [
          "Tolerance to weightbearing activities",
          "Maintenance of full ROM",
          "Normalize muscle length or achieve muscle length goals"
        ]
      }
    },
    "PHASE III: LATE/CHRONIC (4-6 WEEKS)": {
      "Rehabilitation Goals": {
        "content": [
          "Maintain full ROM",
          "Promote proper movement patterns",
          "Avoid post exercise pain/swelling",
          "Achieve all muscle strength goals",
          "Negotiating stairs unlimited",
          "Full tolerance to closed chain knee joint loading with flexion, with appropriate eccentric control",
          "Achieve daily/functional goals"
        ]
      },
      "Additional Interventions": {
        "content": [
          "Strengthening: Partial squat, squat to chair, wall slide, progressing to functional squat pattern, Lunge/reverse lunge, Step ups, Step downs, eccentric loading.",
          "Correction of movement abnormalities with sport-related tasks.",
          "Return to Running Program."
        ]
      },
      "Criteria for Discharge": {
        "content": [
          "Independent self-management of symptoms",
          "Demonstrate appropriate understanding of condition and maintenance to prevent risk of recurrence"
        ]
      }
    },
    "Contact": {
      "content": "Please email MGHSportsPhysicalTherapy@partners.org with questions specific to this protocol."
    },
    "References": {
      "content": [
        "Chang WD, Chen FC, Lee CL, Lin HY, Lai PT. Effects of Kinesio Taping versus McConnell Taping for Patellofemoral Pain Syndrome: A Systematic Review and Meta-Analysis. Evidence-Based Complementary and Alternative Medicine. 2015; 1-11.",
        "Sueki D, Brechter J. Orthopedic Rehabilitation Clinical Advisor. 1st ed. Maryland Heights, Missouri: Mosby; 2009. 598-599.",
        "Arazpour M, Bahramian F, Abutorabi A, Nourbakhsh ST, Alidousti A, Hamidreza Aslani. The Effect of Patellofemoral Pain Syndrome on Gait Parameters: A Literature Review. The Archives of Bone and Joint Surgery. October 2016; 4(4): 298-306.",
        "Espí-López GV, Arnal-Gómez A, Balasch-Bernat M, Inglés M. Effectiveness of Manual Therapy Combined with Physical Therapy in Treatment of Patellofemoral Pain Syndrome: Systematic Review. Journal of Chiropractic Medicine. June 2018; 139-146.",
        "Boling MC, Padua DA, Marshall SW, Guskiewicz K, Pyne S, Beutler A. A prospective investigation of biomechanical risk factors for patellofemoral pain syndrome: the Joint Undertaking to Monitor and Prevent ACL Injury (JUMP-ACL) cohort. American Journal of Sports Medicine. 2009; 37 (11): 2108-2116.",
        "Crossley K, Bennell K, Green S, Cowan S, McConnell J. Physical therapy for patellofemoral pain: A randomized, double-blinded, placebo-controlled trial. The American Journal of Sports Medicine. 2002; 30(6): 857-865.",
        "Kettunen JA, Harilainen A, Sandelin J, et al. Knee arthroscopy and exercise versus exercise only for chronic patellofemoral pain syndrome: A randomized controlled trial. BMC Medicine. 2007; 5 (1): 38.",
        "Ferber R, Bolgla L, Earl-Boehm JE, Emery C, Hamstra-Wright K. Strengthening of the Hip and Core Versus Knee Muscles for the Treatment of Patellofemoral Pain: A Multicenter Randomized Controlled Trial. Journal of Athletic Training. 2015; 50 (4): 366-377.",
        "Dolak KL, Silkman C, McKeon JM, Hosey RG, Lattermann C, Uhl TL. Hip Strengthening Prior to Functional Exercises Reduces Pain Sooner Than Quadriceps Strengthening in Females With Patellofemoral Pain Syndrome: A Randomized Clinical Trial. Journal of Orthopaedic & Sports Physical Therapy. August 2011; 41 (8): 560-570.",
        "Herrington L, Al-Sherhi A. A controlled trial of weight-bearing versus non-weight-bearing exercises for patellofemoral pain. Journal of Orthopaedic & Sports Physical Therapy. 2007; 37 (4): 155-160.",
        "Powers CM, Ward SR, Fredericson M, Guillet M, Shellock FG. Patellofemoral kinematics during weight-bearing and non-weight-bearing knee extension in persons with lateral subluxation of the patella: A preliminary study. Journal of Orthopaedic & Sports Physical Therapy. 2003; 33 (11): 677-685.",
        "Flynn TW, Soutas-Little RW. Patellofemoral joint compressive forces in forward and backward running. Journal of Orthopaedic & Sports Physical Therapy. 1995; 21 (5): 277-282.",
        "Huang BY, Shih YF, Chen WY, Ma HL. Predictors for Identifying With Patellofemoral Pain Syndrome Responding to Femoral Nerve Mobilization. Archives of Physical Medicine and Rehabilitation. 2015; 96: 920-97.",
        "Liao TC, Yang N, Ho KY, Farrokhi S, Powers CM. Femur Rotation Increases Patella Cartilage Stress in Females with Patellofemoral Pain. Official Journal of the American College of Sports Medicine. 2015; 1775-1780.",
        "Ramskov D, Barton C, Nielsen RO, Rasmussen S. High Eccentric Hip Abduction Strength Reduces the Risk of Developing Patellofemoral Pain Among Novice Runners Initiating a Self-Structured Running Program: A 1-Year Observational Study. Journal of Orthopaedic & Sports Physical Therapy. March 2015; 45 (3): 153 -161.",
        "Yilmaz Yelvar GD, Baltaci G, Bayrakci Tunay V, Atay AO. The effect of postural stabilization exercises on pain and function in females with patellofemoral pain syndrome. Acta Orthrop traumatol Turc. 2015; 49 (2): 166-174.",
        "Glaviano NR, Saliba S. Impairment based rehabilitation for patellofemoral pain patients. The Physician and Sportsmedicine. 2016; 44 (3): 311-323.",
        "Miao P, Xu Y, Pan C, Liu H, Wang C. Vastus medialis oblique and vastus lateralis activity during a double-leg semisquat with or without hip adduction in patients with patellofemoral pain syndrome. BMC Musculoskeletal Disorders. 2015; 16 (289): 1-8.",
        "Giles LS, Webster KE, McClelland JA, Cook J. Atrophy of the Quadriceps Is Not Isolated to the Vastus Medialis Oblique in Individuals With Patellofemoral Pain. Journal of Orthopaedic & Sports Physical Therapy. August 2015; 45 (8): 613-619.",
        "Sendur OF, Gurer G, Yildirim T, Ozturk E, Aydeniz A. Relationship of Q angle and joint hypermobility and Q angle values in different positions. Clinical Rheumatology. 2006; 25 (3): 304-308.",
        "Powers CM. The influence of altered lower-extremity kinematics on patellofemoral joint dysfunction: A theoretical perspective. Journal of Orthopaedic & Sports Physical Therapy. 2003; 33 (11): 639-646.",
        "Salsich GB, Perman WH. Patellofemoral joint contact area is influenced by tibiofemoral rotation alignment in individuals who have patellofemoral pain. Journal of Orthopaedic & Sports Physical Therapy. 2007; 37 (9): 521-528.",
        "Noehren B, Pohl MB, Sanchez Z, Cunningham T, Lattermann C. Proximal and Distal Kinematics in Female Runners with Patellofemoral Pain. Clinical Biomechanics. May 2012; 27 (4): 366-371.",
        "Horschig A, Sonthana K, Neff T. The Squat Bible: The Ultimate Guide to Mastering the Squat and Finding Your True Strength. Middletown, Delaware: Squat University LLC; 2017. 48, 71, 90-92, 99-102, 148-164.",
        "Cook C, Hegedus E. Orthopedic Physical Examination Tests: An Evidence-Based Approach. Prentice Hall, 2008.",
        "Pazzinatto MF, de Oliveira Silva D, Barton C, Rathleff MS, Briani RV, Micolis de Azevedo F. Female Adults with Patellofemoral Pain Are Characterized by Widespread Hyperalgesia Which Is Not Affected Immediately by Patellofemoral Joint Loading. Pain Medicine. 2016; 17: 1953-1961.",
        "Lesher JD, Sutlive TG, Miller GA, Chine NJ, Garber MB, Wainner RS. Development of a clinical prediction rule for classifying patients with patellofemoral pain syndrome who respond to patellar taping. Journal of Orthopaedic & Sports Physical Therapy. 2006; 36 (11): 854-866.",
        "Greenwood JLJ, Joy EA, Stanford JB. The Physical Activity Vital Sign: A Primary Care Tool to Guide Counseling for Obesity. Journal of Physical Activity and Health. 2019; 7: 571-576.",
        "Waryasz GR, McDermott AY. Patellofemoral pain syndrome (PFPS): A systematic review of anatomy and potential risk factors. Dynamic Medicine. 2008; 7: 9.",
        "Wilson T. The measurement of patellar alignment in patellofemoral pain syndrome: Are we confusing assumptions with evidence? Journal of Orthopaedic & Sports Physical Therapy. 2007; 37 (6): 330-341.",
        "Aminaka N, Gribble PA. A systematic review of the effects of therapeutic taping on patellofemoral pain syndrome. Journal of Athletic Training. 2005; 40 (4): 341 -351.",
        "Logan CA, Bhashyam AR, Tisosky AJ, Haber DB, Provencher MT. Systematic Review of the Effect of Taping Techniques on Patellofemoral Pain Syndrome. Sports Health. 2017; 9 (5): 456-461.",
        "Whittingham M, Palmer S, Macmillan F. Effects of taping on pain and function in patellofemoral pain syndrome: A randomized controlled trial. Journal of Orthopaedic & Sports Physical Therapy. 2004; 34 (9): 504-510.",
        "Souza RB, Powers CM. Differences in Hip Kinematics, Muscle Strength, and Muscle Activation Between Subjects With and Without Patellofemoral Pain. Journal of Orthopaedic & Sports Physical Therapy. January 2009; 39 (1): 12-19.",
        "Bolgla LA, Earl-Boehm J, Emery C, Hamstra-Wright K, Ferber R. Comparison of hip and knee strength in males with and without patellofemoral pain. Physical Therapy in Sport. 2015; 16: 215-221.",
        "Santos TRT, Oliveira BA, Ocarino JM, Holt KG, Fonseca ST. Effectiveness of hip muscle strengthening in patellofemoral pain syndrome patients: a systematic review. Brazilian Journal of Physical Therapy. May-June 2015; 19 (3): 167-176.",
        "Chinkulprasert C, Vachalathihi R, Powers CM. Patellofemoral Joint Forces and Stress During Forward Step-Up, Lateral Step-up, and Forward Step-down Exercises. Journal of Orthopaedic & Sports Physical Therapy. April 2011; 41 (4): 241-248.",
        "Gross MT, Foxworth JL. The role of foot orthoses as an intervention for patellofemoral pain. Journal of Orthopaedic & Sports Physical Therapy. 2003; 33 (11): 661 -670.",
        "Vicenzino B, Franettovich M, McPoil T, Russell T, Skardoon G. Initial Effects of Anti-Pronation Tape on the Medial Longitudinal Arch During Walking and Running. British Journal of Sports Medicine. 2005; 39 (12): 939-943.",
        "Behrangrad S, Kamali F. Comparison of ischemic compression and lumbopelvic manipulation as trigger point therapy for patellofemoral pain syndrome in young adults: A double blind randomized clinical trial. Journal of Bodywork & Movement Therapies. 2017; (21): 554-564.",
        "Espí-López GV, Serra-Añó P, Vicent-Ferrando J et al. Effectiveness of Inclusion of Dry Needling in a Multimodal Therapy Program for Patellofemoral Pain: A Randomized Parallel-Group Trial. Journal of Orthopaedic & Sports Physical Therapy. June 2017; 47 (6): 392-401.",
        "Timm KE. Randomized controlled trial of protonics on patellar pain, position, and function. Medicine & Science in Sports & Exercise. 1998; 30 (5): 665-670."
      ]
    }
  }
}
``` 

### Summary of JSON Structure
1. **Top-Level Key**: Title of the document.
2. **Sections**: Each section is represented as a key within the JSON, and its content is stored either directly or as a list.
3. **Hierarchical Structure**: Headings and subheadings are nested appropriately, ensuring a clear schema.
4. **Complete Content**: Content is integrated from both input sources, ensuring no fragments or incomplete sentences remain in the output. 
5. **References**: Directly included as an array of strings to maintain clarity and access.
"""

def clean_json_response(response_content):
    """
    Cleans the OpenAI JSON response to ensure that only valid JSON is returned by removing extra non-JSON text after the '```json' block.
    """
    # First, strip away any initial markdown code block markers
    cleaned_json_content = re.sub(r"```json", "", response_content.strip())

    # Now remove everything after the last ``` or any trailing non-JSON content
    cleaned_json_content = re.sub(r"```.*$", "", cleaned_json_content, flags=re.DOTALL)

    # Return the cleaned JSON content
    return cleaned_json_content.strip()

print(clean_json_response(mock_response))

Here's a structured JSON output based on the provided content from both markdown sources:

{
  "Rehabilitation Protocol for Patellofemoral Pain Syndrome": {
    "Introduction": {
      "content": "This guideline is intended to provide the clinician with a guideline of the non-operative course of care for Patellofemoral Pain Syndrome. Specific intervention should be based on the needs of the individual and should consider exam findings and clinical decision making. The timeframes for expected outcomes contained within this guideline may vary. If a clinician requires assistance in the progression of a patient, they should consult with the referring provider. The interventions included within this protocol are not intended to be an inclusive list. Therapeutic interventions should be included and modified based on the progress of the patient and under the discretion of the clinician. Patellofemoral Pain Syndrome (PFPS) is a general category of anterior knee pain that is characterized as pa