In [6]:
import pandas as pd
# Open the workbook that holds the base data
file_path = 'Auto_Regressive_Model_Diffusion_Model_V_1.0.xlsx'
xls = pd.ExcelFile(file_path)

# Step 1: Create Symptom Dictionary
# Read the whole 'Base_Data_Set' sheet; it is kept in base_data_set_full
# but is not consulted when the dictionary below is built.
base_data_set_full = pd.read_excel(xls, sheet_name='Base_Data_Set')

# The dictionary is written out by hand: every "Symptom N" slot holds a
# one-element list with the symptom name and its severity levels.
symptom_dict = {
    'Symptom 1': [
        'Fever: mild, low, high',
    ],
    'Symptom 2': [
        'Cough: mild, low, high',
    ],
    'Symptom 3': [
        'Cold: mild, low, high',
    ],
}

# Show the heading and the dictionary on consecutive lines
print("Symptom Dictionary:", symptom_dict, sep="\n")

Symptom Dictionary:
{'Symptom 1': ['Fever: mild, low, high'], 'Symptom 2': ['Cough: mild, low, high'], 'Symptom 3': ['Cold: mild, low, high']}


In [7]:
# Reference dictionary: slot id -> symptom name -> severity level -> status.
# Every severity starts out as None, i.e. "not seen in any data yet".
symptom_dict = {
    "S1": {"Fever": dict.fromkeys(("mild", "low", "high"))},
    "S2": {"Cough": dict.fromkeys(("mild", "low", "high"))},
    "S3": {"Cold": dict.fromkeys(("mild", "low", "high"))},
}

# Names that are accepted as "other" symptoms
additional_symptoms = {
    "Body Ache",
    "Shivering",
    "Head Ache",
    "Nausea",
    "Sickness",
    "Vertigo",
}

# Loss tracker: one flag per graded symptom plus the set of unrecognised names
loss = dict.fromkeys(("Fever", "Cough", "Cold"), False)
loss["Other Symptoms"] = set()

# In-memory sample dataset. All ten patients report the same fever grading;
# only the id and the free-text "Other Symptoms" field vary.
patients = [
    {
        "Patient_Id": patient_id,
        "Fever": {"mild": "Y", "low": "N", "high": "N"},
        "Other Symptoms": other,
    }
    for patient_id, other in (
        ("000000001", "Body Ache"),
        ("000000002", "Shivering"),
        ("000000003", "Head Ache"),
        ("000000004", "Nausia"),
        ("000000005", "Sickness"),
        ("000000006", "Vertigo"),
        ("000000007", "Head Ache"),
        ("000000008", "Nausia"),
        ("000000009", "Sickness"),
        ("000000010", "Vertigo, Nausia"),
    )
]

# Function to read the dataset and update the dictionary with found symptoms
def update_symptom_dictionary(patients, symptom_dict, additional_symptoms, loss):
    """Fold patient records into ``symptom_dict`` and record what could not be matched.

    Both ``symptom_dict`` and ``loss`` are mutated in place; nothing is returned.

    Args:
        patients: iterable of dicts. A record may carry one severity mapping
            per graded symptom (e.g. ``"Fever": {"mild": "Y", ...}``) and a
            comma-separated ``"Other Symptoms"`` string.
        symptom_dict: dictionary whose entries look like
            ``{"S1": {"Fever": {"mild": None, ...}}}``. Every nested symptom
            whose value is a severity mapping is treated as graded.
        additional_symptoms: collection of accepted "other" symptom names.
        loss: tracker with one boolean per graded symptom and a set under
            ``"Other Symptoms"``.

    Behaviour:
        * status ``"Y"`` marks the severity ``True``; ``"N"`` marks it
          ``False`` unless an earlier patient already confirmed it, so the
          result does not depend on patient order.
        * any other status sets ``loss[<symptom>] = True``.
        * a record without data for a graded symptom is skipped for that
          symptom instead of raising ``KeyError``.
        * recognised "other" symptoms become ``{"Other Symptom": True}``
          entries; unrecognised names go to ``loss["Other Symptoms"]``.
    """
    # Snapshot the graded symptoms first: the loop below adds new top-level
    # entries, and those must not be mistaken for severity tables.
    graded = {
        name: levels
        for entry in list(symptom_dict.values())
        if isinstance(entry, dict)
        for name, levels in entry.items()
        if isinstance(levels, dict)
    }

    for patient in patients:
        # Check every graded symptom (Fever, Cough, Cold, ...) the record reports
        for name, levels in graded.items():
            reported = patient.get(name)
            if not isinstance(reported, dict):
                continue  # this record has no graded data for the symptom
            for severity, status in reported.items():
                if status == "Y":
                    levels[severity] = True
                elif status == "N":
                    # Never downgrade a severity another patient confirmed
                    if levels.get(severity) is not True:
                        levels[severity] = False
                else:
                    loss[name] = True  # Track missing or unclear data

        # Check for other symptoms; tolerate None, "a,b" and padded names
        raw_other = patient.get("Other Symptoms") or ""
        for sym in (part.strip() for part in raw_other.split(",")):
            if not sym:
                continue
            if sym in additional_symptoms:
                if sym not in symptom_dict:
                    symptom_dict[sym] = {"Other Symptom": True}
            else:
                loss.setdefault("Other Symptoms", set()).add(sym)

# Function to print out the losses (missing or new symptom columns)
def print_loss(loss):
    """Print a report of everything recorded in ``loss``.

    Args:
        loss: mapping of label -> value. A ``True`` boolean is reported as
            missing severity data; a non-empty set is reported as a list of
            new symptoms. ``False`` flags and empty sets print nothing.

    The heading is always printed, even when there is nothing to report.
    """
    print("Loss (Symptoms not found in dictionary):")
    for key, value in loss.items():
        if isinstance(value, bool):
            if value:
                print(f" - {key}: Missing severity data")
        elif isinstance(value, set) and value:
            # Sets iterate in arbitrary order; sort (as text) so the report is
            # identical on every run and non-string members cannot break join().
            names = ', '.join(sorted(str(item) for item in value))
            print(f" - {key}: {names} (New Symptoms)")

# Fold the sample patients into the dictionary
# (symptom_dict and loss are both modified in place by the call)
update_symptom_dictionary(patients, symptom_dict, additional_symptoms, loss)

# Show the resulting dictionary, one "key: value" line per entry
print("Symptom Dictionary:")
for key, value in symptom_dict.items():
    print(key, value, sep=": ")

# Report the symptoms that could not be matched against the dictionary
print_loss(loss)

Symptom Dictionary:
S1: {'Fever': {'mild': True, 'low': False, 'high': False}}
S2: {'Cough': {'mild': None, 'low': None, 'high': None}}
S3: {'Cold': {'mild': None, 'low': None, 'high': None}}
Body Ache: {'Other Symptom': True}
Shivering: {'Other Symptom': True}
Head Ache: {'Other Symptom': True}
Sickness: {'Other Symptom': True}
Vertigo: {'Other Symptom': True}
Loss (Symptoms not found in dictionary):
 - Other Symptoms: Nausia (New Symptoms)


In [8]:
# Reference dictionary for this cell. S1-S3 are graded mild/low/high,
# S4 has no grading at all, and S5 uses its own set of levels.
symptom_dict = {
    "S1": {"Fever": dict.fromkeys(("mild", "low", "high"))},
    "S2": {"Cough": dict.fromkeys(("mild", "low", "high"))},
    "S3": {"Cold": dict.fromkeys(("mild", "low", "high"))},
    "S4": {"Body Ache": None},
    "S5": {"Shivering": dict.fromkeys(("mild", "high", "intermittent"))},
}

# Loss tracker: names of unknown symptoms and descriptions of absent attributes
loss = {category: set() for category in ("Symptoms", "Attributes")}

# Free-text patient observations. Each row below is
# (id, observation, particulars, time period); every patient additionally
# gets its own Location mapping with all four fields left as None.
patients = [
    {
        "Patient_Id": patient_id,
        "Observation": observation,
        "Particulars": particulars,
        "Time Period": time_period,
        "Location": dict.fromkeys(("City", "State", "Country", "Pincode")),
    }
    for patient_id, observation, particulars, time_period in (
        ("000000001", "Body Ache, Head Ache", "Low Body Ache, High Head Ache", "In Morning"),
        ("000000002", "Body Ache, Shivering", "High Shivering", "In Afternoon"),
        ("000000003", "Shivering, Nausia", "Low Nausia", "Throughout"),
        ("000000004", "No observation", "No observation", "No observation"),
        ("000000005", "Intermittent mild fever", None, None),
        ("000000006", "Continuous high fever", None, None),
        ("000000007", "Intermittent Shivering", None, None),
    )
]

# Function to update the dictionary with new data
def enhance_symptom_dictionary(patients, symptom_dict, loss):
    """Attach each patient's observation details to ``symptom_dict``.

    Both ``symptom_dict`` and ``loss`` are mutated in place; nothing is returned.

    Args:
        patients: iterable of dicts with the optional keys ``"Observation"``
            (comma-separated symptom names), ``"Particulars"``,
            ``"Time Period"`` and ``"Location"`` (mapping with City, State,
            Country and Pincode).
        symptom_dict: symptom dictionary. Catalogue entries look like
            ``{"S4": {"Body Ache": None}}``; this function adds one top-level
            attribute record per observed symptom name.
        loss: mapping holding a set under ``"Symptoms"`` and ``"Attributes"``.

    Behaviour:
        * every observed name gets (or replaces) a top-level record with its
          particulars, time period and location; absent values become
          ``"N/A"``. A later patient reporting the same symptom overwrites
          the earlier record.
        * a name is added to ``loss["Symptoms"]`` only when it is neither a
          top-level key nor a symptom nested under a catalogue entry, so
          catalogued symptoms such as "Body Ache" are not reported as missing.
        * absent particulars, time period or location details are recorded in
          ``loss["Attributes"]`` once per patient.
        * a missing or None observation or location is tolerated.
    """
    location_fields = ("City", "State", "Country", "Pincode")
    attribute_fields = {"Particulars", "Time Period", "Location"}

    # Everything the dictionary already knows: its top-level keys plus the
    # symptom names nested inside catalogue entries. Attribute records written
    # by an earlier call are recognised by their fields and not unpacked.
    known = set(symptom_dict)
    for entry in symptom_dict.values():
        if isinstance(entry, dict) and not attribute_fields.issubset(entry):
            known.update(entry)

    for patient in patients:
        raw_observation = patient.get("Observation") or ""
        observations = [part.strip() for part in raw_observation.split(",") if part.strip()]
        particulars = patient.get("Particulars")
        time_period = patient.get("Time Period")
        location = patient.get("Location") or {}

        # Update the dictionary with new observations
        for obs in observations:
            if obs not in known:
                loss["Symptoms"].add(obs)  # Track it as a new symptom
                known.add(obs)

            # Attach attributes
            symptom_dict[obs] = {
                "Particulars": particulars if particulars else "N/A",
                "Time Period": time_period if time_period else "N/A",
                "Location": {
                    field: location.get(field) or "N/A" for field in location_fields
                },
            }

        # Track any missing attributes
        if not particulars:
            loss["Attributes"].add(f"Particulars for {observations}")
        if not time_period:
            loss["Attributes"].add(f"Time Period for {observations}")
        if not all(location.get(field) for field in location_fields):
            loss["Attributes"].add(f"Location for {observations}")

# Function to print out the losses
def print_loss(loss):
    """Print the missing symptoms and attributes recorded in ``loss``.

    Args:
        loss: mapping with the keys ``"Symptoms"`` and ``"Attributes"``, each
            holding a collection of entries.

    Entries are sorted (as text) before printing because the collections are
    sets, whose iteration order is arbitrary; sorting makes the report stable
    across runs. When both collections are empty a "no losses" line is
    printed instead.
    """
    print("Loss (Symptoms or Attributes not found):")
    symptoms = sorted(str(item) for item in loss["Symptoms"])
    attributes = sorted(str(item) for item in loss["Attributes"])
    if symptoms:
        print(f" - Missing Symptoms: {', '.join(symptoms)}")
    if attributes:
        print(f" - Missing Attributes: {', '.join(attributes)}")
    if not symptoms and not attributes:
        print(" - No losses detected!")

# Function to print the enhanced dictionary
def print_enhanced_dictionary(symptom_dict):
    """Print a heading followed by one ``key: value`` line per dictionary entry."""
    print("Enhanced Symptom Dictionary:")
    for name in symptom_dict:
        details = symptom_dict[name]
        print(f"{name}: {details}")

# Enhance the dictionary with the new data from patients.
# The call works in place: it writes attribute records into symptom_dict and
# fills the "Symptoms" / "Attributes" sets of loss; it returns nothing.
enhance_symptom_dictionary(patients, symptom_dict, loss)

# Print the enhanced dictionary (one "key: value" line per entry),
# followed by the loss report
print_enhanced_dictionary(symptom_dict)
print_loss(loss)

Enhanced Symptom Dictionary:
S1: {'Fever': {'mild': None, 'low': None, 'high': None}}
S2: {'Cough': {'mild': None, 'low': None, 'high': None}}
S3: {'Cold': {'mild': None, 'low': None, 'high': None}}
S4: {'Body Ache': None}
S5: {'Shivering': {'mild': None, 'high': None, 'intermittent': None}}
Body Ache: {'Particulars': 'High Shivering', 'Time Period': 'In Afternoon', 'Location': {'City': 'N/A', 'State': 'N/A', 'Country': 'N/A', 'Pincode': 'N/A'}}
Head Ache: {'Particulars': 'Low Body Ache, High Head Ache', 'Time Period': 'In Morning', 'Location': {'City': 'N/A', 'State': 'N/A', 'Country': 'N/A', 'Pincode': 'N/A'}}
Shivering: {'Particulars': 'Low Nausia', 'Time Period': 'Throughout', 'Location': {'City': 'N/A', 'State': 'N/A', 'Country': 'N/A', 'Pincode': 'N/A'}}
Nausia: {'Particulars': 'Low Nausia', 'Time Period': 'Throughout', 'Location': {'City': 'N/A', 'State': 'N/A', 'Country': 'N/A', 'Pincode': 'N/A'}}
No observation: {'Particulars': 'No observation', 'Time Period': 'No observation'

In [9]:
import csv
import json
import xml.etree.ElementTree as ET
import os

# Module-level state shared by the functions of this cell:
# symptom_dict collects the parsed entries, loss records what was missing
symptom_dict = dict()
loss = {category: set() for category in ("Symptoms", "Attributes")}

# Function to read CSV, TSV, JSON, and XML files
def read_file(file_path, file_type, separator=None):
    """Load ``file_path`` and return its content as Python data.

    Args:
        file_path: path of the file to read.
        file_type: one of ``'csv'``, ``'tsv'``, ``'json'`` or ``'xml'``.
        separator: optional single-character delimiter for csv/tsv files.
            When empty or None the default for the type is used (``','`` for
            csv, a tab for tsv). Ignored for json and xml.

    Returns:
        * csv/tsv: a list with one dict per data row, keyed by the header row.
        * json: whatever ``json.load`` produces for the file.
        * xml: a list with one ``{tag: text}`` dict per child of the root
          element, built from that child's own children.

    Raises:
        ValueError: for an unsupported ``file_type`` or a separator that is
            not exactly one character long.
    """
    if file_type in ('csv', 'tsv'):
        # Honour an explicit separator; previously it could never be reached
        # because the csv/tsv defaults were always chosen first.
        if separator:
            if len(separator) != 1:
                raise ValueError("Separator must be a single character.")
            delimiter = separator
        else:
            delimiter = ',' if file_type == 'csv' else '\t'
        # newline='' is what the csv module expects so that line breaks inside
        # quoted fields are handled by the reader itself.
        with open(file_path, 'r', newline='') as file:
            data = list(csv.DictReader(file, delimiter=delimiter))
    elif file_type == 'json':
        with open(file_path, 'r') as file:
            data = json.load(file)
    elif file_type == 'xml':
        root = ET.parse(file_path).getroot()
        data = [{child.tag: child.text for child in elem} for elem in root]
    else:
        raise ValueError("Unsupported file type.")
    return data

# Function to enhance the dictionary with new data and attributes
def enhance_dictionary(data):
    """Merge parsed records into the module-level ``symptom_dict``.

    Args:
        data: a list of dict records, or a single dict (for example a JSON
            file whose top level is one object), which is treated as one
            record.

    For every key of every record an entry ``{"Attributes": <value>}`` is
    kept in ``symptom_dict``; the value of the last record that carries the
    key wins. Keys seen for the first time are also added to
    ``loss["Symptoms"]``. Nothing is returned.
    """
    # Iterating a bare dict would yield its keys (strings) and fail on .items()
    records = [data] if isinstance(data, dict) else data
    for item in records:
        for key, value in item.items():
            if key is None:
                # csv.DictReader stores surplus cells of an over-long row under
                # the key None; that is not a real column, so leave it out.
                continue
            if key not in symptom_dict:
                symptom_dict[key] = {"Attributes": None}
                loss["Symptoms"].add(key)
            symptom_dict[key]["Attributes"] = value

# Function to dump the dictionary into a file
def dump_dictionary(file_name):
    """Write the module-level ``symptom_dict`` to ``file_name`` as JSON.

    The file is created or overwritten, the JSON is indented by four spaces,
    and a confirmation line is printed once the file has been closed.
    """
    with open(file_name, 'w') as target:
        json.dump(symptom_dict, fp=target, indent=4)
    print(f"Dictionary successfully saved to {file_name}")

# Function to allow manual editing of the dictionary
def manual_edit_dictionary():
    """Interactively edit the ``"Attributes"`` value of dictionary entries.

    Each round prints the whole module-level ``symptom_dict`` and asks for a
    key. Typing ``exit`` (any letter case) ends the session; an unknown key
    is reported and the round restarts; a known key prompts for a new value,
    which is stored with surrounding whitespace removed.
    """
    while True:
        print("Dictionary for editing (key-value pairs):")
        for name in symptom_dict:
            print(f"{name}: {symptom_dict[name]}")

        chosen = input("\nEnter the key to edit or 'exit' to stop: ").strip()
        if chosen.lower() == 'exit':
            return
        if chosen not in symptom_dict:
            print(f"Key '{chosen}' not found in dictionary.")
            continue

        replacement = input(f"Enter new value for {chosen}: ").strip()
        symptom_dict[chosen]["Attributes"] = replacement

# Function to print data using columns in the dictionary
def print_data_using_dictionary(data):
    """Print every record restricted to the columns known to ``symptom_dict``.

    Args:
        data: a list of dict records, or a single dict (for example a JSON
            file whose top level is one object), which is treated as one
            record.

    After a heading line, one dict is printed per record containing only the
    key/value pairs whose key is present in the module-level ``symptom_dict``.
    """
    print("Printing data using available dictionary columns...")
    # Iterating a bare dict would yield its keys (strings) and fail on .items()
    records = [data] if isinstance(data, dict) else data
    for item in records:
        output = {key: value for key, value in item.items() if key in symptom_dict}
        print(output)

# Function to print the current dictionary loss (missing data)
def print_loss():
    """Print the symptoms recorded in the module-level ``loss["Symptoms"]``.

    The names are sorted (as text) before printing: the collection is a set,
    so its iteration order is arbitrary, and converting to text keeps a
    non-string key from breaking the join. When nothing has been recorded a
    "no missing symptoms" line is printed instead.
    """
    print("Dictionary Loss Report:")
    missing = sorted(str(name) for name in loss["Symptoms"])
    if missing:
        print(f"Missing Symptoms: {', '.join(missing)}")
    else:
        print("No missing symptoms found!")

# Function to reparse data and enhance the dictionary
def reparse_data(file_path, file_type, separator=None):
    """Read ``file_path`` again and merge its records into the dictionary.

    Announces the file, feeds the result of ``read_file`` straight into
    ``enhance_dictionary``, confirms the update and finally prints the loss
    report. Arguments are passed to ``read_file`` unchanged.
    """
    print(f"Reparsing data from {file_path}...")
    enhance_dictionary(read_file(file_path, file_type, separator))
    print("Dictionary enhanced with new data.")
    print_loss()

# Helper for main(): the three file questions asked by several menu entries
def _prompt_for_file(path_prompt):
    """Ask for a file path, its type and, for csv/tsv only, a separator.

    Returns:
        ``(file_path, file_type, separator)``; ``separator`` is None for
        file types other than csv and tsv.
    """
    file_path = input(path_prompt).strip()
    file_type = input("Enter file type (csv, tsv, json, xml): ").strip().lower()
    separator = None
    if file_type in ('csv', 'tsv'):
        separator = input("Enter separator (if applicable): ").strip()
    return file_path, file_type, separator


# Main function to run the parser program
def main():
    """Run the interactive menu until the user chooses to exit.

    Menu entries:
        1. read a file and merge it into the dictionary, then print the loss
        2. dump the dictionary to a JSON file
        3. read a file and print it restricted to the dictionary's columns
        4. edit the dictionary by hand, optionally followed by a reparse
        5. exit

    File and parse problems (missing file, unsupported type, malformed
    content) are reported as ``Error: ...`` and the menu is shown again
    instead of the whole program terminating with a traceback.
    """
    print("Parser Program: Reads and processes data, enhances dictionary, and allows manual editing.")
    while True:
        print("\nMenu:")
        print("1. Read and Create Dictionary")
        print("2. Dump Dictionary to File")
        print("3. Print Data Sets Using Dictionary")
        print("4. Allow Manual Editing and Reparse Dictionary")
        print("5. Exit")
        choice = input("Enter your choice: ").strip()

        try:
            if choice == '1':
                file_path, file_type, separator = _prompt_for_file("Enter file path: ")
                data = read_file(file_path, file_type, separator)
                enhance_dictionary(data)
                print_loss()
            elif choice == '2':
                file_name = input("Enter file name to dump the dictionary (e.g., dictionary.json): ").strip()
                dump_dictionary(file_name)
            elif choice == '3':
                file_path, file_type, separator = _prompt_for_file("Enter file path to read data: ")
                data = read_file(file_path, file_type, separator)
                print_data_using_dictionary(data)
            elif choice == '4':
                manual_edit_dictionary()
                reparse = input("Would you like to reparse a file to enhance the dictionary? (y/n): ").strip().lower()
                if reparse == 'y':
                    file_path, file_type, separator = _prompt_for_file("Enter file path to reparse: ")
                    reparse_data(file_path, file_type, separator)
            elif choice == '5':
                print("Exiting program.")
                break
            else:
                print("Invalid choice. Please try again.")
        except (OSError, ValueError, csv.Error, ET.ParseError) as exc:
            # OSError: unreadable/unwritable path; ValueError: unsupported type
            # or invalid JSON; csv.Error / ET.ParseError: malformed csv / xml.
            print(f"Error: {exc}")

# Run the parser program.
# NOTE(review): the recorded output of this cell shows the menu starting as soon
# as the cell is executed, so the guard is true inside the notebook and the
# cell waits on input() until option 5 is chosen.
if __name__ == '__main__':
    main()

Parser Program: Reads and processes data, enhances dictionary, and allows manual editing.

Menu:
1. Read and Create Dictionary
2. Dump Dictionary to File
3. Print Data Sets Using Dictionary
4. Allow Manual Editing and Reparse Dictionary
5. Exit
Enter your choice: 2
Enter file name to dump the dictionary (e.g., dictionary.json): sympton_dictionary.json
Dictionary successfully saved to sympton_dictionary.json

Menu:
1. Read and Create Dictionary
2. Dump Dictionary to File
3. Print Data Sets Using Dictionary
4. Allow Manual Editing and Reparse Dictionary
5. Exit
Enter your choice: 5
Exiting program.
