# In [None]:
# Work 22: Case-Insensitive Search for BMI, Painoindeksi, or PI in EHR Data:
#  [W22.BMI.1.All_Records_BMI.ipynb]

# "This Jupyter notebook script loads EHR data, searches for records containing any of the terms
#  BMI, painoindeksi, PI, or Paino-pituussuhde (case-insensitive), and saves the filtered results."

########################################################################################################
#  Sequence list
########################################################################################################

# 1: Load the EHR data from the provided file.
# 2: Search for records containing the specified terms.
# 3: Output the results.

########################################################################################################
########################################################################################################
import pandas as pd
import gzip

# 1: Stream the gzipped CSV in fixed-size pieces; rows pandas cannot parse are skipped
output_path = "/home/work/BMI_records.csv"
data_path = "/home/work/ehrtextdata.csv.gz"
chunk_size = 100000  # Rows per piece; tune to the memory available
chunks = []

with gzip.open(data_path, "rt", encoding="utf-8") as gz_text:
    reader = pd.read_csv(gz_text, chunksize=chunk_size, on_bad_lines="skip")
    # Drain the chunk iterator while the file handle is still open.
    chunks.extend(reader)

# Stitch the pieces into one frame with a fresh 0..n-1 index.
ehr_data = pd.concat(chunks, ignore_index=True)

print(
    "1: Data loaded successfully.",
    "First few rows of the data:",
    ehr_data.head(),
    sep="\n",
)

# 2: Search for records containing 'BMI', 'painoindeksi', 'PI', or 'Paino-pituussuhde' (case insensitive)
terms = ["BMI", "painoindeksi", "PI", "Paino-pituussuhde"]


def contains_terms(row: pd.Series) -> bool:
    """Return True if any entry of ``terms`` occurs in the row, ignoring case.

    Every value in ``row`` is converted to ``str`` and the values are joined
    with single spaces, so a term is only found when it lies inside one cell
    (or spans cells exactly where a space separates them).

    Args:
        row: One record, as passed by ``DataFrame.apply(..., axis=1)``.

    Returns:
        True when at least one term is a substring of the lowercased row
        text, otherwise False.
    """
    # NOTE(review): this is plain substring matching, so the short term "PI"
    # also hits ordinary words containing "pi" (e.g. "pituus") and already
    # subsumes "Paino-pituussuhde". Confirm whether whole-word matching is
    # wanted for the abbreviation before relying on the result set.
    # Lowercase the row text once rather than once per term.
    haystack = " ".join(row.astype(str)).lower()
    return any(term.lower() in haystack for term in terms)


# Evaluate contains_terms on every row, then keep only the rows it flagged.
row_matches = ehr_data.apply(contains_terms, axis=1)
filtered_data = ehr_data[row_matches]

print(f"2: Found {len(filtered_data)} records containing the specified terms.")

# 3: Write the matching records to output_path as CSV, leaving out the index column
filtered_data.to_csv(output_path, index=False)

print(
    f"3: Results saved to file {output_path}",
    "Task completed successfully.",
    sep="\n",
)

########################################################################################################
########################################################################################################