In [208]:
from pyspark.sql import *
from pyspark.sql.functions import *
from pyspark.sql.types import *
# import pandas as pd
import re
import json

In [209]:
# Reuse the already-running Spark session if there is one; otherwise start one.
spark = (
    SparkSession.builder
    .getOrCreate()
)


In [210]:
# Load the disease/symptom CSV; the first row supplies the column names.
my_input_data = spark.read.option('header', True).csv("./disease_symptoms.csv")

# Drop every character that is not a letter, digit or space from the header
# names, then rename all columns in one step with toDF() instead of chaining
# one withColumnRenamed() call per column.
sanitized_columns = [
    re.sub(r'[^A-Za-z0-9 ]+', '', str(column))
    for column in my_input_data.columns
]
my_input_data = my_input_data.toDF(*sanitized_columns)

### My Custom Value Processor

In [211]:
def value_processor(input_string : str):
    """Strip unwanted characters from a raw cell value.

    Only ASCII letters, digits, spaces and the characters ``" { } :`` are
    kept; every other character is removed.

    Args:
        input_string: Raw cell value. Non-string values are converted with
            ``str()`` before cleaning.

    Returns:
        The cleaned string, or ``None`` when ``input_string`` is ``None`` so
        that a missing value stays null instead of becoming the text 'None'.
    """
    if input_string is None:
        return None
    return re.sub(r'[^A-Za-z0-9 "{}:]+', '', str(input_string))

def de_jsonify(input_string : str):
    """Extract the 'symptoms' entry from a JSON-encoded cell value.

    Args:
        input_string: Cell text expected to hold a JSON object with a
            'symptoms' key.

    Returns:
        The value stored under 'symptoms' when the text parses as JSON and
        has that key. Otherwise a best-effort fallback: the input with every
        character other than letters, digits, spaces and ``" { } :`` removed
        and the word 'symptoms' deleted. ``None`` is returned unchanged so a
        missing value stays null instead of becoming the text 'None'.
    """
    if input_string is None:
        return None
    try:
        return json.loads(input_string)["symptoms"]
    except (ValueError, TypeError, KeyError):
        # ValueError: the text is not valid JSON.
        # TypeError: non-string input, or the parsed JSON is not an object.
        # KeyError: the JSON object has no "symptoms" key.
        cleaned = re.sub(r'[^A-Za-z0-9 "{}:]+', '', str(input_string))
        return cleaned.replace("symptoms", "")


# Expose the two cleaning helpers as Spark UDFs that return strings. The
# functions are handed to udf() directly; a forwarding lambda adds nothing.
custom_value_processor = udf(value_processor, StringType())
custom_de_jsonifier = udf(de_jsonify, StringType())

In [212]:
# Clean every column in a single projection. One select() is used instead of
# calling withColumn() in a loop, which adds a projection to the query plan
# for each column.
my_input_data = my_input_data.select(
    *[
        custom_value_processor(col(column)).alias(column)
        for column in my_input_data.columns
    ]
)

# Extract the payload from the JSON-encoded columns and give every output
# column a snake_case name.
my_input_data = my_input_data.select(
    custom_de_jsonifier(col("symptoms")).alias("disease_symptoms"),
    custom_de_jsonifier(col("commonTestsAndProceduresDesc")).alias("common_tests_and_procedures"),
    custom_de_jsonifier(col("commonMedicationsDesc")).alias("common_medication"),
    custom_de_jsonifier(col("whoIsAtRiskDesc")).alias("who_is_at_risk"),
    col("name").alias("disease_name"),
)
my_input_data.show(truncate=False)

+----------------------------------+---------------------------+--------------------------------+--------------------------------+--------------------------------+
|disease_symptoms                  |common_tests_and_procedures|common_medication               |who_is_at_risk                  |disease_name                    |
+----------------------------------+---------------------------+--------------------------------+--------------------------------+--------------------------------+
|Anxiety and nervousness           |Depression                 |Shortness of breath             |Depressive or psychotic symptoms|Panic disorder                  |
|Hoarse voice                      |Sore throat                |Difficulty speaking             |Cough                           |Vocal cord polyp                |
|Groin mass                        |Leg pain                   |Hip pain                        |Suprapubic pain                 |Turner syndrome                 |
|Symptoms of the