In [70]:
import numpy as np
import pandas as pd

import re

In [71]:

# Use the 'resolve/main' URL which points directly to the raw file content
url = "https://huggingface.co/datasets/Carxofa85/dermatology/resolve/main/output.json"

df = pd.read_json(url)
print(df.head())

                                         instruction input  \
0  What is psoriasis and what are its common symp...         
1                      What is the etiology of acne?         
2  What are the recommended medications for atopi...         
3  Can you tell me about the treatment modalities...         
4  What is rosacea and what are its common symptoms?         

                                              output  
0  Psoriasis is a chronic autoimmune condition th...  
1  Acne is primarily caused by the overproduction...  
2  There are several medications available for th...  
3  Melanoma treatment depends on the stage and lo...  
4  Rosacea is a common skin condition that causes...  


In [72]:
df.head(30)

Unnamed: 0,instruction,input,output
0,What is psoriasis and what are its common symp...,,Psoriasis is a chronic autoimmune condition th...
1,What is the etiology of acne?,,Acne is primarily caused by the overproduction...
2,What are the recommended medications for atopi...,,There are several medications available for th...
3,Can you tell me about the treatment modalities...,,Melanoma treatment depends on the stage and lo...
4,What is rosacea and what are its common symptoms?,,Rosacea is a common skin condition that causes...
5,What causes seborrheic dermatitis?,,The exact cause of seborrheic dermatitis is un...
6,What are the recommended medications for treat...,,Treatment for vitiligo aims to stop or slow th...
7,What are the treatment options for basal cell ...,,Treatment options for basal cell carcinoma dep...
8,What is eczema and what are its common symptoms?,,"Eczema, also known as atopic dermatitis, is a ..."
9,What causes contact dermatitis?,,Contact dermatitis is caused by a direct react...


In [73]:
def categorize_instruction(text):
    text = text.lower() # Convert to lowercase for easier matching
    if 'symptom' in text or 'present' in text or 'sign' in text:
        return 'Symptom'
    elif 'etiology' in text or 'cause' in text:
        return 'Cause'
    elif 'treatment' in text or 'medication' in text or 'treat' in text or 'manage' in text or 'therap' in text:
        return 'Treatment'
    else:
        return 'General' # Fallback for definitions or other questions

# 3. Apply the function to create the new column
df['category'] = df['instruction'].apply(categorize_instruction)

# 4. View the result
print(df[['instruction', 'category']].head())

                                         instruction   category
0  What is psoriasis and what are its common symp...    Symptom
1                      What is the etiology of acne?      Cause
2  What are the recommended medications for atopi...  Treatment
3  Can you tell me about the treatment modalities...  Treatment
4  What is rosacea and what are its common symptoms?    Symptom


In [74]:
df[df['category'] == 'General']

Unnamed: 0,instruction,input,output,category
106,I often go tanning and I've noticed some irreg...,,It's essential to get any new or changing mole...,General
112,"I've noticed that my skin appears yellowish, i...",,While these symptoms can indicate a number of ...,General
113,"I have itchy, red, inflamed, and scaly skin. C...",,"The symptoms you described — itchy, red, infla...",General
225,What are the potential triggers of eczema?,,Eczema flare-ups can be triggered by various f...,General
233,What is the role of the immune system in causi...,,The immune system plays a significant role in ...,General
...,...,...,...,...
1465,What are the preventative measures for skin ca...,A 45-year-old patient with a family history of...,Preventative measures for skin cancer include ...,General
1470,List the main types of skin infections and the...,,The main types of skin infections are bacteria...,General
1471,Explain the indications and side effects of or...,,Oral corticosteroids are indicated for their a...,General
1473,What factors enhance and retard wound healing?,,Factors affecting wound healing can be both ge...,General


In [75]:
df[df['instruction'].str.contains('acne', case=False) == True]

Unnamed: 0,instruction,input,output,category
1,What is the etiology of acne?,,Acne is primarily caused by the overproduction...,Cause
13,What are the treatment modalities for acne?,,Acne treatment depends on the severity of the ...,Treatment
40,What is the recommended medication for acne?,,The recommended medication for acne depends on...,Treatment
46,What are the common symptoms of acne?,,Acne symptoms vary depending on the severity o...,Symptom
51,What are the common causes of acne?,,Acne is primarily caused by:\n\n1. Overproduct...,Cause
56,What are the treatment options for acne?,,Treatment for acne varies depending on the sev...,Treatment
60,What are the common medications for treating a...,,Acne treatments depend on the severity of the ...,Treatment
73,What are some common treatments for acne?,,Treatment for acne depends on its severity and...,Treatment
79,What are the recommended medications for acne ...,,The type of medication your doctor prescribes ...,Treatment
84,What is the main cause of acne?,,Acne is primarily caused by the overproduction...,Cause


In [76]:

def extract_condition(text):
    text = text.lower()
    # Remove common question starters to clean up the string
    text = re.sub(r'^(what is|what are|describe|can you tell me about|what causes|treatment for|treating)\s+', '', text)
    
    # Pattern: Look for words following "of", "for", "about"
    match = re.search(r'(?:of|for|about)\s+(?:the\s+)?([a-z\s]+?)(?:\?| and| symptoms| treatment|$)', text)
    if match:
        return match.group(1).strip()
    
    # Pattern: Look for the first word(s) if it's a direct definition
    match = re.search(r'^([a-z\s]+?)(?:\?| and|$)', text)
    if match:
        return match.group(1).strip()
        
    return "Unknown"

# 3. Apply extraction
df['condition'] = df['instruction'].apply(extract_condition)

# 4. Group by the new Condition column
grouped = df.groupby('condition').size()

# --- NEW STEP: Generate the List of Keywords ---
# Get unique values, sort them, and filter out "Unknown"
unique_keywords = sorted([x for x in df['condition'].unique() if x != "Unknown"])

print("--- Extracted Conditions (Sample) ---")
print(df[['instruction', 'condition']].head())

print("\n--- List of Keywords to Review ---")
for i, keyword in enumerate(unique_keywords):
    print(f"{i+1}. {keyword}")

--- Extracted Conditions (Sample) ---
                                         instruction          condition
0  What is psoriasis and what are its common symp...          psoriasis
1                      What is the etiology of acne?               acne
2  What are the recommended medications for atopi...  atopic dermatitis
3  Can you tell me about the treatment modalities...           melanoma
4  What is rosacea and what are its common symptoms?            rosacea

--- List of Keywords to Review ---
1. a better complexion
2. a burn injury
3. a chemical burn
4. a dermatologist in the
5. a diabetic foot ulcer
6. a first degree burn
7. a fungal nail infection
8. a fungal skin infection
9. a healthy
10. a mild case of impetigo
11. a minor burn
12. a minor burn injury
13. a minor skin wound at home
14. a minor sunburn
15. a pressure ulcer
16. a second degree burn
17. a severe sunburn
18. a skin wound infection
19. a staphylococcal skin infection
20. a sunburn at home
21. a surgical wound
2

In [77]:
target_conditions = ['impetigo',
 'burn',

 'sunburn',

 'staphylococcal',
 'acne',
 'acne vulgaris',
 'actinic prurigo',
 'cellulitis',
 'aged skin',
 'dermatitis',
 'alopecia areata',

 'prurigo nodularis',
 'atopic dermatitis',

 'bullous impetigo',
 'bullous pemphigoid',

 'actinic prurigo',


 'erythema',
 
 'lichen sclerosus',
 
 'lipodermatosclerosis',
 'livedoid vasculopathy',

 'cheilitis',

 'eczema',

 'erythema ab igne',
 'erythema multiforme',
 'erythema nodosum',
 

 'xeroderma pigmentosum',

 'actinic prurigo',
 'alopecia areata',
 'dermatomyositis',
 'pityriasis rosea',
 'pretibial myxedema',
 'prurigo nodularis',
 'xeroderma pigmentosum',
 'irritant contact dermatitis',
 
 'necrobiotic xanthogranuloma',


 'jock itch',
 'lichen planus',
 'lichen sclerosus',

 'lipodermatosclerosis',
 'livedoid vasculopathy',

 'melanoma',
 'melasma',

 'molluscum contagiosum',

 'necrobiotic xanthogranuloma',
 'necrotizing fasciitis',
 
 'pemphigus vulgaris',

 'phaeohyphomycosis',

 'pityriasis rosea',

 'pretibial myxedema',
 
 'prurigo nodularis',

 'psoriasis',
 'pyoderma gangrenosum',

 'ringworm',
 'rosacea',
 'scleroderma',
 'seborrheic dermatitis',
 'shingles',
 'skin allergies',
 'skin burns',
 'skin cancer',
 'skin dryness',

 'skin infections',
 'skin inflammation',
 'skin pigmentation disorders',
 'skin tags',
 'skin wounds',

 'polymyositis',

 'pretibial myxedema',

 'prurigo nodularis',
 'tinea capitis',
 'tinea corporis',

 'acne',

 'eczema',
 'hives',

 'rosacea',


 'necrotizing fasciitis',
 'pityriasis rosea',

 'tinea corporis',


 'vitiligo',
 'warts',

 'scleroderma',
 'psoriasis',


 'wrinkles',
 'xeroderma pigmentosum',
'basal cell carcinoma',
'fungal skin infection',
                    'herpes zoster',
                    'hidradenitis suppurativa',
                    'tinea versicolor']

In [78]:
def find_condition_from_list(text, condition_list):
    text = text.lower()
    for condition in condition_list:
        # \b ensures we match "acne" but not "acneform" (exact word match)
        pattern = r'\b' + re.escape(condition) + r'\b'
        if re.search(pattern, text):
            return condition
    return "Other" # Label for rows that don't match your list

# 4. Apply the function
df['condition'] = df['instruction'].apply(
    lambda x: find_condition_from_list(x, target_conditions)
)

# 5. Check the results
print(df[['instruction', 'condition']].head(10))

                                         instruction             condition
0  What is psoriasis and what are its common symp...             psoriasis
1                      What is the etiology of acne?                  acne
2  What are the recommended medications for atopi...            dermatitis
3  Can you tell me about the treatment modalities...              melanoma
4  What is rosacea and what are its common symptoms?               rosacea
5                 What causes seborrheic dermatitis?            dermatitis
6  What are the recommended medications for treat...              vitiligo
7  What are the treatment options for basal cell ...  basal cell carcinoma
8   What is eczema and what are its common symptoms?                eczema
9                    What causes contact dermatitis?            dermatitis


In [79]:
df[df['condition'] == 'Other']

Unnamed: 0,instruction,input,output,category,condition
106,I often go tanning and I've noticed some irreg...,,It's essential to get any new or changing mole...,General,Other
111,"I have red, flaky, crusty patches of skin, cov...",,Your symptoms suggest you may be having psoria...,Cause,Other
112,"I've noticed that my skin appears yellowish, i...",,While these symptoms can indicate a number of ...,General,Other
113,"I have itchy, red, inflamed, and scaly skin. C...",,"The symptoms you described — itchy, red, infla...",General,Other
114,What are the possible causes of birthmarks and...,,Birthmarks are skin markings that are present ...,Cause,Other
...,...,...,...,...,...
1491,What are some dermatologic emergencies and the...,,Dermatologic emergencies include conditions su...,Treatment,Other
1492,How is chronic venous insufficiency managed?,,Management of chronic venous insufficiency inc...,Treatment,Other
1500,What are common superficial fungal infections ...,,Superficial fungal infections are common infec...,Treatment,Other
1504,Explain the cause and treatment of scabies.,,Scabies is a contagious skin infestation cause...,Cause,Other


In [80]:
df = df.drop('input', axis=1)


In [81]:
df[df['condition']== 'acne']

Unnamed: 0,instruction,output,category,condition
1,What is the etiology of acne?,Acne is primarily caused by the overproduction...,Cause,acne
13,What are the treatment modalities for acne?,Acne treatment depends on the severity of the ...,Treatment,acne
40,What is the recommended medication for acne?,The recommended medication for acne depends on...,Treatment,acne
46,What are the common symptoms of acne?,Acne symptoms vary depending on the severity o...,Symptom,acne
51,What are the common causes of acne?,Acne is primarily caused by:\n\n1. Overproduct...,Cause,acne
56,What are the treatment options for acne?,Treatment for acne varies depending on the sev...,Treatment,acne
60,What are the common medications for treating a...,Acne treatments depend on the severity of the ...,Treatment,acne
73,What are some common treatments for acne?,Treatment for acne depends on its severity and...,Treatment,acne
79,What are the recommended medications for acne ...,The type of medication your doctor prescribes ...,Treatment,acne
84,What is the main cause of acne?,Acne is primarily caused by the overproduction...,Cause,acne


In [82]:
# grouped.to_csv("grouped.csv")

In [83]:
df.to_csv('medical_info.csv')