# Filter Patients
This notebook loads patient data from a CSV, tokenizes symptoms, and filters patients older than 40 with more than 2 symptoms.

In [None]:
import pandas as pd

## Step 1: Create the CSV file

In [None]:
data = """id,name,age,symptoms
1,Andi,45,"demam, batuk, sesak napas"
2,Budi,29,"mual, sakit perut"
3,Citra,62,"pusing, kehilangan keseimbangan"
4,Dita,30,"susah tidur"
5,Eka,18,"gusi berdarah"
6,Fitra,49,"pusing, sakit perut"
7,Gio,49,"menggigil, batuk, sakit kepala"
8,Harianto,33,"memar di tangan"
9,Idul,88,"susah tidur"
10,Jaka,54,"sesak nafas"
"""

with open("patients.csv", "w", encoding="utf-8") as f:
    f.write(data)

## Step 2: Load the CSV

In [None]:
# Load the CSV written in Step 1 into a DataFrame; each quoted symptoms
# field is read as a single string value.
df = pd.read_csv(filepath_or_buffer="patients.csv")
df  # bare last expression so the notebook renders the table

Unnamed: 0,id,name,age,symptoms
0,1,Andi,45,"demam, batuk, sesak napas"
1,2,Budi,29,"mual, sakit perut"
2,3,Citra,62,"pusing, kehilangan keseimbangan"
3,4,Dita,30,susah tidur
4,5,Eka,18,gusi berdarah
5,6,Fitra,49,"pusing, sakit perut"
6,7,Gio,49,"menggigil, batuk, sakit kepala"
7,8,Harianto,33,memar di tangan
8,9,Idul,88,susah tidur
9,10,Jaka,54,sesak nafas


## Step 3: Tokenize the symptoms into lists

In [None]:
# Tokenize the comma-separated symptoms string into one list per patient.
# The raw data has a space after every comma, so a plain split(',') would
# produce tokens such as ' batuk'. Stripping the ends and splitting on the
# regex r'\s*,\s*' yields clean tokens ('batuk') that can be matched or
# counted reliably; the number of tokens per row is unchanged.
df['symptoms_list'] = df['symptoms'].str.strip().str.split(r'\s*,\s*', regex=True)
df[['id', 'name', 'age', 'symptoms_list']]

Unnamed: 0,id,name,age,symptoms_list
0,1,Andi,45,"[demam, batuk, sesak napas]"
1,2,Budi,29,"[mual, sakit perut]"
2,3,Citra,62,"[pusing, kehilangan keseimbangan]"
3,4,Dita,30,[susah tidur]
4,5,Eka,18,[gusi berdarah]
5,6,Fitra,49,"[pusing, sakit perut]"
6,7,Gio,49,"[menggigil, batuk, sakit kepala]"
7,8,Harianto,33,[memar di tangan]
8,9,Idul,88,[susah tidur]
9,10,Jaka,54,[sesak nafas]


## Step 4: Filter patients older than 40 with more than 2 symptoms

In [None]:
# Filter thresholds; both comparisons below are strict (>).
AGE_THRESHOLD = 40
SYMPTOM_COUNT_THRESHOLD = 2

# .str.len() counts the elements of each symptoms list and yields NaN for a
# missing entry, so such rows are simply excluded by the comparison instead of
# raising TypeError the way .apply(len) does.
symptom_counts = df['symptoms_list'].str.len()

filtered_df = df[
    (df['age'] > AGE_THRESHOLD) & (symptom_counts > SYMPTOM_COUNT_THRESHOLD)
]
filtered_df[['id', 'name', 'age', 'symptoms_list']]

Unnamed: 0,id,name,age,symptoms_list
0,1,Andi,45,"[demam, batuk, sesak napas]"
6,7,Gio,49,"[menggigil, batuk, sakit kepala]"
