# Generate Target Kaplan Meier Curves 

### Install Necessary Plotting Library
To plot Kaplan-Meier curves, the _lifelines_ library is required.

In [None]:
!pip install lifelines -q

In [None]:
from lifelines import KaplanMeierFitter
import matplotlib.pyplot as plt

In [None]:
from ClinicalDocumentUtils import Database

## Find patients of interest
Use the Database class to find the MRNs and targets of the patients of interest (in this example, those with an ICD-10 code starting with c44)

In [None]:
# Create the Database helper used by every cell below (project selection,
# target lookup, Kaplan-Meier data); it is deleted again in the final cell.
d = Database()

## Get list of projects and their UUID

In [None]:
# List the available projects. As the last expression of the cell, any return
# value is displayed by the notebook; use it to pick the UUID for
# set_project_uuid below.
d.fetch_projects()

### Set Project UUID

In [None]:
# Select the project that the queries below run against.
# NOTE(review): this UUID is presumably specific to one installation - replace
# it with one of the UUIDs reported by fetch_projects().
d.set_project_uuid('02bae428-4312-4f97-8488-bf511fd2d44e')

### Get unique targets
Get the target IDs that meet the specified criteria. In this example, that is every "surgery" tag associated with an ICD-10 code starting with c44. Note the trailing wildcard "%", which matches any code that begins with c44.

In [None]:
# List of filter dicts; here a single one pairing an ICD-10 pattern
# ('%' is a wildcard) with the tag that must be attached to it.
filters = [dict(icd10='c44%', tag='surgery')]
print(f"{filters=}")

# Look up the MRN/target pairs matching the filters and echo them for inspection.
mrn_targets = d.get_mrn_targets_where_filter(filters)
print(f"mrn_targets:{mrn_targets}")

## Define start and event
- Which tag defines the start time (e.g. surgery, radiation)?
- Which tags define an event (e.g. death, biochemical_progression)? There may be more than one.

In [None]:
# Cohort and endpoint definition handed to target_kaplan_meier:
# the ICD-10 pattern, the tag marking the start time, and the tags that
# count as an event.
icd10, start_tag = 'c44%', 'surgery'
event_tags = ['local-progression', 'death']

### Use the method `target_kaplan_meier` to get Kaplan-Meier data based on targets

In [None]:
# Request the Kaplan-Meier data for the selected targets, using the ICD-10
# pattern, start tag and event tags defined above. The plotting cell below
# reads the 'mrn_targets', 'durations' and 'events' entries of the result.
output = d.target_kaplan_meier(mrn_targets, icd10, start_tag, event_tags)
output  # bare expression so the notebook displays the returned value

### Plot The Kaplan Meier Curve

In [None]:
# Unpack the Kaplan-Meier inputs returned by target_kaplan_meier.
# NOTE(review): this rebinds the notebook-level `mrn_targets`, and the value is
# not used again in this cell - confirm the overwrite is intended.
mrn_targets = output['mrn_targets']
durations, events = output['durations'], output['events']

# Fit the estimator (fit() returns the fitter itself) and draw the survival
# curve with the at-risk table underneath.
kmf = KaplanMeierFitter().fit(durations, event_observed=events)
kmf.plot_survival_function(at_risk_counts=True)
plt.title('Kaplan-Meier Curve')

# Render the figure, then release it so it does not linger in pyplot's state.
plt.show()
plt.close()

In [None]:
# Drop the notebook's reference to the Database object.
# NOTE(review): `del` only unbinds the name `d`; presumably Database releases
# its connection when the object is finalized - confirm, or call an explicit
# close method first if one exists.
del d