# Processing Clinical Data

The clinical information is stored in XML documents, which can be parsed using the Python package <a href="https://github.com/martinblech/xmltodict">xmltodict</a>. The fields likely to be of interest were identified and are listed in the table below. Additional information can be found at https://docs.gdc.cancer.gov/Data_Dictionary/viewer/ and in the <a href="https://cdebrowser.nci.nih.gov/cdebrowserClient/cdeBrowser.html?elementDetails=9&FirstTimer=0&PageId=ElementDetailsGroup&publicId=3081934&version=3.0#/search">CDE Browser</a>. 


column              | type         | variable    | keys                                  | description
------------------- | ------------ | ----------- | ------------------------------------- | -----------
HistologicDiagnosis | VARCHAR(250) | hist_diag   | ["shared:histological_type"]          | 
PrevMalignancy      | VARCHAR(250) | mal_history | ["shared:other_dx"]                   | 
Gender              | CHAR(1)      | gender      | ["shared:gender"]                     | FEMALE
VitalStatus         | VARCHAR(250) | vital_stat  | ["clin_shared:vital_status"]          | Alive or Dead or None
CauseDeath          | VARCHAR(250) | cause_death | ["clin_shared:patient_death_reason"]  | 
DaysToBirth         | INT          | days_birth  | ["clin_shared:days_to_birth"]         | Negative integer or None
DaysToDeath         | INT          | days_death  | ["clin_shared:days_to_death"]         | integer or None
DaysToFollowUp      | INT          | days_follow | ["clin_shared:days_to_last_followup"] | integer or None
Race                | VARCHAR(250) | race        | ["clin_shared:race_list"]["clin_shared:race"] |
CaseID              | CHAR(36)     | uuid        | ["shared:bcr_patient_uuid"]           | 
PatientBarcode      | VARCHAR(50)  | barcode     | ["shared:bcr_patient_barcode"]        |
HistoryTreatment    | VARCHAR(250) | treat_history | ["shared:history_of_neoadjuvant_treatment"] | 
HistologyICD10      | VARCHAR(250) | icd10       | ["clin_shared:icd_10"]                | 
Prospective         | VARCHAR(250) | prospective | ["clin_shared:tissue_prospective_collection_indicator"] |
Retrospective       | VARCHAR(250) | retrospect  | ["clin_shared:tissue_retrospective_collection_indicator"] | 
AgeDiagnosis        | INT          | age_diag    | ["clin_shared:age_at_initial_pathologic_diagnosis"] | 
YearDiagnosis       | INT          | year_diag   | ["clin_shared:year_of_initial_pathologic_diagnosis"] | 
Ethnicity           | VARCHAR(250) | ethnicity   | ["clin_shared:ethnicity"]             |
DiseaseStatus       | VARCHAR(250) | tumor_stat  | ["clin_shared:person_neoplasm_cancer_status"] | TUMOR FREE or WITH TUMOR or None
AnatomicSubdivision | VARCHAR(250) | subdiv      | ["clin_shared:anatomic_neoplasm_subdivision"] | Replaced with "_other" when necessary
LymphNodesCounted   | VARCHAR(250) | lymph_node  | ["clin_shared:lymph_node_examined_count"] |
LymphNodesPosHE     | VARCHAR(250) | lymph_he    | ["clin_shared:number_of_lymphnodes_positive_by_he"] |
LymphNodesPosIHC    | VARCHAR(250) | lymph_ihc   | ["clin_shared:number_of_lymphnodes_positive_by_ihc"] |
TumorStage          | VARCHAR(250) | stage       | ["shared_stage:stage_event"]["shared_stage:pathologic_stage"]  or ["shared_stage:stage_event"]["shared_stage:clinical_stage"]| 
AJCCTumor           | VARCHAR(250) | ajcc_tumor  | ["shared_stage:stage_event"]["shared_stage:tnm_categories"]["shared_stage:pathologic_categories"]["shared_stage:pathologic_T"] | AJCC staging
AJCCNodes           | VARCHAR(250) | ajcc_nodes  | ["shared_stage:stage_event"]["shared_stage:tnm_categories"]["shared_stage:pathologic_categories"]["shared_stage:pathologic_N"] |
AJCCMetastasis      | VARCHAR(250) | ajcc_metastasis | ["shared_stage:stage_event"]["shared_stage:tnm_categories"]["shared_stage:pathologic_categories"]["shared_stage:pathologic_M"] |
TumorGrade          | VARCHAR(250) | tumor_grade | ["shared:neoplasm_histologic_grade"] |
ResidualTumor       | VARCHAR(250) | resid_tumor | ["clin_shared:residual_tumor"]       |
HistoryReflux       | VARCHAR(250) | hist_reflux | ["esca_stad_shared:reflux_history"]  | 
RefluxTreatment     | VARCHAR(250) | reflux_treat| ["stad:antireflux_treatment_types"]["esca_stad_shared:antireflux_treatment_type"] | 
HistoryBarretts     | VARCHAR(250) | barretts    | ["esca_stad_shared:barretts_esophagus"] |
HistoryHPylori      | VARCHAR(250) | hpylori     | ["esca_stad_shared:h_pylori_infection"] |
FamilyHistory       | VARCHAR(250) | family_hist | ["stad:family_history_of_stomach_cancer"] | 
PharmTherapy        | VARCHAR(250) | pharm_therapy | ["clin_shared:targeted_molecular_therapy"] | 
RadiationTherapy    | VARCHAR(250) | rad_therapy | ["clin_shared:radiation_therapy"]    |
TreatmentOutcome    | VARCHAR(250) | treat_out   | ["clin_shared:primary_therapy_outcome_success"] |
NewTumor            | VARCHAR(250) | new_tumor   | ["stad_nte:new_tumor_events"]["nte:new_tumor_event_after_initial_treatment"] | 
DaysToNewTumor      | INT          | days_new_tumor | ["stad_nte:new_tumor_events"]["stad_nte:new_tumor_event"]["nte:days_to_new_tumor_event_after_initial_treatment"]| Days to new tumor after initial treatment
NewTumorType        | VARCHAR(250) | event_new_tumor | ["stad_nte:new_tumor_events"]["stad_nte:new_tumor_event"]["stad_nte:new_neoplasm_event_types"]["nte:new_neoplasm_event_type"] | None or Locoregional Recurrence or Distant Metastasis or New Primary Tumor
NewTumorEvidence    | VARCHAR(250) | evid_new_tumor | ["stad_nte:new_tumor_events"]["stad_nte:new_tumor_event"]["stad_nte:progression_determined_by_list"]["nte:progression_determined_by"] | 
NewTumorSurgery     | VARCHAR(250) | new_surgery | ["stad_nte:new_tumor_events"]["stad_nte:new_tumor_event"]["stad_nte:locoregional_procedure"]["nte:additional_surgery_locoregional_procedure"] | None or YES or NO
NewTumorResidual    | VARCHAR(250) | resid_new   | ["stad_nte:new_tumor_events"]["stad_nte:new_tumor_event"]["stad_nte:locoregional_procedure"]["nte:residual_disease_post_new_tumor_event_margin_status"]
NewMetSite          | VARCHAR(250) | met_site    | ["stad_nte:new_tumor_events"]["stad_nte:new_tumor_event"]["stad_nte:metastatic_procedure"]["nte:new_neoplasm_event_occurrence_anatomic_site"] |
NewMetSurgery       | VARCHAR(250) | met_surgery | ["stad_nte:new_tumor_events"]["stad_nte:new_tumor_event"]["stad_nte:metastatic_procedure"]["nte:additional_surgery_metastatic_procedure"] | None or YES or NO
NewRadiationTherapy | VARCHAR(250) | new_rad     | ["stad_nte:new_tumor_events"]["stad_nte:new_tumor_event"]["nte:additional_radiation_therapy"] | None or YES or NO
NewPharmTherapy     | VARCHAR(250) | new_pharm   | ["stad_nte:new_tumor_events"]["stad_nte:new_tumor_event"]["nte:additional_pharmaceutical_therapy"] | 
TotalDose           | VARCHAR(250) | tot_dose    | ["rx:drugs"]["rx:total_dose"]    |
TotalDoseUnits      | VARCHAR(250) | tot_dose_units | ["rx:drugs"]["rx:total_dose_units"] |
PrescribedDose      | VARCHAR(250) | presc_dose  | ["rx:drugs"]["rx:prescribed_dose"] |
PrescribedDoseUnits | VARCHAR(250) | presc_dose_units | ["rx:drugs"]["rx:prescribed_dose_units"] |
DrugCycles          | INT          | cycles      | ["rx:drugs"]["rx:number_cycles"]   |
DaysToDrugStart     | INT          | start_days  | ["rx:drugs"]["rx:days_to_drug_therapy_start"] |
DaysToDrugEnd       | INT          | end_days    | ["rx:drugs"]["rx:days_to_drug_therapy_end"] |
TherapyType         | VARCHAR(250) | ther_types  | ["rx:drugs"]["rx:therapy_types"]["rx:therapy_type"] |
DrugName            | VARCHAR(250) | drug_names  | ["rx:drugs"]["rx:drug_name"]       |
TherapyRegimenType  | VARCHAR(250) | ther_reg    | ["rx:drugs"]["clin_shared:regimen_indication"] |
TherapyOngoing      | VARCHAR(250) | ther_ongoing | ["rx:drugs"]["rx:therapy_ongoing"] |
DrugResponse        | VARCHAR(250) | resp        | ["rx:drugs"]["clin_shared:measure_of_response"] |
DaysToRadStart      | INT          | rad_starts  | rad["rad:days_to_radiation_therapy_start"]   |
DaysToRadEnd        | INT          | rad_ends    | rad["rad:days_to_radiation_therapy_end"]     |
RadiationType       | VARCHAR(250) | rad_types   | rad["rad:radiation_type"]                    |
RadiationDose       | VARCHAR(250) | rad_doses   | rad["rad:radiation_dosage"]                  |
RadiationUnits      | VARCHAR(250) | rad_units   | rad["rad:units"]                             |
RadiationNumFrac    | INT          | rad_fracs   | rad["rad:numfractions"]                      |
RadiationSite       | VARCHAR(250) | rad_sites   | rad["rad:anatomic_treatment_site"]           |
RadRegimenType      | VARCHAR(250) | rad_regimen | rad["clin_shared:regimen_indication"]        |
RadiationOngoing    | VARCHAR(250) | rad_ongoings | rad["rad:radiation_treatment_ongoing"]      |
RadiationResponse   | VARCHAR(250) | rad_responses | rad["clin_shared:measure_of_response"]     |
FollowUpOutcome     | VARCHAR(250) | treat_out_new | follow_up["follow_up_v1.0:followup_treatment_success"] |

In the metadata, the "case_id" is equivalent to the "shared:bcr_patient_uuid" in the clinical data.

In [None]:
import xmltodict
import os
import pandas as pd

# Output columns, in CSV order.  Every key written to a patient row is one of these.
_CLINICAL_COLUMNS = [
    "PatientBarcode", "HistologicDiagnosis", "VitalStatus", "CauseDeath",
    "DaysToBirth", "DaysToDeath", "DaysToFollowUp", "Race", "Ethnicity",
    "AgeDiagnosis", "YearDiagnosis", "DiseaseStatus", "TumorStage", "TreatmentOutcome",
    "NewTumor", "DaysToNewTumor", "TherapyType", "DrugName", "RadiationType",
    "FollowUpDays", "AdditionalTreatmentOutcome", "AdditionalNewTumor",
    "AdditionalNewTumorDays",
]

# (output column, XML tag directly under the patient element, conversion kind)
_PATIENT_FIELDS = [
    ("PatientBarcode", "shared:bcr_patient_barcode", "str"),
    ("HistologicDiagnosis", "shared:histological_type", "str"),
    ("VitalStatus", "clin_shared:vital_status", "str"),
    ("CauseDeath", "clin_shared:patient_death_reason", "str"),
    ("Ethnicity", "clin_shared:ethnicity", "str"),
    ("DiseaseStatus", "clin_shared:person_neoplasm_cancer_status", "str"),
    ("TreatmentOutcome", "clin_shared:primary_therapy_outcome_success", "str"),
    ("DaysToBirth", "clin_shared:days_to_birth", "int"),
    ("DaysToDeath", "clin_shared:days_to_death", "int"),
    ("DaysToFollowUp", "clin_shared:days_to_last_followup", "int"),
    ("AgeDiagnosis", "clin_shared:age_at_initial_pathologic_diagnosis", "int"),
    ("YearDiagnosis", "clin_shared:year_of_initial_pathologic_diagnosis", "int"),
]

# (output column, container tag, tag inside the container)
_NESTED_FIELDS = [
    ("Race", "clin_shared:race_list", "clin_shared:race"),
    ("TumorStage", "shared_stage:stage_event", "shared_stage:clinical_stage"),
]

# Fields re-read from a follow-up record that is more recent than the patient record.
_FOLLOW_UP_FIELDS = [
    ("VitalStatus", "clin_shared:vital_status", "str"),
    ("CauseDeath", "clin_shared:patient_death_reason", "str"),
    ("DiseaseStatus", "clin_shared:person_neoplasm_cancer_status", "str"),
    ("TreatmentOutcome", "clin_shared:primary_therapy_outcome_success", "str"),
    ("DaysToDeath", "clin_shared:days_to_death", "int"),
    ("DaysToFollowUp", "clin_shared:days_to_last_followup", "int"),
]

_DAYS_TO_NEW_TUMOR_TAG = "nte:days_to_new_tumor_event_after_initial_treatment"
_NEW_TUMOR_TYPE_TAG = "nte:new_neoplasm_event_type"
_FOLLOW_UP_DAYS_TAG = "clin_shared:days_to_last_followup"
_FOLLOW_UP_OUTCOME_TAG = "clin_shared:additional_treatment_completion_success_outcome"


def _child(node, *path):
    """Follow ``path`` through nested mappings; return None if any step is missing."""
    for key in path:
        if not isinstance(node, dict):
            return None
        node = node.get(key)
    return node


def _node_text(node):
    """Return the text of one parsed element, or None when it has none.

    xmltodict yields a mapping with a ``#text`` entry for elements that carry
    attributes, the bare string for attribute-less elements, and None for
    empty elements; all three shapes are accepted here.
    """
    if isinstance(node, dict):
        return node.get("#text")
    if node is None or isinstance(node, list):
        return None
    return node


def _tag_text(tag):
    """Build an extractor returning the text of child ``tag`` of a node."""
    def extract(node):
        return _node_text(_child(node, tag))
    return extract


def _convert(text, kind):
    """Coerce ``text`` to ``str``/``int`` per ``kind``; None passes through."""
    if text is None:
        return None
    if kind == "str":
        return str(text)
    if kind == "int":
        return int(text)
    return text


def _join_na(values, sep):
    """Join ``values`` with ``sep``, writing "NA" for missing (None) entries."""
    return sep.join("NA" if value is None else value for value in values)


def _summarise(nodes, extract, sep):
    """Reduce a single-or-repeated element to one cell value.

    A repeated element (list) becomes a ``sep``-joined string with "NA" for
    missing values; a single element yields ``extract(node)`` unchanged, which
    may be None.
    """
    if isinstance(nodes, list):
        return _join_na([extract(node) for node in nodes], sep)
    return extract(nodes)


def _therapy_text(drug):
    """Return the therapy type(s) of one drug record, "-"-joined if repeated."""
    return _summarise(_child(drug, "rx:therapy_types", "rx:therapy_type"), _node_text, "-")


def _apply_fields(out, node, specs):
    """Write each ``(column, tag, kind)`` of ``specs`` from ``node`` into ``out``.

    A tag that is absent or has no text overwrites the column with None.
    """
    for col, tag, kind in specs:
        out[col] = _convert(_node_text(_child(node, tag)), kind)


def _follow_up_tumor(follow, events_tag, event_tag):
    """Return ``(event types, days to event)`` for one follow-up record.

    Both are "NA" when the record has no new-tumor event; several events are
    "-"-joined.
    """
    events = _child(follow, events_tag, event_tag)
    if events is None:
        return "NA", "NA"
    return (_summarise(events, _tag_text(_NEW_TUMOR_TYPE_TAG), "-"),
            _summarise(events, _tag_text(_DAYS_TO_NEW_TUMOR_TAG), "-"))


def _merge_follow_ups(out, follows, events_tag, event_tag):
    """Fill the follow-up columns of ``out`` and refresh outdated patient fields.

    Only the last child of the follow-ups container is read (the original
    code treats it as the latest follow-up form).  Whenever a follow-up record
    reports a later ``days_to_last_followup`` than the row currently holds, the
    columns in ``_FOLLOW_UP_FIELDS`` are overwritten from that record.
    """
    out["FollowUpDays"] = None
    out["AdditionalTreatmentOutcome"] = None
    out["AdditionalNewTumor"] = None
    out["AdditionalNewTumorDays"] = None
    if not isinstance(follows, dict):
        return

    # Skip attribute/text entries so that the last key is always a child element.
    versions = [key for key in follows if not key.startswith("@") and key != "#text"]
    if not versions:
        return
    latest = follows[versions[-1]]
    if latest is None:
        return

    records = latest if isinstance(latest, list) else [latest]
    days, outcomes, tumor_types, tumor_days = [], [], [], []
    for follow in records:
        day = _convert(_node_text(_child(follow, _FOLLOW_UP_DAYS_TAG)), "int")
        days.append(day)
        outcomes.append(_node_text(_child(follow, _FOLLOW_UP_OUTCOME_TAG)))
        event_types, event_days = _follow_up_tumor(follow, events_tag, event_tag)
        tumor_types.append(event_types)
        tumor_days.append(event_days)

        # Explicit None handling: a missing current value always loses, a
        # missing follow-up day never wins.  This matches what the Python 2
        # ``None < int`` ordering produced, without raising on Python 3.
        current = out["DaysToFollowUp"]
        if day is not None and (current is None or current < day):
            _apply_fields(out, follow, _FOLLOW_UP_FIELDS)

    if isinstance(latest, list):
        # NOTE: missing days are written as "None" here (not "NA"); kept as-is
        # so existing consumers of the CSV see the same values.
        out["FollowUpDays"] = "; ".join(str(day) for day in days)
        out["AdditionalTreatmentOutcome"] = _join_na(outcomes, "; ")
        out["AdditionalNewTumor"] = _join_na(tumor_types, "; ")
        out["AdditionalNewTumorDays"] = _join_na(tumor_days, "; ")
    else:
        out["FollowUpDays"] = days[0]
        out["AdditionalTreatmentOutcome"] = outcomes[0]
        out["AdditionalNewTumor"] = tumor_types[0]
        out["AdditionalNewTumorDays"] = tumor_days[0]


def _parse_patient(patient, project):
    """Build one output row (column name -> value) from a parsed patient element."""
    # Not present in LAML clinical data
    events_tag = "{}_nte:new_tumor_events".format(project)
    event_tag = "{}_nte:new_tumor_event".format(project)

    out = {}
    _apply_fields(out, patient, _PATIENT_FIELDS)
    if out["DaysToBirth"] is not None:
        # Flip the sign of the parsed value.
        out["DaysToBirth"] = out["DaysToBirth"] * -1

    for col, outer, inner in _NESTED_FIELDS:
        out[col] = _convert(_summarise(_child(patient, outer, inner), _node_text, "; "), "str")

    # New tumor events recorded on the patient itself.
    out["NewTumor"] = _node_text(
        _child(patient, events_tag, "nte:new_tumor_event_after_initial_treatment"))
    # NOTE: joined with ";" (no space), unlike the other multi-value columns;
    # kept for compatibility with previously generated summaries.
    out["DaysToNewTumor"] = _summarise(
        _child(patient, events_tag, event_tag), _tag_text(_DAYS_TO_NEW_TUMOR_TAG), ";")

    # Drug information.
    drug_nodes = _child(patient, "rx:drugs", "rx:drug")
    out["TherapyType"] = _summarise(drug_nodes, _therapy_text, "; ")
    out["DrugName"] = _summarise(drug_nodes, _tag_text("rx:drug_name"), "; ")

    # Radiation information.
    rad_nodes = _child(patient, "rad:radiations", "rad:radiation")
    out["RadiationType"] = _summarise(rad_nodes, _tag_text("rad:radiation_type"), "; ")

    # Update based on follow-up information.
    _merge_follow_ups(out, _child(patient, "{}:follow_ups".format(project)),
                      events_tag, event_tag)
    return out


def fetch_clinical(clin_dir, project, out_file):
    """Summarise the clinical XML files under ``clin_dir`` into one CSV.

    Every immediate subdirectory of ``clin_dir`` is scanned for files whose
    name contains "xml".  Each file is parsed with xmltodict and contributes
    one row, indexed by the patient's ``shared:bcr_patient_uuid``.

    Args:
        clin_dir: Directory holding one subdirectory per downloaded file.
        project: Lower-case project prefix used in the XML tag names
            (e.g. the ``stad`` in ``stad:patient``).
        out_file: Path of the CSV file to write.

    Raises:
        KeyError: If a document lacks the ``<project>:tcga_bcr`` /
            ``<project>:patient`` elements or the patient UUID element.
    """
    df = pd.DataFrame(columns=_CLINICAL_COLUMNS)
    for directory in os.listdir(clin_dir):
        case_dir = os.path.join(clin_dir, directory)
        if not os.path.isdir(case_dir):
            continue
        for filename in os.listdir(case_dir):
            if "xml" not in filename:
                continue
            # Binary mode: the expat parser behind xmltodict reads bytes and
            # applies the encoding declared in the document itself.
            with open(os.path.join(case_dir, filename), "rb") as fin:
                doc = xmltodict.parse(fin)

            patient = doc["{}:tcga_bcr".format(project)]["{}:patient".format(project)]
            uuid = _node_text(patient["shared:bcr_patient_uuid"])
            df.loc[uuid] = _parse_patient(patient, project)

    df.to_csv(out_file, encoding='utf-8')

In [None]:
import os

def tcga_clin_summary(tcga_dir):
    """Write one clinical summary CSV per project directory in ``tcga_dir``.

    Each entry of ``tcga_dir`` that contains a ``Clinical`` subdirectory is
    passed to ``fetch_clinical``; the lower-cased entry name is used as the
    project prefix.  Summaries are written to
    ``TCGA Summary/Clinical/<entry> Clinical Data Summary.csv`` relative to
    the current working directory, which is created when missing.

    Args:
        tcga_dir: Directory holding one subdirectory per project.
    """
    out_dir = "TCGA Summary/Clinical"
    for in_dir in os.listdir(tcga_dir):
        clin_dir = "{}/{}/Clinical".format(tcga_dir, in_dir)
        # Skip stray files and projects without clinical data instead of
        # failing inside fetch_clinical's directory listing.
        if not os.path.isdir(clin_dir):
            continue
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)
        out_file = "{}/{} Clinical Data Summary.csv".format(out_dir, in_dir)
        # Parenthesised single-argument form prints the same text on Python 2 and 3.
        print("Processing {}".format(in_dir))
        fetch_clinical(clin_dir, in_dir.lower(), out_file)