# Download PTX images that are uploaded in Local Paths

### Prerequisites
* Locate your imaging files: local paths

In [1]:
import collections.abc
import numbers
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import pydicom
import requests
from azure.identity import DefaultAzureCredential
from urllib3.filepost import encode_multipart_formdata, choose_boundary

## A Single DICOM File

In [31]:
# Relative path of one sample DICOM file from the Pneumothorax training folder.
# NOTE(review): the doubled "/" before the file name looks unintentional — confirm.
one_file_path = "../files/PTX-CXR/train/Pneumothorax//000001.dcm"

def flatten_metadata(file_path):
    """Flatten the data elements of one DICOM file into a list of row dicts.

    The file is read with ``pydicom.dcmread`` and walked with
    ``ds.iterall()``; every element visited becomes one row.

    Args:
        file_path: Path of the DICOM file to read.

    Returns:
        A list with one dict per element, keyed by ``StudyUID``,
        ``SeriesUID``, ``Tag`` (group and element, each as 4 lowercase hex
        digits), ``name``, ``vr`` and ``Value``. ``Value`` is:

        * the string ``'None'`` when the element value is empty or missing,
        * the original number for numeric values, including zero,
        * a ``', '``-joined string for multi-valued (non-``SQ``) elements,
        * the original value object otherwise.

    Raises:
        AttributeError: If the dataset has no ``StudyInstanceUID`` or
            ``SeriesInstanceUID`` attribute.
    """
    # Load the DICOM dataset.
    ds = pydicom.dcmread(file_path)

    study_uid = ds.StudyInstanceUID
    series_uid = ds.SeriesInstanceUID

    flat_metadata = []
    for elem in ds.iterall():
        # Render the tag as "ggggeeee" (group + element in hex).
        tag = f"{elem.tag.group:04x}{elem.tag.element:04x}"
        vr = elem.VR
        name = elem.name
        value = elem.value

        if isinstance(value, numbers.Number):
            # A numeric 0 is real data; a plain truthiness test would
            # wrongly replace it with the 'None' placeholder.
            pass
        elif not value:
            # Empty string / empty sequence / None -> placeholder text.
            value = 'None'
        elif (
            vr != 'SQ'
            and isinstance(value, collections.abc.Sequence)
            and not isinstance(value, (str, bytes, bytearray))
        ):
            # pydicom multi-values are MutableSequence objects, not ``list``
            # instances, so test against the abstract Sequence type.
            # SQ containers are left untouched: pydicom documents iterall()
            # as yielding the elements nested inside them as well.
            value = ', '.join(map(str, value))  # stringify every item

        flat_metadata.append({
            'StudyUID': study_uid,
            'SeriesUID': series_uid,
            'Tag': tag,
            'name': name,
            'vr': vr,
            'Value': value
        })
    return flat_metadata

# Flatten the sample file and load its per-element rows into a DataFrame.
flat_one_metadata_df = pd.DataFrame(data=flatten_metadata(one_file_path))
# Bare last expression so the notebook renders the frame as a table.
flat_one_metadata_df

Unnamed: 0,StudyUID,SeriesUID,Tag,name,vr,Value
0,1.2.276.0.7230010.3.1.2.8323329.13666.15178752...,1.2.276.0.7230010.3.1.3.8323329.13666.15178752...,00080005,Specific Character Set,CS,ISO_IR 100
1,1.2.276.0.7230010.3.1.2.8323329.13666.15178752...,1.2.276.0.7230010.3.1.3.8323329.13666.15178752...,00080016,SOP Class UID,UI,1.2.840.10008.5.1.4.1.1.7
2,1.2.276.0.7230010.3.1.2.8323329.13666.15178752...,1.2.276.0.7230010.3.1.3.8323329.13666.15178752...,00080018,SOP Instance UID,UI,1.2.276.0.7230010.3.1.4.8323329.13666.15178752...
3,1.2.276.0.7230010.3.1.2.8323329.13666.15178752...,1.2.276.0.7230010.3.1.3.8323329.13666.15178752...,00080020,Study Date,DA,19010101
4,1.2.276.0.7230010.3.1.2.8323329.13666.15178752...,1.2.276.0.7230010.3.1.3.8323329.13666.15178752...,00080030,Study Time,TM,000000.00
5,1.2.276.0.7230010.3.1.2.8323329.13666.15178752...,1.2.276.0.7230010.3.1.3.8323329.13666.15178752...,00080050,Accession Number,SH,
6,1.2.276.0.7230010.3.1.2.8323329.13666.15178752...,1.2.276.0.7230010.3.1.3.8323329.13666.15178752...,00080060,Modality,CS,CR
7,1.2.276.0.7230010.3.1.2.8323329.13666.15178752...,1.2.276.0.7230010.3.1.3.8323329.13666.15178752...,00080064,Conversion Type,CS,WSD
8,1.2.276.0.7230010.3.1.2.8323329.13666.15178752...,1.2.276.0.7230010.3.1.3.8323329.13666.15178752...,00080090,Referring Physician's Name,PN,
9,1.2.276.0.7230010.3.1.2.8323329.13666.15178752...,1.2.276.0.7230010.3.1.3.8323329.13666.15178752...,0008103e,Series Description,LO,view: AP


In [16]:
# Count the distinct values in each identifying column of the single-file frame.
print("StudyUID Kind : ", flat_one_metadata_df["StudyUID"].nunique())
# Label fixed: it previously read "SereisUID".
print("SeriesUID Kind : ", flat_one_metadata_df["SeriesUID"].nunique())
print("Tag kind: ", flat_one_metadata_df["Tag"].nunique())
print("vr kind: ", flat_one_metadata_df["vr"].nunique())

StudyUID Kind :  1
SereisUID Kind :  1
Tag kind:  35
vr kind:  12


<pandas.core.groupby.generic.DataFrameGroupBy object at 0x00000105FF50ED90>

In [30]:
# Regroup the single-file rows by VR; inside each VR group the row index is
# restarted at 0, so the result is indexed by (vr, position within group).
grouped = (
    flat_one_metadata_df
    .groupby("vr")[['Tag', 'name', 'Value']]
    .apply(lambda vr_rows: vr_rows.reset_index(drop=True))
)
grouped

Unnamed: 0_level_0,Unnamed: 1_level_0,Tag,Value
vr,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AS,0,00101010,34
CS,0,00080005,ISO_IR 100
CS,1,00080060,CR
CS,2,00080064,WSD
CS,3,00100040,M
CS,4,00180015,CHEST
CS,5,00185101,AP
CS,6,00200020,
CS,7,00280004,MONOCHROME2
CS,8,00282110,01


## All DICOM Files

In [3]:
import os

# Training folders scanned below, one per label: files with pneumothorax
# ("true") and files without it ("false").
folder_path_true = "../files/PTX-CXR/train/Pneumothorax"
folder_path_false = "../files/PTX-CXR/train/No Pneumothorax"

def extract_all_metadata(folder_path):
    """Collect the flattened metadata rows of every DICOM file in a folder.

    Only the folder's direct entries are examined (no recursion). An entry is
    treated as a DICOM file when its name ends in ``.dcm``, in any letter case.

    Args:
        folder_path: Directory to scan.

    Returns:
        A single flat list holding the rows returned by ``flatten_metadata``
        for each matching file, with files visited in sorted name order.
    """
    metadata = []
    # os.listdir() gives names in arbitrary order; sorting keeps the row
    # order identical from run to run.
    for file_name in sorted(os.listdir(folder_path)):
        # Case-insensitive match so files named like "X.DCM" are not skipped.
        if not file_name.lower().endswith(".dcm"):
            continue
        file_path = os.path.join(folder_path, file_name)
        metadata.extend(flatten_metadata(file_path))
    return metadata

# Pneumothorax folder: flattened rows, then the same rows as a DataFrame.
ptx_data = extract_all_metadata(folder_path_true)
ptx_df = pd.DataFrame(data=ptx_data)

# "No Pneumothorax" folder: same two steps.
none_ptx_data = extract_all_metadata(folder_path_false)
none_ptx_df = pd.DataFrame(data=none_ptx_data)

In [6]:
# Number of distinct tags found across the files of the Pneumothorax folder.
print(ptx_df['Tag'].nunique())

35


In [7]:
# Number of distinct tags found across the files of the "No Pneumothorax" folder.
print(none_ptx_df['Tag'].nunique())

35
