# Download PTX images that are uploaded in Local Paths

### Prerequisites
* Locate your imaging files on the local file system (local paths)

In [14]:
import collections.abc
import numbers
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import pydicom
import requests
from azure.identity import DefaultAzureCredential
from urllib3.filepost import encode_multipart_formdata, choose_boundary

## A Single DICOM File

In [None]:
# Path of the single sample DICOM file that is flattened further down in this cell.
# NOTE(review): the doubled "/" before the file name looks like a typo -- confirm and simplify.
one_file_path = "../files/PTX-CXR/train/Pneumothorax//000001.dcm"

def _normalise_value(value, vr):
    """Return a table-friendly representation of one DICOM element value.

    * ``None`` and empty non-numeric values (``''``, ``b''``, an empty
      multi-value, ...) become the placeholder string ``'None'``.
    * Numeric values are returned unchanged, so a legitimate ``0`` is not
      mistaken for a missing value.
    * Multi-valued elements are joined into one ``', '``-separated string.
    * Everything else is returned unchanged.
    """
    is_number = isinstance(value, numbers.Number)
    if value is None or (not is_number and not value):
        return 'None'

    # pydicom returns multi-valued elements as a MultiValue, which is a
    # MutableSequence but not a ``list`` subclass, so check against the
    # abstract Sequence type instead of ``list``. Strings and raw bytes are
    # sequences too and must stay intact.
    is_multi = (
        isinstance(value, collections.abc.Sequence)
        and not isinstance(value, (str, bytes, bytearray))
    )
    # Sequence (SQ) elements are left untouched: iterall() already yields the
    # elements nested inside them as rows of their own.
    if is_multi and vr != 'SQ':
        return ', '.join(map(str, value))  # stringify each item before joining
    return value


def flatten_metadata(file_path):
    """Read one DICOM file and flatten all of its data elements into row dicts.

    Args:
        file_path: Path of the DICOM file, passed straight to ``pydicom.dcmread``.

    Returns:
        A list with one dict per element yielded by ``Dataset.iterall()``.
        Each dict has the keys ``'StudyUID'``, ``'SeriesUID'``, ``'Tag'``
        (group and element as eight lower-case hex digits), ``'vr'`` and
        ``'Value'`` (see ``_normalise_value``).
    """
    # Load the DICOM dataset
    ds = pydicom.dcmread(file_path)

    # Both UIDs are repeated on every row so rows from many files can be
    # stacked into one table and still be grouped per study / series.
    study_uid = ds.StudyInstanceUID
    series_uid = ds.SeriesInstanceUID

    return [
        {
            'StudyUID': study_uid,
            'SeriesUID': series_uid,
            # Convert the tag to "ggggeeee" hexadecimal form
            'Tag': f"{elem.tag.group:04x}{elem.tag.element:04x}",
            'vr': elem.VR,
            'Value': _normalise_value(elem.value, elem.VR),
        }
        for elem in ds.iterall()
    ]

# Flatten the sample file and show the resulting table (one row per DICOM element).
flat_one_metadata_df = pd.DataFrame(data=flatten_metadata(one_file_path))
flat_one_metadata_df

Unnamed: 0,StudyUID,SeriesUID,Tag,vr,Value
0,1.2.276.0.7230010.3.1.2.8323329.13666.15178752...,1.2.276.0.7230010.3.1.3.8323329.13666.15178752...,00080005,CS,ISO_IR 100
1,1.2.276.0.7230010.3.1.2.8323329.13666.15178752...,1.2.276.0.7230010.3.1.3.8323329.13666.15178752...,00080016,UI,1.2.840.10008.5.1.4.1.1.7
2,1.2.276.0.7230010.3.1.2.8323329.13666.15178752...,1.2.276.0.7230010.3.1.3.8323329.13666.15178752...,00080018,UI,1.2.276.0.7230010.3.1.4.8323329.13666.15178752...
3,1.2.276.0.7230010.3.1.2.8323329.13666.15178752...,1.2.276.0.7230010.3.1.3.8323329.13666.15178752...,00080020,DA,19010101
4,1.2.276.0.7230010.3.1.2.8323329.13666.15178752...,1.2.276.0.7230010.3.1.3.8323329.13666.15178752...,00080030,TM,000000.00
5,1.2.276.0.7230010.3.1.2.8323329.13666.15178752...,1.2.276.0.7230010.3.1.3.8323329.13666.15178752...,00080050,SH,
6,1.2.276.0.7230010.3.1.2.8323329.13666.15178752...,1.2.276.0.7230010.3.1.3.8323329.13666.15178752...,00080060,CS,CR
7,1.2.276.0.7230010.3.1.2.8323329.13666.15178752...,1.2.276.0.7230010.3.1.3.8323329.13666.15178752...,00080064,CS,WSD
8,1.2.276.0.7230010.3.1.2.8323329.13666.15178752...,1.2.276.0.7230010.3.1.3.8323329.13666.15178752...,00080090,PN,
9,1.2.276.0.7230010.3.1.2.8323329.13666.15178752...,1.2.276.0.7230010.3.1.3.8323329.13666.15178752...,0008103e,LO,view: AP


## All data

In [72]:
import os

# Folders scanned for .dcm files below.
# Presumably the positive / negative pneumothorax cases, going by the folder names -- confirm.
folder_path_true = "../files/PTX-CXR/train/Pneumothorax"
folder_path_false = "../files/PTX-CXR/train/No Pneumothorax"

def extract_all_metadata(folder_path):
    """Flatten the metadata of every DICOM file directly inside ``folder_path``.

    Args:
        folder_path: Directory to scan (not recursive).

    Returns:
        One flat list holding the row dicts returned by ``flatten_metadata``
        for each ``.dcm`` file, in file-name order.
    """
    metadata = []
    # os.listdir() returns entries in arbitrary order; sorting keeps the row
    # order identical from one run to the next.
    for file_name in sorted(os.listdir(folder_path)):
        # Match the extension case-insensitively so "X.DCM" is not skipped.
        if not file_name.lower().endswith(".dcm"):
            continue
        metadata.extend(flatten_metadata(os.path.join(folder_path, file_name)))
    return metadata

# Rows for every DICOM file found in folder_path_true
ptx_data = extract_all_metadata(folder_path_true)
ptx_df = pd.DataFrame(data=ptx_data)

# Rows for every DICOM file found in folder_path_false
none_ptx_data = extract_all_metadata(folder_path_false)
none_ptx_df = pd.DataFrame(data=none_ptx_data)

In [75]:
# Count the distinct SeriesUID values present in ptx_df
print(ptx_df["SeriesUID"].nunique())

75


In [80]:
# Count the distinct SeriesUID values present in none_ptx_df
print(none_ptx_df["SeriesUID"].nunique())

175
