In [45]:
import xml.etree.ElementTree as ET
import pandas as pd
from tqdm import tqdm  # Import tqdm for the progress bar

# Locations of the Z1 data XML and of the XSD schema file.
# NOTE(review): the schema path uses the directory FRB_z0 while the data file
# sits under FRB_z1 -- confirm this is intentional.
z1_xml_file = './FRB_z1/Z1_data.xml'
schema_file = './FRB_z0/Z1_Z1.xsd'

# Prefix -> namespace URI map. It is passed to ElementTree's find/findall so
# that prefixed paths such as 'kf:Series' or 'frb:Obs' can be resolved.
namespaces = dict(
    kf='http://www.federalreserve.gov/structure/compact/Z1_Z1',
    frb='http://www.federalreserve.gov/structure/compact/common',
    common='http://www.SDMX.org/resources/SDMXML/schemas/v1_0/common',
)
# Read the whole XML document and keep both the tree and its root element.
tree = ET.parse(z1_xml_file)
root = tree.getroot()

# Every kf:Series element found anywhere below the root.
series_elements = root.findall('.//kf:Series', namespaces=namespaces)

# Accumulators filled while walking the series:
#   all_series -- one attribute dict per series
#   z1df       -- observation rows, one column per entry of column_names
all_series = list()
column_names = ['Date', 'Obs_value', 'Series_name']
z1df = pd.DataFrame(columns=column_names)

# Walk every kf:Series element (with a progress bar) and collect:
#   all_series -- one dict per series: its XML attributes plus one
#                 annotation-type -> annotation-text entry per annotation
#   z1df       -- one row per frb:Obs: Date, Obs_value, Series_name
#
# Per-series frames are gathered in a list and concatenated ONCE after the
# loop. Concatenating onto z1df inside the loop copies every row collected so
# far on each iteration, so the run time grows quadratically with the number
# of series.
#
# Series without an frb:Annotations child are reported and skipped entirely.
# A series lacking a 'SERIES_NAME' attribute/annotation raises KeyError.
series_frames = []

for series in tqdm(series_elements, desc="Processing series"):
    # Copy the attributes: series.attrib is the element's own dict, so adding
    # annotation keys to it directly would modify the parsed XML tree.
    series_attributes = dict(series.attrib)

    # Extract frb:Annotations data
    annotations_element = series.find('frb:Annotations', namespaces)
    if annotations_element is None:
        print('Error: Annotations element not found')
        continue

    for annotation in annotations_element.findall('common:Annotation', namespaces):
        annotation_type = annotation.find('common:AnnotationType', namespaces).text
        annotation_text = annotation.find('common:AnnotationText', namespaces).text
        series_attributes[annotation_type] = annotation_text

    # Pull the value and period attributes off each frb:Obs child.
    observations = series.findall('frb:Obs', namespaces)
    obs_value = [obs.get('OBS_VALUE') for obs in observations]
    time_period = [obs.get('TIME_PERIOD') for obs in observations]

    tempdf = pd.DataFrame({'Date': time_period, 'Obs_value': obs_value})
    tempdf['Series_name'] = series_attributes['SERIES_NAME']

    all_series.append(series_attributes)
    series_frames.append(tempdf)

# Single concatenation; z1df goes first so its existing columns/rows are kept.
z1df = pd.concat([z1df, *series_frames], ignore_index=True)

# Preview the first five rows of the assembled observations frame.

z1df.head(n=5)

KeyboardInterrupt: 