""" @author = abhijeet mote abhijeetmote@gmail.compile Github : https://github.com/abhijeetmote LinkedIn : https://www.linkedin.com/in/abhijeet-mote/ """ import xml.etree.ElementTree as ET def parse_phone_xml(filename): """ @author = abhijeet mote abhijeetmote@gmail.com Github : https://github.com/abhijeetmote LinkedIn : https://www.linkedin.com/in/abhijeet-mote/ This Method is used to parse phone XML file . Parameters ---------- filename : name of the phoone XML file . example : parse_phone_xml('C:\digital_congni\phonenumbers.xml') """ tree = ET.parse(filename) root = tree.getroot() final_dicts = {} node_dict = {} counter = 0 for i in root.findall('.//'): if i.tag == 'territory': node_dict[i.tag] = i.get('countryCode') elif i.tag == 'availableFormats': sub_child_pattern = [] sub_child_leadingDigits = [] sub_child_format = [] for sub_child in i.findall('.//'): if sub_child.tag == 'numberFormat': sub_child_pattern.append(sub_child.get('pattern')) node_dict[sub_child.tag] = sub_child_pattern elif sub_child.tag == 'leadingDigits': sub_child_leadingDigits.append(sub_child.text.replace(' ','').replace('\n','')) node_dict[sub_child.tag] = sub_child_leadingDigits elif sub_child.tag == 'format': #import pdb;pdb.set_trace() sub_child_format.append(sub_child.text) node_dict[sub_child.tag] = sub_child_format final_dicts[counter] = node_dict counter += 1 node_dict = {} ss = pd.DataFrame.from_dict(final_dicts, orient='index') ss = ss.replace(np.nan, "") for index, row in ss.iterrows(): ss.iloc[index].territory = [ss.iloc[index].territory] * len(ss.iloc[index].numberFormat) #ss.iloc[index].format = [ss.iloc[index].format] * len(ss.iloc[index].numberFormat) df = pd.DataFrame() for index, row in ss.iterrows(): lst = [] for i in ss.iloc[index]: lst.append(i) df = df.append([pd.DataFrame(lst).T]) df.columns = ['countryCode', 'pattern', 'leadingDigits', 'format'] return df df = parse_phone_xml('C:\digital_congni\phonenumbers.xml') print(df)