In [None]:
import traceback

import pandas as pd
import re
import os
import xml.etree.ElementTree as ET

In [None]:
"""
WSUS GPO Investigation Script
-----------------------------

Parses a backup directory of Group Policy Objects (GPOs) exported in XML format to extract
policy metadata and identify any GPOs that reference WSUS servers or update-related configurations.

Main Capabilities:
------------------
- Recursively scans backup folders to collect `.xml` GPO policy files
- Parses XML structure into flat key-value metadata per GPO
- Extracts creation and modification timestamps
- Searches file contents line by line (case-insensitively) for 'update server' or 'wsus', skipping lines that mention 'investigation'
- Generates:
    - Full parsed metadata CSV (`.csv`)
    - Filtered subset with WSUS/update-related references (`.csv`)

Use Cases:
----------
- Compliance and patch management audits
- Detection of unauthorized WSUS configurations
- GPO policy visibility and reporting

Author: Gabe McWilliams
"""


In [None]:
# Root directory of the exported GPO backup. Later cells walk this directory and
# strip this exact prefix from folder paths to recover each GPO's id.
source_dir = "./data/gpo_backup_2024_07_16"

In [None]:
# Discover the GPO backup folders and list every XML file they contain.
#
# Each immediate sub-directory of `source_dir` is treated as one GPO backup.
# Folder paths are stored as '<source_dir>/<name>' because this cell and the
# keyword-scan cell strip that exact prefix to recover the GPO id.
# `next(..., default)` keeps a missing `source_dir` a silent no-op, as before.
_, top_level_dirs, _ = next(os.walk(source_dir), (source_dir, [], []))
folders_set = {f'{source_dir}/{name}' for name in top_level_dirs}

files_list = []
# Sorted so the collected files (and everything derived from them) come out
# in a reproducible order; set iteration order is arbitrary.
for folder in sorted(folders_set):
    # The folder name is the GPO id wrapped in braces; drop the braces.
    gpo_uuid = re.sub(r'[{}]+', '', folder.replace(f'{source_dir}/', ""))
    for root, dirs, files in os.walk(folder):
        for file in files:
            if not file.lower().endswith('.xml'):
                continue
            filepath = f'{root}/{file}'
            files_list.append(
                {
                    "gpo_uuid": gpo_uuid,
                    "file_info": {
                        'filepath': filepath,
                        "file_name": file,
                        'folder': folder,
                        # os.path.getctime/getmtime return seconds since the epoch;
                        # without unit='s' pandas would read them as nanoseconds.
                        'created_at': pd.to_datetime(os.path.getctime(filepath), unit='s'),
                        'last_modified_at': pd.to_datetime(os.path.getmtime(filepath), unit='s'),
                    },
                }
            )

In [None]:
def strip_tag(tag: str) -> str:
    """Return ``tag`` without its brace-delimited part.

    Everything from the first ``{`` up to the last ``}`` (on one line) is
    removed, e.g. the ``{namespace}`` prefix ElementTree puts on element tags.
    A tag without braces is returned unchanged.
    """
    brace_section = re.compile(r'\{.*\}')
    bare_tag = brace_section.sub('', tag)
    return str(bare_tag)


def strip_text(text: str | None) -> str:
    if text is None:
        return ''
    return str(re.sub(r'[{}]+', '', text))

In [None]:
# Parse every collected XML file into one flat {name: value} dict per GPO.
#
# For each top-level element of the document and each of its direct children,
# the element's attributes are copied in as-is and its text is stored under
# the tag name (brace-delimited prefix removed). Deeper levels are not
# visited. Later values overwrite earlier ones that share a key.
gpo_info_dict = {}

for file in files_list:
    filepath = file["file_info"]["filepath"]
    try:
        # Several files can belong to the same GPO: keep extending that GPO's dict.
        file_info_dict = gpo_info_dict.get(file['gpo_uuid'], {})
        print(f'Processing {filepath}\n')
        print('-' * 80)

        root = ET.parse(filepath).getroot()

        for element in root:
            # Visit the element itself first, then its direct children.
            for node in (element, *element):
                file_info_dict.update(node.attrib)
                file_info_dict[strip_tag(node.tag)] = strip_text(node.text)

        gpo_info_dict[file['gpo_uuid']] = file_info_dict

    except Exception:
        # Best effort: report the failure (with traceback) and move on to the next file.
        print(f'There was an exception creating a DF for {file["gpo_uuid"]}')
        print(traceback.format_exc())



In [None]:
# Build the metadata table: one row per GPO, one column per collected key.
def _flatten_into(target: dict, mapping: dict) -> None:
    """Copy every key of ``mapping`` into ``target``, descending into nested dicts first.

    Keys of a nested dict are written before the key that holds the dict, so
    an outer key wins when an inner key has the same name. The nested dict
    itself is still stored under its own key.
    """
    for name, value in mapping.items():
        if isinstance(value, dict):
            _flatten_into(target, value)
        target[name] = value


gpo_rows = []
for gpo_uuid, gpo_values in gpo_info_dict.items():
    row = {'gpo_uuid': gpo_uuid}
    _flatten_into(row, gpo_values)
    gpo_rows.append(row)

# Build the frame once from all rows instead of concatenating inside the loop
# (which copies the whole frame on every iteration).
df = pd.DataFrame(gpo_rows)

In [None]:
# Turn empty-string cells into missing values, then drop every column that
# has no value left in any row.
blank_cells = df == ''
df = df.mask(blank_cells).dropna(axis=1, how='all')

In [None]:
# Write the full parsed-metadata table to disk, without the index column.
df.to_csv(path_or_buf='.csv', index=False)

In [None]:
# Scan the raw text of every XML file for WSUS / update-server references and
# collect the ids of the GPOs that contain at least one such line.
def _sniff_text_encoding(path: str) -> str:
    """Choose a text encoding for ``path`` from its byte-order mark.

    Returns 'utf-16' for a UTF-16 BOM, 'utf-8-sig' for a UTF-8 BOM and
    'utf-8' when the file has no BOM.
    """
    with open(path, 'rb') as raw:
        head = raw.read(3)
    if head.startswith((b'\xff\xfe', b'\xfe\xff')):
        return 'utf-16'
    if head.startswith(b'\xef\xbb\xbf'):
        return 'utf-8-sig'
    return 'utf-8'


update_settings_uid_list = []

for folder in folders_set:
    # The folder name is the GPO id wrapped in braces; drop the braces.
    gpo_uuid = re.sub(r'[{}]+', '', (folder.replace(f'{source_dir}/', "")))
    for root, dirs, files in os.walk(folder):
        for file in files:
            if not file.lower().endswith('.xml'):
                continue
            xml_path = f'{root}/{file}'
            try:
                # errors='replace' keeps a stray undecodable byte from hiding the whole file.
                with open(xml_path, 'r', encoding=_sniff_text_encoding(xml_path), errors='replace') as f:
                    for line in f:
                        lowered = line.lower()
                        mentions_update = 'update server' in lowered or 'wsus' in lowered
                        if mentions_update and 'investigation' not in lowered:
                            print(line)
                            # Every matching line is printed, but each GPO is recorded only once.
                            if gpo_uuid not in update_settings_uid_list:
                                update_settings_uid_list.append(gpo_uuid)
            except OSError as exc:
                # Still best effort, but say which file could not be read instead of hiding it.
                print(f'Could not read {xml_path}: {exc}')

In [None]:
# Export only the GPOs whose XML mentioned WSUS / an update server.
# This goes to its own file: the full-metadata export already writes '.csv',
# and reusing that path here would overwrite it.
if 'gpo_uuid' in df.columns:
    wsus_df = df[df['gpo_uuid'].isin(update_settings_uid_list)]
else:
    # Nothing was parsed, so there is no id column to filter on; export an empty table.
    wsus_df = df.head(0)

wsus_df.to_csv('wsus_gpo_references.csv', index=False)