In [4]:
import os
import xml.etree.ElementTree as ET
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, StandardScaler


def _read_text_lines(file_path, encoding='utf-8'):
    """Read a text file into a DataFrame holding one row per line.

    Args:
        file_path: Path of the text file to read.
        encoding: Text encoding used to decode the file.

    Returns:
        A DataFrame with columns ``Line`` (1-based line number) and
        ``Content`` (the line with surrounding whitespace stripped). If
        anything goes wrong the error is printed and an empty DataFrame
        without columns is returned.
    """
    try:
        with open(file_path, 'r', encoding=encoding) as file:
            # Iterating the handle yields the same lines as readlines().
            content = [line.strip() for line in file]
        return pd.DataFrame({
            'Line': list(range(1, len(content) + 1)),
            'Content': content,
        })
    except Exception as e:
        # Deliberate best-effort: report the problem and let the caller
        # carry on with an empty frame instead of aborting the whole run.
        print(f"Error leyendo {file_path}: {e}")
        return pd.DataFrame()


def read_hdr_file(file_path, encoding='utf-8'):
    """Read a .hdr file into a DataFrame with one row per line.

    Args:
        file_path: Path of the .hdr file.
        encoding: Text encoding of the file; defaults to UTF-8. Pass e.g.
            ``'latin-1'`` for files that are not UTF-8 encoded.

    Returns:
        A DataFrame with ``Line`` and ``Content`` columns, or an empty
        DataFrame if the file could not be read.
    """
    return _read_text_lines(file_path, encoding=encoding)


def read_sld_file(file_path, encoding='utf-8'):
    """Read a .sld file into a DataFrame with one row per line.

    Args:
        file_path: Path of the .sld file.
        encoding: Text encoding of the file; defaults to UTF-8. Pass e.g.
            ``'latin-1'`` for files that are not UTF-8 encoded.

    Returns:
        A DataFrame with ``Line`` and ``Content`` columns, or an empty
        DataFrame if the file could not be read.
    """
    return _read_text_lines(file_path, encoding=encoding)


def read_tfw_file(file_path, encoding='utf-8'):
    """Read a .tfw file into a DataFrame with one row per line.

    Args:
        file_path: Path of the .tfw file.
        encoding: Text encoding of the file; defaults to UTF-8. Pass e.g.
            ``'latin-1'`` for files that are not UTF-8 encoded.

    Returns:
        A DataFrame with ``Line`` and ``Content`` columns, or an empty
        DataFrame if the file could not be read.
    """
    return _read_text_lines(file_path, encoding=encoding)


def read_xml_file(file_path):
    """Parse an XML file and flatten its elements into a DataFrame.

    Every element of the tree (the root included) becomes one row with the
    columns ``Tag``, ``Text`` (stripped text, or ``None`` when the element
    has no text) and ``Attributes`` (the element's attribute mapping).

    If parsing fails the error is printed and an empty DataFrame is
    returned instead.
    """
    try:
        document_root = ET.parse(file_path).getroot()
        records = [
            {
                'Tag': node.tag,
                'Text': node.text.strip() if node.text else None,
                'Attributes': node.attrib,
            }
            for node in document_root.iter()
        ]
        return pd.DataFrame(records)
    except Exception as error:
        print(f"Error leyendo {file_path}: {error}")
        return pd.DataFrame()


def read_dbf_file(file_path):
    """Load the records of a .dbf table into a pandas DataFrame.

    The table is decoded as latin1. If anything fails the error is printed
    and an empty DataFrame is returned instead.
    """
    try:
        # Imported inside the try block, so a missing dbfread package is
        # reported and handled exactly like any other read failure.
        from dbfread import DBF
        return pd.DataFrame(iter(DBF(file_path, encoding='latin1')))
    except Exception as error:
        print(f"Error leyendo {file_path}: {error}")
        return pd.DataFrame()


def main(base_path=None):
    """Read the known files of both groups into pandas DataFrames.

    Args:
        base_path: Directory that contains the files. When ``None`` (the
            default) the original hard-coded directory is used, so a plain
            ``main()`` call behaves as before.

    Returns:
        A dict mapping each group name (``'grupo_1'``, ``'grupo_2'``) to a
        dict of ``{file name: DataFrame}``. Files that do not exist under
        ``base_path`` are reported on stdout and left out of the result.
    """
    if base_path is None:
        base_path = "C:\\Users\\jiahu\\OneDrive\\Escritorio\\AI3\\Bitsxm\\Dades\\LandUse\\"

    # Each file name is paired with the reader that understands its format.
    file_groups = {
        'grupo_1': {
            'MUCSC_2022_10_m_v_3.hdr': read_hdr_file,
            'MUCSC_2022_10_m_v_3.sld': read_sld_file,
            'MUCSC_2022_10_m_v_3.TFW': read_tfw_file,
            'MUCSC_2022_10_m_v_3.xml': read_xml_file,
            'MUCSC_2022_10_m_v_3_tiff_atributs_0_0.dbf': read_dbf_file,
        },
        'grupo_2': {
            'MUCSC_2022_30_m_v_3.hdr': read_hdr_file,
            'MUCSC_2022_30_m_v_3.sld': read_sld_file,
            'MUCSC_2022_30_m_v_3.TFW': read_tfw_file,
            'MUCSC_2022_30_m_v_3.xml': read_xml_file,
            'MUCSC_2022_30_m_v_3_tiff_atributs_0_0.dbf': read_dbf_file,
        },
    }

    all_dataframes = {}
    for group_name, files in file_groups.items():
        group_dfs = {}
        for file_name, read_function in files.items():
            file_path = os.path.join(base_path, file_name)
            if not os.path.exists(file_path):
                # A missing file is reported and skipped, not treated as fatal.
                print(f"El archivo {file_name} no se encontró en {base_path}.")
                continue
            print(f"Leyendo {file_name}...")
            group_dfs[file_name] = read_function(file_path)
        all_dataframes[group_name] = group_dfs

    return all_dataframes


if __name__ == "__main__":
    all_dataframes = main()

    # Summarize what was loaded: one line per DataFrame with its dimensions.
    for group_name, dfs in all_dataframes.items():
        print(f"\nDataFrames del grupo {group_name}:")
        for file_name, df in dfs.items():
            row_count, column_count = df.shape
            print(f"- {file_name}: {row_count} filas, {column_count} columnas")


Leyendo MUCSC_2022_10_m_v_3.hdr...
Leyendo MUCSC_2022_10_m_v_3.sld...
Leyendo MUCSC_2022_10_m_v_3.TFW...
Leyendo MUCSC_2022_10_m_v_3.xml...
Leyendo MUCSC_2022_10_m_v_3_tiff_atributs_0_0.dbf...
Leyendo MUCSC_2022_30_m_v_3.hdr...
Leyendo MUCSC_2022_30_m_v_3.sld...
Leyendo MUCSC_2022_30_m_v_3.TFW...
Leyendo MUCSC_2022_30_m_v_3.xml...
Leyendo MUCSC_2022_30_m_v_3_tiff_atributs_0_0.dbf...

DataFrames del grupo grupo_1:
- MUCSC_2022_10_m_v_3.hdr: 10 filas, 2 columnas
- MUCSC_2022_10_m_v_3.sld: 45 filas, 2 columnas
- MUCSC_2022_10_m_v_3.TFW: 6 filas, 2 columnas
- MUCSC_2022_10_m_v_3.xml: 492 filas, 3 columnas
- MUCSC_2022_10_m_v_3_tiff_atributs_0_0.dbf: 25 filas, 2 columnas

DataFrames del grupo grupo_2:
- MUCSC_2022_30_m_v_3.hdr: 10 filas, 2 columnas
- MUCSC_2022_30_m_v_3.sld: 45 filas, 2 columnas
- MUCSC_2022_30_m_v_3.TFW: 6 filas, 2 columnas
- MUCSC_2022_30_m_v_3.xml: 492 filas, 3 columnas
- MUCSC_2022_30_m_v_3_tiff_atributs_0_0.dbf: 25 filas, 2 columnas
