In [None]:
import os

def extract_python_files(base_paths):
    """
    Collect the text content of every ``.py`` file under the given directories.

    Each base path is walked recursively with ``os.walk``. Within every
    directory the file names are visited in sorted order and sub-directories
    are descended in sorted order, so repeated runs produce the same key
    ordering. Files that cannot be opened or decoded as UTF-8 are reported
    on stdout and skipped. Base paths that do not exist contribute nothing.

    Parameters:
        base_paths (list | str | os.PathLike): Base directories to search.
            A single path is also accepted and treated as a one-element list.

    Returns:
        dict: A dictionary with the file paths as keys and file content as values.
    """
    # A bare string would otherwise be iterated character by character, so a
    # path such as "/tmp/x" would start a walk from "/".
    if isinstance(base_paths, (str, os.PathLike)):
        base_paths = [base_paths]

    extracted_data = {}

    for base_path in base_paths:
        for root, dirs, files in os.walk(base_path):
            # Sorting in place steers the order in which os.walk (top-down)
            # descends into sub-directories.
            dirs.sort()
            for file in sorted(files):
                if not file.endswith('.py'):
                    continue
                file_path = os.path.join(root, file)
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        extracted_data[file_path] = f.read()
                except (OSError, UnicodeError) as e:
                    # Best effort: report the unreadable file and keep going.
                    print(f"Error reading {file_path}: {e}")

    return extracted_data

def save_extracted_data(output_file, extracted_data):
    """
    Write all extracted files into one human-readable text report.

    Every entry is emitted as a "# <path>" header line, then the content
    followed by a newline, then a blank line, a rule of 80 dashes and
    another blank line.

    Parameters:
        output_file (str): Path of the report; opened in write mode as UTF-8.
        extracted_data (dict): Dictionary containing file paths and content.
    """
    divider = "\n" + "-"*80 + "\n\n"
    with open(output_file, 'w', encoding='utf-8') as report:
        for path, content in extracted_data.items():
            report.writelines((f"# {path}\n", f"{content}\n", divider))

if __name__ == "__main__":
    # Folders that are scanned recursively for Python sources
    directories_to_search = ["dags", "include"]
    # Gather the content of every Python file found under those folders
    python_files = extract_python_files(base_paths=directories_to_search)
    # Dump everything that was gathered into a single text report
    save_extracted_data(
        output_file="extracted_python_files.txt",
        extracted_data=python_files,
    )
    print("Extraction completed! Check 'extracted_python_files.txt' for the output.")


In [5]:
import os

def extract_files(base_paths, extensions):
    """
    Collect the text content of files with the given extensions.

    Each base path is walked recursively with ``os.walk``. Within every
    directory the file names are visited in sorted order and sub-directories
    are descended in sorted order, so repeated runs produce the same key
    ordering. Files that cannot be opened or decoded as UTF-8 are reported
    on stdout and skipped. Base paths that do not exist contribute nothing.

    Parameters:
        base_paths (list | str | os.PathLike): Base directories to search.
            A single path is also accepted and treated as a one-element list.
        extensions (list | str): File name suffixes to include
            (e.g., ['.py', '.yml']). A single suffix string is also accepted.
            Matching is case-sensitive; an empty collection matches nothing.

    Returns:
        dict: A dictionary with the file paths as keys and file content as values.
    """
    # Bare strings would otherwise be iterated character by character: a path
    # like "/tmp/x" would start a walk from "/", and ".py" would match any
    # name ending in ".", "p" or "y".
    if isinstance(base_paths, (str, os.PathLike)):
        base_paths = [base_paths]
    if isinstance(extensions, str):
        extensions = [extensions]

    # str.endswith accepts a tuple of candidates; build it once up front.
    suffixes = tuple(extensions)
    extracted_data = {}

    for base_path in base_paths:
        for root, dirs, files in os.walk(base_path):
            # Sorting in place steers the order in which os.walk (top-down)
            # descends into sub-directories.
            dirs.sort()
            for file in sorted(files):
                if not file.endswith(suffixes):
                    continue
                file_path = os.path.join(root, file)
                try:
                    with open(file_path, 'r', encoding='utf-8') as f:
                        extracted_data[file_path] = f.read()
                except (OSError, UnicodeError) as e:
                    # Best effort: report the unreadable file and keep going.
                    print(f"Error reading {file_path}: {e}")

    return extracted_data

def save_extracted_data(output_file, extracted_data):
    """
    Dump the extracted files into a single readable text file.

    For each entry the output contains a "# <path>" header line, the content
    followed by a newline, and a separator consisting of a blank line, 80
    dashes and another blank line.

    Parameters:
        output_file (str): Path to the output file; opened in write mode as UTF-8.
        extracted_data (dict): Dictionary containing file paths and content.
    """
    rule = "-"*80
    with open(output_file, 'w', encoding='utf-8') as sink:
        for path in extracted_data:
            content = extracted_data[path]
            pieces = [f"# {path}\n", f"{content}\n", "\n" + rule + "\n\n"]
            sink.write("".join(pieces))

if __name__ == "__main__":
    # Folders that are scanned recursively
    directories_to_search = ["dags", "include"]
    # Only files whose names end with one of these suffixes are collected
    file_extensions = [".py", ".yml"]
    # Gather the content of every matching file
    extracted_files = extract_files(
        base_paths=directories_to_search,
        extensions=file_extensions,
    )
    # Dump everything that was gathered into a single text report
    save_extracted_data(
        output_file="extracted_files.txt",
        extracted_data=extracted_files,
    )
    print("Extraction completed! Check 'extracted_files.txt' for the output.")

Extraction completed! Check 'extracted_files.txt' for the output.


In [None]:
# Importando bibliotecas necessárias
from kaggle.api.kaggle_api_extended import KaggleApi
import json
import os
import tempfile

# Create the Kaggle API client and authenticate it; the functions below use
# this module-level `api` object.
api = KaggleApi()
api.authenticate()

def get_kaggle_metadata(dataset_id):
    """
    Fetch the metadata of a Kaggle dataset, print its fields and return it.

    The metadata is downloaded through the module-level ``api`` client into a
    temporary directory, which is removed again when the function returns.

    Parameters:
        dataset_id (str): Dataset ID in the form 'username/dataset-name'.

    Returns:
        dict: Parsed content of "dataset-metadata.json", or None when that
        file is not present in the download directory.
    """
    with tempfile.TemporaryDirectory() as temp_dir:
        # Download the dataset metadata
        api.dataset_metadata(dataset_id, path=temp_dir)

        # Path of the metadata file inside the temporary directory
        metadata_path = os.path.join(temp_dir, "dataset-metadata.json")

        if not os.path.exists(metadata_path):
            print(f"Metadados para '{dataset_id}' não encontrados.")
            return None

        # Load the metadata. JSON text is UTF-8, so state the encoding
        # explicitly instead of relying on the platform's locale default.
        with open(metadata_path, "r", encoding="utf-8") as file:
            metadata = json.load(file)

        print(f"Metadados disponíveis para o dataset '{dataset_id}':")
        for key, value in metadata.items():
            print(f"- {key}: {value}")

        return metadata

# Kaggle dataset ID
dataset_id = "olistbr/marketing-funnel-olist"  # Change to the dataset you want to inspect

# Call the function, which prints the metadata fields and returns them
metadata = get_kaggle_metadata(dataset_id)
