<a href="https://colab.research.google.com/github/engbrunoconsultor/CNC_Machining/blob/main/CNC_Machining_TCC.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import requests, io, tempfile, os

In [2]:
owner = "engbrunoconsultor"
repo = "CNC_Machining"
branch = "feature_01"
root_paste = "data"




Descubra as keys que o arquivo possui

In [13]:
# def descobre_keys(binario):
#     with tempfile.NamedTemporaryFile(suffix=".h5", delete=False) as tmp:
#         tmp.write(binario)
#         path = tmp.name
#     try:
#         with pd.HDFStore(path, mode="r") as store:
#             return store.keys()       # ex.: ['/df'] ou ['/table'] etc.
#     finally:
#         os.remove(path)

# # exemplo só com o primeiro arquivo:
# arq_teste = h5_file_list(owner, repo, branch,
#                               "data/M01/OP00/good")[0]
# binario   = requests.get(arq_teste["download_url"]).content
# print(descobre_keys(binario))

In [4]:
def bytes_to_df(h5_bytes, key=None):
    """Detecta a key se não for informada e devolve DataFrame."""
    import pandas as pd, tempfile, os

    with tempfile.NamedTemporaryFile(suffix=".h5", delete=False) as tmp:
        tmp.write(h5_bytes)
        path = tmp.name

    try:
        # Se o usuário NÃO passou key, tenta inferir:
        if key is None:
            try:
                return pd.read_hdf(path)           # pandas infere se houver 1 só
            except KeyError:
                pass                    # continua para detecção manual

        # Se key foi passada mas falhou ou ainda não sabemos:
        with pd.HDFStore(path, "r") as store:
            keys = store.keys()         # lista todas
        if not keys:
            raise ValueError("Arquivo HDF5 vazio")
        if key is None or key not in keys:
            key = keys[0]               # usa a primeira disponível
        return pd.read_hdf(path, key=key)
    finally:
        os.remove(path)


Ler o dataset certo e virar DataFrame

In [5]:
import pandas as pd, numpy as np, tempfile, os

def bytes_to_df_flex(h5_bytes, dataset_path=None):
    """
    Tenta ler via pandas; se falhar, abre com h5py
    e converte o dataset_path escolhido em DataFrame.
    """
    with tempfile.NamedTemporaryFile(suffix=".h5", delete=False) as tmp:
        tmp.write(h5_bytes)
        path = tmp.name

    try:
        # 1) primeiro tenta 'read_hdf' normal
        try:
            return pd.read_hdf(path)            # funciona se for estilo pandas
        except (ValueError, KeyError):
            pass                                # vai para fallback

        # 2) fallback h5py
        import h5py
        with h5py.File(path, "r") as f:
            # Se o usuário não informou qual dataset quer, pega o primeiro 2-D
            if dataset_path is None:
                # procura um dataset matriz (ndim==2)
                for name in f:
                    obj = f[name]
                    if isinstance(obj, h5py.Dataset) and obj.ndim == 2:
                        dataset_path = name
                        break
                else:
                    raise ValueError("Nenhum dataset 2-D encontrado.")

            data = f[dataset_path][()]          # lê como ndarray

        # cria DataFrame (colunas numeradas se não houver labels)
        if data.ndim == 1:
            df = pd.DataFrame({"value": data})
        else:
            cols = [f"col_{i}" for i in range(data.shape[1])]
            df = pd.DataFrame(data, columns=cols)

        return df

    finally:
        os.remove(path)


In [6]:
def h5_file_list(owner, repo, branch, api_path):
  url = f"https://api.github.com/repos/{owner}/{repo}/contents/{api_path}?ref={branch}"
  resp = requests.get(url)
  resp.raise_for_status()
  conteudo = resp.json()
  return [item for item in conteudo if item["name"].lower().endswith(".h5")]





In [15]:
def list_subfolders(owner, repo, branch, path):
    """Lista subpastas de um diretório em um repositório via API do GitHub"""
    api_url = f"https://api.github.com/repos/{owner}/{repo}/contents/{path}?ref={branch}"
    resposta = requests.get(api_url)
    if resposta.status_code != 200:
        raise Exception(f"Erro ao acessar {api_url}")
    conteudo = resposta.json()
    return [item['name'] for item in conteudo if item['type'] == 'dir']

Iterar sobre os itens

In [7]:
dfs = {}
machines = ["M01", "M02", "M03"]
for machine in machines:
    api_path = f"{root_paste}/{machine}/OP00/good"
    for arq in h5_file_list(owner, repo, branch, api_path):
        print(f"Lendo {arq['path']}…")
        binario = requests.get(arq["download_url"]).content   # baixa o .h5
        try:
          df = bytes_to_df_flex(binario)    # ← mudança aqui
          dfs[f"{machine}_{arq['name'][:-3]}"] = df
        except Exception as e:
          print(f" {arq['name']} ignorado: {e}")

print("Total lido =>", len(dfs), "DataFrames")

Lendo data/M01/OP00/good/M01_Aug_2019_OP00_000.h5…
Lendo data/M01/OP00/good/M01_Aug_2019_OP00_001.h5…
Lendo data/M01/OP00/good/M01_Aug_2019_OP00_003.h5…
Lendo data/M01/OP00/good/M01_Aug_2019_OP00_004.h5…
Lendo data/M01/OP00/good/M01_Aug_2019_OP00_005.h5…
Lendo data/M01/OP00/good/M01_Aug_2019_OP00_006.h5…
Lendo data/M01/OP00/good/M01_Aug_2019_OP00_007.h5…
Lendo data/M01/OP00/good/M01_Aug_2019_OP00_008.h5…
Lendo data/M01/OP00/good/M01_Aug_2019_OP00_009.h5…
Lendo data/M01/OP00/good/M01_Aug_2019_OP00_010.h5…
Lendo data/M01/OP00/good/M01_Aug_2019_OP00_011.h5…
Lendo data/M01/OP00/good/M01_Aug_2019_OP00_012.h5…
Lendo data/M01/OP00/good/M01_Aug_2019_OP00_013.h5…
Lendo data/M01/OP00/good/M01_Aug_2021_OP00_002.h5…
Lendo data/M01/OP00/good/M01_Aug_2021_OP00_003.h5…
Lendo data/M01/OP00/good/M01_Feb_2019_OP00_000.h5…
Lendo data/M01/OP00/good/M01_Feb_2019_OP00_001.h5…
Lendo data/M01/OP00/good/M01_Feb_2019_OP00_002.h5…
Lendo data/M01/OP00/good/M01_Feb_2019_OP00_003.h5…
Lendo data/M01/OP00/good/M01_Fe

In [16]:
# Dicionário com todos os dataframes
dfs = {}

# Lista de máquinas
machines = ["M01", "M02", "M03"]

for machine in machines:
    # Caminho raiz da máquina
    machine_path = f"{root_paste}/{machine}"

    # Detectar dinamicamente OP folders
    try:
        op_folders = list_subfolders(owner, repo, branch, machine_path)
        op_folders = [op for op in op_folders if op.startswith("OP")]
    except Exception as e:
        print(f"Erro ao listar OPs de {machine_path}: {e}")
        continue

    for op in op_folders:
        for categoria in ["good", "bad"]:
            final_path = f"{machine_path}/{op}/{categoria}"
            try:
                arquivos = h5_file_list(owner, repo, branch, final_path)
            except Exception:
                continue  # Se não existir a pasta, apenas pula

            for arq in arquivos:
                print(f"Lendo {arq['path']}…")
                try:
                    binario = requests.get(arq["download_url"]).content
                    df = bytes_to_df_flex(binario)
                    chave = f"{machine}_{op}_{categoria}_{arq['name'][:-3]}"
                    dfs[chave] = df
                except Exception as e:
                    print(f" {arq['name']} ignorado: {e}")

print("Total lido =>", len(dfs), "DataFrames")

Lendo data/M01/OP00/good/M01_Aug_2019_OP00_000.h5…
Lendo data/M01/OP00/good/M01_Aug_2019_OP00_001.h5…
Lendo data/M01/OP00/good/M01_Aug_2019_OP00_003.h5…
Lendo data/M01/OP00/good/M01_Aug_2019_OP00_004.h5…
Lendo data/M01/OP00/good/M01_Aug_2019_OP00_005.h5…
Lendo data/M01/OP00/good/M01_Aug_2019_OP00_006.h5…
Lendo data/M01/OP00/good/M01_Aug_2019_OP00_007.h5…
Lendo data/M01/OP00/good/M01_Aug_2019_OP00_008.h5…
Lendo data/M01/OP00/good/M01_Aug_2019_OP00_009.h5…
Lendo data/M01/OP00/good/M01_Aug_2019_OP00_010.h5…
Lendo data/M01/OP00/good/M01_Aug_2019_OP00_011.h5…
Lendo data/M01/OP00/good/M01_Aug_2019_OP00_012.h5…
Lendo data/M01/OP00/good/M01_Aug_2019_OP00_013.h5…
Lendo data/M01/OP00/good/M01_Aug_2021_OP00_002.h5…
Lendo data/M01/OP00/good/M01_Aug_2021_OP00_003.h5…
Lendo data/M01/OP00/good/M01_Feb_2019_OP00_000.h5…
Lendo data/M01/OP00/good/M01_Feb_2019_OP00_001.h5…
Lendo data/M01/OP00/good/M01_Feb_2019_OP00_002.h5…
Lendo data/M01/OP00/good/M01_Feb_2019_OP00_003.h5…
Lendo data/M01/OP00/good/M01_Fe

Ver o que foi carregado

In [26]:
# Quantos e quais DataFrames temos?
print(f"{len(dfs)} dataframes no dicionário")
print(list(dfs.keys())[:10])        # mostra só os 10 primeiros nomes


1623 dataframes no dicionário
['M01_OP00_good_M01_Aug_2019_OP00_000', 'M01_OP00_good_M01_Aug_2019_OP00_001', 'M01_OP00_good_M01_Aug_2019_OP00_003', 'M01_OP00_good_M01_Aug_2019_OP00_004', 'M01_OP00_good_M01_Aug_2019_OP00_005', 'M01_OP00_good_M01_Aug_2019_OP00_006', 'M01_OP00_good_M01_Aug_2019_OP00_007', 'M01_OP00_good_M01_Aug_2019_OP00_008', 'M01_OP00_good_M01_Aug_2019_OP00_009', 'M01_OP00_good_M01_Aug_2019_OP00_010']


 tamanho de cada um

In [27]:
for nome, df in dfs.items():
    print(f"{nome:<30}  linhas={len(df):>8}  colunas={df.shape[1]}")

M01_OP00_good_M01_Aug_2019_OP00_000  linhas=  268288  colunas=3
M01_OP00_good_M01_Aug_2019_OP00_001  linhas=  268288  colunas=3
M01_OP00_good_M01_Aug_2019_OP00_003  linhas=  268288  colunas=3
M01_OP00_good_M01_Aug_2019_OP00_004  linhas=  264192  colunas=3
M01_OP00_good_M01_Aug_2019_OP00_005  linhas=  268288  colunas=3
M01_OP00_good_M01_Aug_2019_OP00_006  linhas=  268288  colunas=3
M01_OP00_good_M01_Aug_2019_OP00_007  linhas=  269312  colunas=3
M01_OP00_good_M01_Aug_2019_OP00_008  linhas=  268288  colunas=3
M01_OP00_good_M01_Aug_2019_OP00_009  linhas=  263168  colunas=3
M01_OP00_good_M01_Aug_2019_OP00_010  linhas=  268288  colunas=3
M01_OP00_good_M01_Aug_2019_OP00_011  linhas=  268288  colunas=3
M01_OP00_good_M01_Aug_2019_OP00_012  linhas=  269312  colunas=3
M01_OP00_good_M01_Aug_2019_OP00_013  linhas=  268288  colunas=3
M01_OP00_good_M01_Aug_2021_OP00_002  linhas=  268288  colunas=3
M01_OP00_good_M01_Aug_2021_OP00_003  linhas=  269312  colunas=3
M01_OP00_good_M01_Feb_2019_OP00_000  lin

Abrir (exibir) um DataFrame específico

In [12]:
df_m01 = dfs["M01_M01_Aug_2019_OP00_000"]   # troque pela key que apareceu acima
display(df_m01.head())              # mostra as 5 primeiras linhas
df_m01.info()                       # estrutura completa
df_m01.describe()                   # estatísticas básicas


Unnamed: 0,col_0,col_1,col_2
0,-9.0,9.0,-1011.0
1,-21.0,19.0,-1011.0
2,-15.0,23.0,-1020.0
3,-9.0,27.0,-1013.0
4,-9.0,17.0,-1020.0


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 268288 entries, 0 to 268287
Data columns (total 3 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   col_0   268288 non-null  float64
 1   col_1   268288 non-null  float64
 2   col_2   268288 non-null  float64
dtypes: float64(3)
memory usage: 6.1 MB


Unnamed: 0,col_0,col_1,col_2
count,268288.0,268288.0,268288.0
mean,-3.87487,34.393633,-1014.514865
std,469.406993,158.931394,142.390191
min,-2012.0,-1868.0,-1786.0
25%,-58.0,-54.0,-1097.0
50%,-9.0,27.0,-1036.0
75%,46.0,128.0,-921.0
max,2252.0,2051.0,-152.0


In [28]:
m01 = {k: v for k, v in dfs.items() if k.startswith('M01_')}
m02 = {k: v for k, v in dfs.items() if k.startswith('M02_')}
m03 = {k: v for k, v in dfs.items() if k.startswith('M03_')}

In [29]:
m01

{'M01_OP00_good_M01_Aug_2019_OP00_000':         col_0  col_1   col_2
 0        -9.0    9.0 -1011.0
 1       -21.0   19.0 -1011.0
 2       -15.0   23.0 -1020.0
 3        -9.0   27.0 -1013.0
 4        -9.0   17.0 -1020.0
 ...       ...    ...     ...
 268283  -17.0   23.0 -1013.0
 268284  -19.0   19.0 -1009.0
 268285  -13.0   33.0 -1015.0
 268286  -13.0   27.0 -1016.0
 268287  -31.0   25.0 -1020.0
 
 [268288 rows x 3 columns],
 'M01_OP00_good_M01_Aug_2019_OP00_001':         col_0  col_1   col_2
 0       -15.0   21.0 -1009.0
 1       -13.0   23.0 -1013.0
 2       -13.0   19.0 -1009.0
 3       -11.0   33.0 -1015.0
 4       -15.0   31.0 -1016.0
 ...       ...    ...     ...
 268283  -13.0   39.0 -1020.0
 268284   -3.0   27.0 -1013.0
 268285   -5.0    7.0 -1028.0
 268286  -11.0   21.0 -1022.0
 268287   -5.0   15.0 -1005.0
 
 [268288 rows x 3 columns],
 'M01_OP00_good_M01_Aug_2019_OP00_003':         col_0  col_1   col_2
 0        -9.0   15.0 -1018.0
 1       -27.0   15.0 -1005.0
 2       -13.



---



In [30]:
import numpy as np
import os
import random
import tensorflow
from tensorflow import keras
from keras.models import Model
from keras.layers import Input, Dense
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [None]:
#seed para reprodutibilidade
seed = 0
np.random.seed(seed)
tensorflow.random.set_seed(seed)
random.seed(seed)
os.environ['PYTHONHASHSEED'] = str(seed)

initializer = tensorflow.keras.initializers.GlorotUniform(seed=seed)
tensorflow.config.experimental.enable_op_determinism()

