In [1]:
# Importaciones estándar para análisis de datos
import os          # Operaciones del sistema de archivos
import glob        # Búsqueda de archivos con patrones

# Bibliotecas numéricas y de manejo de datos
import numpy as np  # Operaciones numéricas
import pandas as pd # Manipulación de datos

# Visualización
import matplotlib.pyplot as plt # Gráficos

# Preprocesamiento de datos
from sklearn.preprocessing import (
    MinMaxScaler,   # Normalización [0, 1]
    StandardScaler, # Estandarización (media=0, var=1) 
    RobustScaler    # Escalado robusto a outliers
)

# Persistencia de modelos
import joblib  # Guardar/cargar modelos

# Configuración de Jupyter Notebook
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"  # Mostrar múltiples outputs

In [2]:
# Carga y visualización básica de datos CSV

# 1. Construye la ruta al archivo CSV combinando directorio y nombre de archivo
file = os.path.join('C:/Users/Milovan/Desktop/notebook para repositorio/chancadores/', 'Chancador_Raw_Data.csv')

# 2. Carga el archivo CSV en un DataFrame de pandas llamado df1
df1 = pd.read_csv(file)

# 3. Crea un diccionario con información básica sobre el DataFrame
attrs = {
    'var': 'df1',               # Nombre de la variable donde está almacenado
    'type': type(df1).__name__,  # Tipo de objeto (será 'DataFrame')
}

# 4. Muestra los resultados:
[attrs]  # Muestra el diccionario de metadatos dentro de una lista
df1      # Muestra el contenido completo del DataFrame

  df1 = pd.read_csv(file)


[{'var': 'df1', 'type': 'DataFrame'}]

Unnamed: 0.1,Unnamed: 0,Tiempo,Corriente,Presión C. Eje,T° Retorno,T° socket Linner,T° Excéntrica,T° Alimentación,T° Desc. C. Eje,Salto Anillo,...,Nivel Taza,Setting,Corriente.1,Pesómetro,Corriente.2,V. Cero,Corriente.3,V. Cero.1,Corriente.4,V. Cero.2
0,0,2017-07-01 08:00:00,67.05307006835938,362.78125,39.60781478881836,36.28849029541016,55.49257659912109,34.39426040649414,34.88699340820312,7.641797065734863,...,40.26848220825195,28.60000610351562,45,2275.893310546875,84.06780242919922,101.5626068115234,39.59999847412109,94.04566955566406,Bad Input,0.3102322518825531
1,1,2017-07-01 08:02:00,59.17750549316406,387.68359375,39.49625015258789,36.19640731811523,55.34035873413086,34.04961013793945,34.83254241943359,7.492578029632568,...,40.79876327514648,28.60000610351562,46,2300.636962890625,83.05084991455078,101.5591583251953,40,90.39868927001953,Bad Input,0.3104053437709808
2,2,2017-07-01 08:04:00,63.52828216552734,305.9140625,39.35148620605469,36.12993240356445,55.47822570800781,33.91414260864258,34.7747688293457,7.526171684265137,...,40.99420166015625,28.60000610351562,45,2259.741455078125,83.05084991455078,101.4512939453125,40,89.26689910888672,Bad Input,0.3102254867553711
3,3,2017-07-01 08:06:00,65.61479187011719,360.4296875,39.30965042114258,36.00020599365234,55.26688385009766,33.76937484741211,34.65125274658203,7.255859375,...,40.18500518798828,28.60000610351562,45.40000152587891,2412.8974609375,86.440673828125,101.2942047119141,41,96.05992889404297,Bad Input,0.3109926283359528
4,4,2017-07-01 08:08:00,68.3459243774414,318.9921875,39.19012069702148,36.00524520874023,55.60165023803711,33.6657829284668,34.54035186767578,7.751172065734863,...,40.91349411010742,28.60000610351562,46.59999847412109,2487.29345703125,88.1355972290039,101.3100662231445,41.40000152587891,95.88184356689453,Bad Input,0.3103735744953156
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1048570,60723,2019-09-23 17:06:00,0,1.80078125,33.85968780517578,37.81310653686523,47.38640594482422,29.54062652587891,22.3793773651123,-0.07421875,...,Bad Input,32.20000457763672,Bad Input,-28.52626609802246,0,-0.2583421468734741,Bad Input,-0.466058611869812,Bad Input,0.4079932272434235
1048571,60724,2019-09-23 17:08:00,0,1.81640625,33.79726791381836,37.80581665039062,47.36933517456055,29.28695487976074,22.37539291381836,-0.07421875,...,Bad Input,32.20000457763672,Bad Input,-28.52626609802246,0,-0.2614571452140808,Bad Input,-0.4668164253234863,Bad Input,0.4085834622383118
1048572,60725,2019-09-23 17:10:00,0,1.84765625,33.82914352416992,37.8098258972168,47.35480117797852,29.22320556640625,22.39398574829102,-0.07421875,...,Bad Input,32.20000457763672,Bad Input,-28.52626609802246,0,-0.2594147324562073,Bad Input,-0.4706054925918579,Bad Input,0.4086388945579529
1048573,60726,2019-09-23 17:12:00,0,1.85546875,33.98719024658203,37.85100936889648,47.34027099609375,29.23515892028809,22.33023643493652,-0.07421875,...,Bad Input,32.20000457763672,Bad Input,-28.52626609802246,0,-0.2595284879207611,Bad Input,-0.4706054925918579,Bad Input,0.4078384935855865


In [3]:
# Eliminación de columnas específicas del DataFrame

# 1. Elimina las columnas especificadas
columnas_a_eliminar = [
    'Unnamed: 0',    # Columna de índice automático (generalmente innecesaria)
    'Corriente.1',    # Posible duplicado de medición de corriente
    'Pesómetro',     # Datos de peso (si no son relevantes para el análisis)
    'Corriente.2',    # Otra medición redundante de corriente
    'V. Cero',       # Valores de voltaje en cero
    'Corriente.3',    # Medición adicional de corriente
    'V. Cero.1',     # Segundo conjunto de valores de voltaje en cero
    'Corriente.4',    # Otra medición de corriente
    'V. Cero.2'      # Tercer conjunto de valores de voltaje en cero
]

df1 = df1.drop(columnas_a_eliminar, axis='columns')  # axis='columns' indica que son columnas a eliminar

# 2. Crea metadatos del DataFrame resultante
attrs = {
    'var': 'df1',               # Nombre de la variable
    'type': type(df1).__name__,  # Tipo de objeto (DataFrame)
}

# 3. Muestra los resultados:
[attrs]  # Muestra los metadatos en formato de lista
df1      # Muestra el DataFrame sin las columnas eliminadas

[{'var': 'df1', 'type': 'DataFrame'}]

Unnamed: 0,Tiempo,Corriente,Presión C. Eje,T° Retorno,T° socket Linner,T° Excéntrica,T° Alimentación,T° Desc. C. Eje,Salto Anillo,Unnamed: 9,Unnamed: 10,Unnamed: 11,Nivel Taza,Setting
0,2017-07-01 08:00:00,67.05307006835938,362.78125,39.60781478881836,36.28849029541016,55.49257659912109,34.39426040649414,34.88699340820312,7.641797065734863,10.31718730926514,6.255078315734863,14.70351600646973,40.26848220825195,28.60000610351562
1,2017-07-01 08:02:00,59.17750549316406,387.68359375,39.49625015258789,36.19640731811523,55.34035873413086,34.04961013793945,34.83254241943359,7.492578029632568,9.986719131469727,5.973828315734863,13.18476581573486,40.79876327514648,28.60000610351562
2,2017-07-01 08:04:00,63.52828216552734,305.9140625,39.35148620605469,36.12993240356445,55.47822570800781,33.91414260864258,34.7747688293457,7.526171684265137,10.84414100646973,6.708593845367432,14.61250019073486,40.99420166015625,28.60000610351562
3,2017-07-01 08:06:00,65.61479187011719,360.4296875,39.30965042114258,36.00020599365234,55.26688385009766,33.76937484741211,34.65125274658203,7.255859375,9.587109565734863,6.538281440734863,13.70585918426514,40.18500518798828,28.60000610351562
4,2017-07-01 08:08:00,68.3459243774414,318.9921875,39.19012069702148,36.00524520874023,55.60165023803711,33.6657829284668,34.54035186767578,7.751172065734863,10.17617225646973,7.362890720367432,14.80859375,40.91349411010742,28.60000610351562
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1048570,2019-09-23 17:06:00,0,1.80078125,33.85968780517578,37.81310653686523,47.38640594482422,29.54062652587891,22.3793773651123,-0.07421875,0.01171875,-0.0546875,-0.060546875,Bad Input,32.20000457763672
1048571,2019-09-23 17:08:00,0,1.81640625,33.79726791381836,37.80581665039062,47.36933517456055,29.28695487976074,22.37539291381836,-0.07421875,0.01171875,-0.0546875,-0.060546875,Bad Input,32.20000457763672
1048572,2019-09-23 17:10:00,0,1.84765625,33.82914352416992,37.8098258972168,47.35480117797852,29.22320556640625,22.39398574829102,-0.07421875,0.01171875,-0.0546875,-0.060546875,Bad Input,32.20000457763672
1048573,2019-09-23 17:12:00,0,1.85546875,33.98719024658203,37.85100936889648,47.34027099609375,29.23515892028809,22.33023643493652,-0.07421875,0.01171875,-0.0546875,-0.060546875,Bad Input,32.20000457763672


In [4]:
# Renombrado de columnas del DataFrame

# 1. Renombra las columnas especificadas
df1 = df1.rename(columns={
    'Salto Anillo': 'Salto anillo A',  # Renombra 'Salto Anillo' a 'Salto anillo A'
    'Unnamed: 9': 'Salto Anillo B',    # Renombra columna sin nombre a 'Salto Anillo B'
    'Unnamed: 10': 'Salto Anillo C',   # Renombra columna sin nombre a 'Salto Anillo C'
    'Unnamed: 11': 'Salto Anillo D'    # Renombra columna sin nombre a 'Salto Anillo D'
})

# 2. Crea metadatos del DataFrame
attrs = {
    'var': 'df1',               # Nombre de la variable
    'type': type(df1).__name__,  # Tipo de objeto (DataFrame)
}

# 3. Muestra resultados:
[attrs]  # Muestra los metadatos en formato lista
df1      # Muestra el DataFrame con los nuevos nombres de columnas

[{'var': 'df1', 'type': 'DataFrame'}]

Unnamed: 0,Tiempo,Corriente,Presión C. Eje,T° Retorno,T° socket Linner,T° Excéntrica,T° Alimentación,T° Desc. C. Eje,Salto anillo A,Salto Anillo B,Salto Anillo C,Salto Anillo D,Nivel Taza,Setting
0,2017-07-01 08:00:00,67.05307006835938,362.78125,39.60781478881836,36.28849029541016,55.49257659912109,34.39426040649414,34.88699340820312,7.641797065734863,10.31718730926514,6.255078315734863,14.70351600646973,40.26848220825195,28.60000610351562
1,2017-07-01 08:02:00,59.17750549316406,387.68359375,39.49625015258789,36.19640731811523,55.34035873413086,34.04961013793945,34.83254241943359,7.492578029632568,9.986719131469727,5.973828315734863,13.18476581573486,40.79876327514648,28.60000610351562
2,2017-07-01 08:04:00,63.52828216552734,305.9140625,39.35148620605469,36.12993240356445,55.47822570800781,33.91414260864258,34.7747688293457,7.526171684265137,10.84414100646973,6.708593845367432,14.61250019073486,40.99420166015625,28.60000610351562
3,2017-07-01 08:06:00,65.61479187011719,360.4296875,39.30965042114258,36.00020599365234,55.26688385009766,33.76937484741211,34.65125274658203,7.255859375,9.587109565734863,6.538281440734863,13.70585918426514,40.18500518798828,28.60000610351562
4,2017-07-01 08:08:00,68.3459243774414,318.9921875,39.19012069702148,36.00524520874023,55.60165023803711,33.6657829284668,34.54035186767578,7.751172065734863,10.17617225646973,7.362890720367432,14.80859375,40.91349411010742,28.60000610351562
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1048570,2019-09-23 17:06:00,0,1.80078125,33.85968780517578,37.81310653686523,47.38640594482422,29.54062652587891,22.3793773651123,-0.07421875,0.01171875,-0.0546875,-0.060546875,Bad Input,32.20000457763672
1048571,2019-09-23 17:08:00,0,1.81640625,33.79726791381836,37.80581665039062,47.36933517456055,29.28695487976074,22.37539291381836,-0.07421875,0.01171875,-0.0546875,-0.060546875,Bad Input,32.20000457763672
1048572,2019-09-23 17:10:00,0,1.84765625,33.82914352416992,37.8098258972168,47.35480117797852,29.22320556640625,22.39398574829102,-0.07421875,0.01171875,-0.0546875,-0.060546875,Bad Input,32.20000457763672
1048573,2019-09-23 17:12:00,0,1.85546875,33.98719024658203,37.85100936889648,47.34027099609375,29.23515892028809,22.33023643493652,-0.07421875,0.01171875,-0.0546875,-0.060546875,Bad Input,32.20000457763672


In [5]:
# Conversión de tipos de datos manteniendo el formato original

# 1. Conversión de columna de tiempo a formato datetime
df1['Tiempo'] = pd.to_datetime(df1['Tiempo'], errors='coerce')  # Convierte errores a NaT

# 2. Lista de columnas numéricas a convertir
columnas_numericas = [
    'Corriente',
    'Presión C. Eje',
    'T° Retorno', 
    'T° socket Linner',
    'T° Excéntrica',
    'T° Alimentación',
    'T° Desc. C. Eje',
    'Salto anillo A',
    'Salto Anillo B',
    'Salto Anillo C',
    'Salto Anillo D',
    'Nivel Taza',
    'Setting'
]

# 3. Conversión de cada columna a numérico
for col in columnas_numericas:
    df1[col] = pd.to_numeric(df1[col], errors='coerce')  # Convierte errores a NaN

# 4. Metadatos del DataFrame (sin cambios)
attrs = {
    'var': 'df1',                # Nombre de la variable
    'type': type(df1).__name__,  # Tipo de objeto (DataFrame)
}

# 5. Resultados (idénticos al original):
[attrs]  # Muestra metadatos en lista
df1      # Muestra el DataFrame convertido

[{'var': 'df1', 'type': 'DataFrame'}]

Unnamed: 0,Tiempo,Corriente,Presión C. Eje,T° Retorno,T° socket Linner,T° Excéntrica,T° Alimentación,T° Desc. C. Eje,Salto anillo A,Salto Anillo B,Salto Anillo C,Salto Anillo D,Nivel Taza,Setting
0,2017-07-01 08:00:00,67.053070,362.781250,39.607815,36.288490,55.492577,34.394260,34.886993,7.641797,10.317187,6.255078,14.703516,40.268482,28.600006
1,2017-07-01 08:02:00,59.177505,387.683594,39.496250,36.196407,55.340359,34.049610,34.832542,7.492578,9.986719,5.973828,13.184766,40.798763,28.600006
2,2017-07-01 08:04:00,63.528282,305.914062,39.351486,36.129932,55.478226,33.914143,34.774769,7.526172,10.844141,6.708594,14.612500,40.994202,28.600006
3,2017-07-01 08:06:00,65.614792,360.429688,39.309650,36.000206,55.266884,33.769375,34.651253,7.255859,9.587110,6.538281,13.705859,40.185005,28.600006
4,2017-07-01 08:08:00,68.345924,318.992188,39.190121,36.005245,55.601650,33.665783,34.540352,7.751172,10.176172,7.362891,14.808594,40.913494,28.600006
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1048570,2019-09-23 17:06:00,0.000000,1.800781,33.859688,37.813107,47.386406,29.540627,22.379377,-0.074219,0.011719,-0.054688,-0.060547,,32.200005
1048571,2019-09-23 17:08:00,0.000000,1.816406,33.797268,37.805817,47.369335,29.286955,22.375393,-0.074219,0.011719,-0.054688,-0.060547,,32.200005
1048572,2019-09-23 17:10:00,0.000000,1.847656,33.829144,37.809826,47.354801,29.223206,22.393986,-0.074219,0.011719,-0.054688,-0.060547,,32.200005
1048573,2019-09-23 17:12:00,0.000000,1.855469,33.987190,37.851009,47.340271,29.235159,22.330236,-0.074219,0.011719,-0.054688,-0.060547,,32.200005


In [6]:
# Eliminación de filas con valores faltantes (NaN/NaT)

# 1. Elimina todas las filas que contengan al menos un valor nulo
df1 = df1.dropna(axis='index')  # axis='index' indica que se eliminan filas (no columnas)

# 2. Crea metadatos del DataFrame limpio
attrs = {
    'var': 'df1',               # Nombre de la variable
    'type': type(df1).__name__,  # Tipo de objeto (DataFrame)
}

# 3. Muestra resultados:
[attrs]  # Muestra los metadatos en formato lista
df1      # Muestra el DataFrame sin filas con valores nulos

[{'var': 'df1', 'type': 'DataFrame'}]

Unnamed: 0,Tiempo,Corriente,Presión C. Eje,T° Retorno,T° socket Linner,T° Excéntrica,T° Alimentación,T° Desc. C. Eje,Salto anillo A,Salto Anillo B,Salto Anillo C,Salto Anillo D,Nivel Taza,Setting
0,2017-07-01 08:00:00,67.053070,362.781250,39.607815,36.288490,55.492577,34.394260,34.886993,7.641797,10.317187,6.255078,14.703516,40.268482,28.600006
1,2017-07-01 08:02:00,59.177505,387.683594,39.496250,36.196407,55.340359,34.049610,34.832542,7.492578,9.986719,5.973828,13.184766,40.798763,28.600006
2,2017-07-01 08:04:00,63.528282,305.914062,39.351486,36.129932,55.478226,33.914143,34.774769,7.526172,10.844141,6.708594,14.612500,40.994202,28.600006
3,2017-07-01 08:06:00,65.614792,360.429688,39.309650,36.000206,55.266884,33.769375,34.651253,7.255859,9.587110,6.538281,13.705859,40.185005,28.600006
4,2017-07-01 08:08:00,68.345924,318.992188,39.190121,36.005245,55.601650,33.665783,34.540352,7.751172,10.176172,7.362891,14.808594,40.913494,28.600006
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
872093,2019-05-06 12:34:00,0.000000,217.148438,34.072269,35.137676,41.315334,32.226566,29.100393,0.087891,0.275391,0.082031,0.305664,27.349173,51.299965
872094,2019-05-06 12:36:00,0.000000,210.371094,34.305470,35.364925,41.157520,33.551567,28.854691,0.134766,0.360352,0.125000,0.437500,28.305700,51.299965
872095,2019-05-06 12:38:00,0.000000,208.222656,34.530472,35.413589,41.037106,34.060547,28.853909,0.140625,0.392578,0.177734,0.459961,0.260094,51.299965
872096,2019-05-06 12:40:00,0.000000,205.507812,34.852345,35.491409,40.935600,34.508595,28.867971,0.125000,0.342773,0.158203,0.433594,28.762465,51.299965


In [7]:
# Configuración de índice temporal sin modificar datos

# 1. Establece la columna 'Tiempo' como índice del DataFrame
df1 = df1.set_index('Tiempo')  # Versión más limpia que 'inplace=True'

# 2. Crea metadatos del DataFrame
attrs = {
    'var': 'df1',               # Nombre de la variable
    'type': type(df1).__name__,  # Tipo de objeto (DataFrame)
    'index': df1.index.name      # Muestra el nombre del índice
}

# 3. Muestra resultados:
[attrs]  # Output: [{'var': 'df1', 'type': 'DataFrame', 'index': 'Tiempo'}]
df1      # Muestra el DataFrame con 'Tiempo' como índice

[{'var': 'df1', 'type': 'DataFrame', 'index': 'Tiempo'}]

Unnamed: 0_level_0,Corriente,Presión C. Eje,T° Retorno,T° socket Linner,T° Excéntrica,T° Alimentación,T° Desc. C. Eje,Salto anillo A,Salto Anillo B,Salto Anillo C,Salto Anillo D,Nivel Taza,Setting
Tiempo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2017-07-01 08:00:00,67.053070,362.781250,39.607815,36.288490,55.492577,34.394260,34.886993,7.641797,10.317187,6.255078,14.703516,40.268482,28.600006
2017-07-01 08:02:00,59.177505,387.683594,39.496250,36.196407,55.340359,34.049610,34.832542,7.492578,9.986719,5.973828,13.184766,40.798763,28.600006
2017-07-01 08:04:00,63.528282,305.914062,39.351486,36.129932,55.478226,33.914143,34.774769,7.526172,10.844141,6.708594,14.612500,40.994202,28.600006
2017-07-01 08:06:00,65.614792,360.429688,39.309650,36.000206,55.266884,33.769375,34.651253,7.255859,9.587110,6.538281,13.705859,40.185005,28.600006
2017-07-01 08:08:00,68.345924,318.992188,39.190121,36.005245,55.601650,33.665783,34.540352,7.751172,10.176172,7.362891,14.808594,40.913494,28.600006
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-05-06 12:34:00,0.000000,217.148438,34.072269,35.137676,41.315334,32.226566,29.100393,0.087891,0.275391,0.082031,0.305664,27.349173,51.299965
2019-05-06 12:36:00,0.000000,210.371094,34.305470,35.364925,41.157520,33.551567,28.854691,0.134766,0.360352,0.125000,0.437500,28.305700,51.299965
2019-05-06 12:38:00,0.000000,208.222656,34.530472,35.413589,41.037106,34.060547,28.853909,0.140625,0.392578,0.177734,0.459961,0.260094,51.299965
2019-05-06 12:40:00,0.000000,205.507812,34.852345,35.491409,40.935600,34.508595,28.867971,0.125000,0.342773,0.158203,0.433594,28.762465,51.299965


In [8]:
# Agrupamiento temporal por intervalos de 12 minutos

# 1. Importaciones necesarias
from functools import partial
from pandas.tseries.frequencies import to_offset

# 2. Función para redondear timestamps
def roundfunc(t, freq):
    """Redondea el timestamp al intervalo temporal más cercano"""
    freq = to_offset(freq)  # Convierte el string de frecuencia a objeto Offset
    return pd.Timestamp((t.value // freq.delta.value) * freq.delta.value)

# 3. Agrupa el DataFrame por intervalos de 12 minutos
df26 = df1.groupby(partial(roundfunc, freq='12T'))  # '12T' = 12 minutos

# 4. Metadatos del objeto GroupBy
attrs = {
    'var': 'df26',
    'type': type(df26).__name__,  # Será 'DataFrameGroupBy'
}
[attrs]  # Muestra: [{'var': 'df26', 'type': 'DataFrameGroupBy'}]

# 5. Inspección de los grupos
enum = enumerate(df26.groups)  # Crea un enumerador de grupos

# Primer grupo
nextel = next(enum)  # Obtiene (índice, timestamp) del primer grupo
print("\nPrimer intervalo:", nextel[1])
display(df26.get_group(nextel[1]))  # Muestra los datos del primer intervalo

# Segundo grupo
nextel = next(enum)  # Obtiene (índice, timestamp) del segundo grupo
print("\nSegundo intervalo:", nextel[1])
display(df26.get_group(nextel[1]))  # Muestra los datos del segundo intervalo

  freq = to_offset(freq)  # Convierte el string de frecuencia a objeto Offset
  return pd.Timestamp((t.value // freq.delta.value) * freq.delta.value)


[{'var': 'df26', 'type': 'DataFrameGroupBy'}]


Primer intervalo: 2017-07-01 08:00:00


Unnamed: 0_level_0,Corriente,Presión C. Eje,T° Retorno,T° socket Linner,T° Excéntrica,T° Alimentación,T° Desc. C. Eje,Salto anillo A,Salto Anillo B,Salto Anillo C,Salto Anillo D,Nivel Taza,Setting
Tiempo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2017-07-01 08:00:00,67.05307,362.78125,39.607815,36.28849,55.492577,34.39426,34.886993,7.641797,10.317187,6.255078,14.703516,40.268482,28.600006
2017-07-01 08:02:00,59.177505,387.683594,39.49625,36.196407,55.340359,34.04961,34.832542,7.492578,9.986719,5.973828,13.184766,40.798763,28.600006
2017-07-01 08:04:00,63.528282,305.914062,39.351486,36.129932,55.478226,33.914143,34.774769,7.526172,10.844141,6.708594,14.6125,40.994202,28.600006
2017-07-01 08:06:00,65.614792,360.429688,39.30965,36.000206,55.266884,33.769375,34.651253,7.255859,9.58711,6.538281,13.705859,40.185005,28.600006
2017-07-01 08:08:00,68.345924,318.992188,39.190121,36.005245,55.60165,33.665783,34.540352,7.751172,10.176172,7.362891,14.808594,40.913494,28.600006
2017-07-01 08:10:00,69.081245,344.152344,39.07756,35.908829,55.523636,33.608673,34.435432,7.464844,9.665625,6.233594,12.664844,38.974487,28.600006



Segundo intervalo: 2017-07-01 08:12:00


Unnamed: 0_level_0,Corriente,Presión C. Eje,T° Retorno,T° socket Linner,T° Excéntrica,T° Alimentación,T° Desc. C. Eje,Salto anillo A,Salto Anillo B,Salto Anillo C,Salto Anillo D,Nivel Taza,Setting
Tiempo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2017-07-01 08:12:00,69.294701,387.011719,39.024105,36.090473,55.47559,34.158852,34.434105,7.379687,10.027344,6.206641,13.330469,40.23679,28.600006
2017-07-01 08:14:00,24.967997,422.472656,39.659943,35.45108,54.741035,34.449379,34.569572,1.474609,1.676563,0.96875,2.228906,36.98209,29.160004
2017-07-01 08:16:00,25.006594,434.09375,40.096565,35.857887,54.756111,34.790039,34.847153,1.511719,1.612891,1.082422,2.200781,18.073059,28.500004
2017-07-01 08:18:00,52.561611,355.808594,40.377464,36.817886,55.137192,35.441818,35.12207,6.29375,8.341797,5.827734,12.464453,42.840637,28.500004
2017-07-01 08:20:00,64.90905,319.933594,41.01297,38.064346,56.229942,36.83469,35.424885,7.358984,9.116015,6.955469,13.152344,42.156181,28.500004
2017-07-01 08:22:00,65.288956,298.113281,41.570118,38.552181,56.832901,37.502735,35.6852,7.726953,9.034765,6.432422,13.092188,40.584854,28.500004


In [9]:
# Cálculo de la mediana para cada grupo temporal

# 1. Calcula la mediana para cada variable en cada intervalo de 12 minutos
df26 = df26.aggregate('median')

# 2. Metadatos del DataFrame resultante
attrs = {
    'var': 'df26',               # Nombre de la variable
    'type': type(df26).__name__,  # Tipo de objeto (DataFrame)
    'forma': df26.shape          # Dimensiones del DataFrame resultante
}

# 3. Muestra resultados:
[attrs]  # Ejemplo: [{'var': 'df26', 'type': 'DataFrame', 'forma': (X, Y)}]
df26      # Muestra el DataFrame con las medianas por intervalo

[{'var': 'df26', 'type': 'DataFrame', 'forma': (47600, 13)}]

Unnamed: 0_level_0,Corriente,Presión C. Eje,T° Retorno,T° socket Linner,T° Excéntrica,T° Alimentación,T° Desc. C. Eje,Salto anillo A,Salto Anillo B,Salto Anillo C,Salto Anillo D,Nivel Taza,Setting
Tiempo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2017-07-01 08:00:00,6.633393e+01,352.291016,39.330568,36.067589,55.485401,33.841759,34.713011,7.509375,10.081446,6.396680,14.159180,40.533623,28.600006
2017-07-01 08:12:00,5.873533e+01,371.410156,40.237015,36.454180,55.306391,35.115929,34.984612,6.826367,8.688281,6.017188,12.778320,40.410822,28.500004
2017-07-01 08:24:00,7.330124e+01,306.683594,42.924143,39.803396,57.962978,38.862738,36.497345,7.344141,9.495117,6.327344,13.664258,40.086174,28.500004
2017-07-01 08:36:00,7.359566e+01,263.185547,44.400021,41.389423,59.253897,40.636448,37.869467,7.599805,9.619336,7.019141,13.967968,41.802011,28.500004
2017-07-01 08:48:00,7.216714e+01,300.306641,43.409241,40.016947,57.869795,37.751429,38.141232,7.640820,9.582422,6.680078,13.574414,41.188478,28.500004
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-05-06 11:48:00,3.816504e-28,191.533203,40.859182,39.509026,53.035002,38.809961,33.665823,0.247559,0.545898,0.271484,0.688477,12.427644,51.299965
2019-05-06 12:00:00,0.000000e+00,187.421875,37.240042,38.162100,45.844812,34.772854,32.221487,0.134766,0.362793,0.154785,0.394531,3.659730,51.299965
2019-05-06 12:12:00,0.000000e+00,203.500977,35.421877,36.641315,43.335768,33.321487,30.623049,0.122070,0.319824,0.126465,0.371094,11.449250,51.299965
2019-05-06 12:24:00,0.000000e+00,214.321289,34.453516,35.566711,41.824305,32.437502,29.482814,0.099609,0.308594,0.122559,0.379883,27.432148,51.299965


In [10]:
# Guardado del DataFrame procesado en archivo CSV

# 1. Construye la ruta completa para guardar el archivo
file = os.path.join('C:/Users/Milovan/Desktop/notebook para repositorio/chancadores/', 'ChancadorComputacionAplicada12MINUTOS.csv')

# 2. Guarda el DataFrame con medianas a CSV
df26.to_csv(file, index=True)  # index=True para mantener la columna temporal

# 3. Crea metadatos de confirmación
attrs = {
    'var': 'empty',  # Indica que no hay objeto activo
    'type': 'empty',  # Indica que no hay tipo asociado
}

# 4. Muestra confirmación:
[attrs]  # Muestra: [{'var': 'empty', 'type': 'empty'}]
'csv file saved'  # Mensaje de confirmación

[{'var': 'empty', 'type': 'empty'}]

'csv file saved'

In [30]:
import os
import pandas as pd

# 1. CARGAR ARCHIVO CSV
# --------------------------------------------------
# Define la ruta del archivo CSV combinando directorio y nombre de archivo
archivo_csv = os.path.join(
    'C:/Users/Milovan/Desktop/notebook para repositorio/chancadores/',
    'ChancadorComputacionAplicada12MINUTOS.csv'
)

# Carga el archivo CSV en un DataFrame de pandas
# header=0 indica que la primera fila contiene los nombres de las columnas
df35 = pd.read_csv(archivo_csv, header=0)

# 2. CREAR DICCIONARIO DE METADATOS
# --------------------------------------------------
# Crea un diccionario con información descriptiva del DataFrame
metadata = {
    'var': 'df35',                    # Nombre de la variable
    'type': type(df35).__name__,      # Tipo de objeto (DataFrame)
    'shape': df35.shape,              # Dimensiones del DataFrame (filas, columnas)
    'columns': list(df35.columns)     # Lista de nombres de columnas
}

# 3. MOSTRAR RESULTADOS
# --------------------------------------------------
# Muestra el diccionario de metadatos
[metadata]

# Devuelve el DataFrame para visualización
df35

[{'var': 'df35',
  'type': 'DataFrame',
  'shape': (47600, 14),
  'columns': ['Tiempo',
   'Corriente',
   'Presión C. Eje',
   'T° Retorno',
   'T° socket Linner',
   'T° Excéntrica',
   'T° Alimentación',
   'T° Desc. C. Eje',
   'Salto anillo A',
   'Salto Anillo B',
   'Salto Anillo C',
   'Salto Anillo D',
   'Nivel Taza',
   'Setting']}]

Unnamed: 0,Tiempo,Corriente,Presión C. Eje,T° Retorno,T° socket Linner,T° Excéntrica,T° Alimentación,T° Desc. C. Eje,Salto anillo A,Salto Anillo B,Salto Anillo C,Salto Anillo D,Nivel Taza,Setting
0,2017-07-01 08:00:00,6.633393e+01,352.291016,39.330568,36.067589,55.485401,33.841759,34.713011,7.509375,10.081446,6.396680,14.159180,40.533623,28.600006
1,2017-07-01 08:12:00,5.873533e+01,371.410156,40.237015,36.454180,55.306391,35.115929,34.984612,6.826367,8.688281,6.017188,12.778320,40.410822,28.500004
2,2017-07-01 08:24:00,7.330124e+01,306.683594,42.924143,39.803396,57.962978,38.862738,36.497345,7.344141,9.495117,6.327344,13.664258,40.086174,28.500004
3,2017-07-01 08:36:00,7.359566e+01,263.185547,44.400021,41.389423,59.253897,40.636448,37.869467,7.599805,9.619336,7.019141,13.967968,41.802011,28.500004
4,2017-07-01 08:48:00,7.216714e+01,300.306641,43.409241,40.016947,57.869795,37.751429,38.141232,7.640820,9.582422,6.680078,13.574414,41.188478,28.500004
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47595,2019-05-06 11:48:00,3.816504e-28,191.533203,40.859182,39.509026,53.035002,38.809961,33.665823,0.247559,0.545898,0.271484,0.688477,12.427644,51.299965
47596,2019-05-06 12:00:00,0.000000e+00,187.421875,37.240042,38.162100,45.844812,34.772854,32.221487,0.134766,0.362793,0.154785,0.394531,3.659730,51.299965
47597,2019-05-06 12:12:00,0.000000e+00,203.500977,35.421877,36.641315,43.335768,33.321487,30.623049,0.122070,0.319824,0.126465,0.371094,11.449250,51.299965
47598,2019-05-06 12:24:00,0.000000e+00,214.321289,34.453516,35.566711,41.824305,32.437502,29.482814,0.099609,0.308594,0.122559,0.379883,27.432148,51.299965


In [32]:
# 1. CONVERSIÓN DE COLUMNA DE TIEMPO
# --------------------------------------------------
# Asignación redundante eliminada (df35 = df35 no era necesaria)

# Obtiene la columna 'Tiempo' del DataFrame
serie_tiempo = df35['Tiempo']

# Convierte la columna a formato datetime, manejando errores con 'coerce' 
# (convierte valores problemáticos a NaT - Not a Time)
df35['Tiempo'] = pd.to_datetime(serie_tiempo, errors='coerce')

# 2. CREACIÓN DE METADATOS MEJORADA
# --------------------------------------------------
# Diccionario ampliado con información más útil sobre el DataFrame
metadata = {
    'variable': 'df35',                          # Nombre de la variable
    'tipo': type(df35).__name__,                 # Tipo de objeto
    'forma': df35.shape,                         # Dimensiones (filas, columnas)
    'columnas': list(df35.columns),               # Lista de columnas
    'tipo_datos_tiempo': str(df35['Tiempo'].dtype),  # Tipo de datos de la columna Tiempo
    'valores_temporales_nulos': df35['Tiempo'].isna().sum()  # Conteo de valores nulos
}

# 3. VISUALIZACIÓN DE RESULTADOS
# --------------------------------------------------
# Muestra los metadatos mejorados
[metadata]

# Devuelve el DataFrame para inspección
df35

[{'variable': 'df35',
  'tipo': 'DataFrame',
  'forma': (47600, 14),
  'columnas': ['Tiempo',
   'Corriente',
   'Presión C. Eje',
   'T° Retorno',
   'T° socket Linner',
   'T° Excéntrica',
   'T° Alimentación',
   'T° Desc. C. Eje',
   'Salto anillo A',
   'Salto Anillo B',
   'Salto Anillo C',
   'Salto Anillo D',
   'Nivel Taza',
   'Setting'],
  'tipo_datos_tiempo': 'datetime64[ns]',
  'valores_temporales_nulos': 0}]

Unnamed: 0,Tiempo,Corriente,Presión C. Eje,T° Retorno,T° socket Linner,T° Excéntrica,T° Alimentación,T° Desc. C. Eje,Salto anillo A,Salto Anillo B,Salto Anillo C,Salto Anillo D,Nivel Taza,Setting
0,2017-07-01 08:00:00,6.633393e+01,352.291016,39.330568,36.067589,55.485401,33.841759,34.713011,7.509375,10.081446,6.396680,14.159180,40.533623,28.600006
1,2017-07-01 08:12:00,5.873533e+01,371.410156,40.237015,36.454180,55.306391,35.115929,34.984612,6.826367,8.688281,6.017188,12.778320,40.410822,28.500004
2,2017-07-01 08:24:00,7.330124e+01,306.683594,42.924143,39.803396,57.962978,38.862738,36.497345,7.344141,9.495117,6.327344,13.664258,40.086174,28.500004
3,2017-07-01 08:36:00,7.359566e+01,263.185547,44.400021,41.389423,59.253897,40.636448,37.869467,7.599805,9.619336,7.019141,13.967968,41.802011,28.500004
4,2017-07-01 08:48:00,7.216714e+01,300.306641,43.409241,40.016947,57.869795,37.751429,38.141232,7.640820,9.582422,6.680078,13.574414,41.188478,28.500004
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47595,2019-05-06 11:48:00,3.816504e-28,191.533203,40.859182,39.509026,53.035002,38.809961,33.665823,0.247559,0.545898,0.271484,0.688477,12.427644,51.299965
47596,2019-05-06 12:00:00,0.000000e+00,187.421875,37.240042,38.162100,45.844812,34.772854,32.221487,0.134766,0.362793,0.154785,0.394531,3.659730,51.299965
47597,2019-05-06 12:12:00,0.000000e+00,203.500977,35.421877,36.641315,43.335768,33.321487,30.623049,0.122070,0.319824,0.126465,0.371094,11.449250,51.299965
47598,2019-05-06 12:24:00,0.000000e+00,214.321289,34.453516,35.566711,41.824305,32.437502,29.482814,0.099609,0.308594,0.122559,0.379883,27.432148,51.299965


In [33]:
# ==================================================
# 1. CONFIGURACIÓN DEL ÍNDICE DEL DATAFRAME
# ==================================================

# Establece la columna 'Tiempo' como índice del DataFrame
# - `inplace=True` modifica el DataFrame original en lugar de devolver uno nuevo
df35.set_index('Tiempo', inplace=True)  

# ==================================================
# 2. METADATOS MEJORADOS DEL DATAFRAME
# ==================================================
metadata = {
    'nombre_variable': 'df35',                # Nombre de la variable
    'tipo_objeto': type(df35).__name__,       # Tipo de objeto (DataFrame)
    'dimensiones': df35.shape,                # Forma del DataFrame (filas, columnas)
    'columnas_restantes': list(df35.columns), # Columnas disponibles (excluyendo el índice)
    'tipo_indice': type(df35.index).__name__, # Tipo de índice (DatetimeIndex si 'Tiempo' era datetime)
    'ejemplo_indice': str(df35.index[0]) if len(df35) > 0 else "Índice vacío",  # Muestra un ejemplo del índice
}

# ==================================================
# 3. VISUALIZACIÓN DE RESULTADOS
# ==================================================
# Muestra los metadatos en formato de lista (como en el original)
[metadata]  

# Devuelve el DataFrame para inspección visual
df35  

[{'nombre_variable': 'df35',
  'tipo_objeto': 'DataFrame',
  'dimensiones': (47600, 13),
  'columnas_restantes': ['Corriente',
   'Presión C. Eje',
   'T° Retorno',
   'T° socket Linner',
   'T° Excéntrica',
   'T° Alimentación',
   'T° Desc. C. Eje',
   'Salto anillo A',
   'Salto Anillo B',
   'Salto Anillo C',
   'Salto Anillo D',
   'Nivel Taza',
   'Setting'],
  'tipo_indice': 'DatetimeIndex',
  'ejemplo_indice': '2017-07-01 08:00:00'}]

Unnamed: 0_level_0,Corriente,Presión C. Eje,T° Retorno,T° socket Linner,T° Excéntrica,T° Alimentación,T° Desc. C. Eje,Salto anillo A,Salto Anillo B,Salto Anillo C,Salto Anillo D,Nivel Taza,Setting
Tiempo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2017-07-01 08:00:00,6.633393e+01,352.291016,39.330568,36.067589,55.485401,33.841759,34.713011,7.509375,10.081446,6.396680,14.159180,40.533623,28.600006
2017-07-01 08:12:00,5.873533e+01,371.410156,40.237015,36.454180,55.306391,35.115929,34.984612,6.826367,8.688281,6.017188,12.778320,40.410822,28.500004
2017-07-01 08:24:00,7.330124e+01,306.683594,42.924143,39.803396,57.962978,38.862738,36.497345,7.344141,9.495117,6.327344,13.664258,40.086174,28.500004
2017-07-01 08:36:00,7.359566e+01,263.185547,44.400021,41.389423,59.253897,40.636448,37.869467,7.599805,9.619336,7.019141,13.967968,41.802011,28.500004
2017-07-01 08:48:00,7.216714e+01,300.306641,43.409241,40.016947,57.869795,37.751429,38.141232,7.640820,9.582422,6.680078,13.574414,41.188478,28.500004
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-05-06 11:48:00,3.816504e-28,191.533203,40.859182,39.509026,53.035002,38.809961,33.665823,0.247559,0.545898,0.271484,0.688477,12.427644,51.299965
2019-05-06 12:00:00,0.000000e+00,187.421875,37.240042,38.162100,45.844812,34.772854,32.221487,0.134766,0.362793,0.154785,0.394531,3.659730,51.299965
2019-05-06 12:12:00,0.000000e+00,203.500977,35.421877,36.641315,43.335768,33.321487,30.623049,0.122070,0.319824,0.126465,0.371094,11.449250,51.299965
2019-05-06 12:24:00,0.000000e+00,214.321289,34.453516,35.566711,41.824305,32.437502,29.482814,0.099609,0.308594,0.122559,0.379883,27.432148,51.299965


In [34]:
# ==================================================
# 1. CARGA DEL ARCHIVO EXCEL
# ==================================================
import os
import pandas as pd

# Define la ruta del archivo Excel combinando directorio y nombre de archivo
ruta_archivo = os.path.join(
    'C:/Users/Milovan/Desktop/notebook para repositorio/chancadores/',
    'DetencionesRev.xlsx'
)

# Carga el archivo Excel en un DataFrame
# - header=0 indica que la primera fila contiene los nombres de columnas
# - pd.read_excel() soporta formatos .xls y .xlsx
df9 = pd.read_excel(ruta_archivo, header=0)

# ==================================================
# 2. METADATOS MEJORADOS DEL DATAFRAME
# ==================================================
metadata = {
    'nombre_variable': 'df9',                      # Nombre de la variable
    'tipo_objeto': type(df9).__name__,            # Tipo de objeto (DataFrame)
    'dimensiones': df9.shape,                      # Forma del DataFrame (filas, columnas)
    'columnas': list(df9.columns),                 # Nombres de columnas disponibles
    'tipo_columnas': df9.dtypes.to_dict(),         # Tipos de datos por columna
    'muestra_filas': df9.head(2).to_dict('list')   # Ejemplo de datos (2 primeras filas)
}

# ==================================================
# 3. VISUALIZACIÓN DE RESULTADOS
# ==================================================
# Muestra los metadatos en formato de lista (como en el original)
[metadata]

# Devuelve el DataFrame para inspección visual
df9

[{'nombre_variable': 'df9',
  'tipo_objeto': 'DataFrame',
  'dimensiones': (2595, 14),
  'columnas': ['Unnamed: 0',
   'Día',
   'Fecha',
   'Tipo de Detención',
   'Detalle de Reporte',
   'Equipo',
   'Causa',
   'Inicio',
   'Fin',
   'Tiempo Detención (hrs)',
   'Tipo de Detención (2)',
   'MES',
   'Año',
   'Estado'],
  'tipo_columnas': {'Unnamed: 0': dtype('int64'),
   'Día': dtype('int64'),
   'Fecha': dtype('<M8[ns]'),
   'Tipo de Detención': dtype('O'),
   'Detalle de Reporte': dtype('O'),
   'Equipo': dtype('O'),
   'Causa': dtype('O'),
   'Inicio': dtype('<M8[ns]'),
   'Fin': dtype('<M8[ns]'),
   'Tiempo Detención (hrs)': dtype('float64'),
   'Tipo de Detención (2)': dtype('int64'),
   'MES': dtype('int64'),
   'Año': dtype('int64'),
   'Estado': dtype('float64')},
  'muestra_filas': {'Unnamed: 0': [0, 1],
   'Día': [2, 2],
   'Fecha': [Timestamp('2017-01-02 00:00:00'),
    Timestamp('2017-01-02 00:00:00')],
   'Tipo de Detención': ['Falla', 'Operacional'],
   'Detalle de R

Unnamed: 0.1,Unnamed: 0,Día,Fecha,Tipo de Detención,Detalle de Reporte,Equipo,Causa,Inicio,Fin,Tiempo Detención (hrs),Tipo de Detención (2),MES,Año,Estado
0,0,2,2017-01-02,Falla,L3 \ \ Correa 004,130-CV-004,Falla Control e Instrumentación,2017-01-02 12:42:40,2017-01-02 13:05:50,0.386111,1,1,2017,
1,1,2,2017-01-02,Operacional,L3 \ \ Feeder 130-FE-006,130-FE-006,Otra Causa,2017-01-02 14:47:50,2017-01-02 14:51:02,0.053333,0,1,2017,
2,2,2,2017-01-02,Falla,L3 \ \ Correa 004,130-CV-004,Otra Causa,2017-01-02 16:10:02,2017-01-03 00:32:50,8.380000,1,1,2017,
3,3,2,2017-01-02,Operacional,L3 \ \ Feeder 130-FE-006,130-FE-006,Otra Causa,2017-01-03 01:58:20,2017-01-03 02:08:02,0.161667,0,1,2017,
4,4,2,2017-01-02,Bajo Stock Mina,L3 \ \ Sistema,Sistema,Bajo Stock Mina,2017-01-03 05:21:30,2017-01-03 05:46:50,0.422222,0,1,2017,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2590,2590,22,2019-08-22,Bajo Stock Mina,L3 \ \ Sistema,Sistema,Bajo Stock Mina,2019-08-22 10:48:15,2019-08-23 08:00:00,21.195833,0,8,2019,
2591,2591,23,2019-08-23,Bajo Stock Mina,L3 \ \ Sistema,Sistema,Mantención Programada,2019-08-23 08:00:00,2019-08-26 08:00:00,72.000000,0,8,2019,
2592,2592,26,2019-08-26,Bajo Stock Mina,L3 \ \ Sistema,Sistema,Retraso Mantención Programada,2019-08-26 08:00:00,2019-08-26 16:16:15,8.270833,0,8,2019,
2593,2593,29,2019-08-29,Operacional,L3 \ \ Sistema,Sistema,Enclavamiento,2019-08-29 11:29:45,2019-08-29 15:40:55,4.186111,0,8,2019,


In [35]:
# ==================================================
# 1. LIMPIEZA DE COLUMNAS NO DESEADAS
# ==================================================
# Elimina columnas específicas del DataFrame que no son necesarias para el análisis
columnas_a_eliminar = [
    'Unnamed: 0',               # Columna de índice generada automáticamente (redundante)
    'Año',                      # Información temporal que podría estar en otra columna
    'Estado',                   # Datos categóricos no relevantes para este análisis
    'Tipo de Detención (2)',    # Información duplicada o alternativa
    'MES',                      # Redundante si ya hay fecha completa
    'Tiempo Detención (hrs)',   # No requerida para el análisis actual
    'Fecha',                    # Posible duplicado de otra columna temporal
    'Detalle de Reporte',       # Texto libre no estructurado
    'Día',                      # Redundante si hay fecha completa
]

# Eliminación eficiente de múltiples columnas (axis=1 para columnas)
df9 = df9.drop(columns=columnas_a_eliminar)

# ==================================================
# 2. METADATOS MEJORADOS POST-LIMPIEZA
# ==================================================
metadata = {
    'nombre_variable': 'df9',                      # Identificador del DataFrame
    'tipo_objeto': type(df9).__name__,            # Confirmación de que sigue siendo DataFrame
    'dimensiones_post_limpieza': df9.shape,        # Nuevo tamaño (filas, columnas restantes)
    'columnas_restantes': list(df9.columns),       # Lista actualizada de columnas
    'columnas_eliminadas': columnas_a_eliminar,    # Registro histórico de lo eliminado
    'tipos_datos_actuales': df9.dtypes.to_dict()  # Tipos de datos de las columnas restantes
}

# ==================================================
# 3. VISUALIZACIÓN DE RESULTADOS
# ==================================================
# Muestra metadatos mejorados en formato de lista
[metadata]

# Devuelve el DataFrame limpio para inspección
df9

[{'nombre_variable': 'df9',
  'tipo_objeto': 'DataFrame',
  'dimensiones_post_limpieza': (2595, 5),
  'columnas_restantes': ['Tipo de Detención',
   'Equipo',
   'Causa',
   'Inicio',
   'Fin'],
  'columnas_eliminadas': ['Unnamed: 0',
   'Año',
   'Estado',
   'Tipo de Detención (2)',
   'MES',
   'Tiempo Detención (hrs)',
   'Fecha',
   'Detalle de Reporte',
   'Día'],
  'tipos_datos_actuales': {'Tipo de Detención': dtype('O'),
   'Equipo': dtype('O'),
   'Causa': dtype('O'),
   'Inicio': dtype('<M8[ns]'),
   'Fin': dtype('<M8[ns]')}}]

Unnamed: 0,Tipo de Detención,Equipo,Causa,Inicio,Fin
0,Falla,130-CV-004,Falla Control e Instrumentación,2017-01-02 12:42:40,2017-01-02 13:05:50
1,Operacional,130-FE-006,Otra Causa,2017-01-02 14:47:50,2017-01-02 14:51:02
2,Falla,130-CV-004,Otra Causa,2017-01-02 16:10:02,2017-01-03 00:32:50
3,Operacional,130-FE-006,Otra Causa,2017-01-03 01:58:20,2017-01-03 02:08:02
4,Bajo Stock Mina,Sistema,Bajo Stock Mina,2017-01-03 05:21:30,2017-01-03 05:46:50
...,...,...,...,...,...
2590,Bajo Stock Mina,Sistema,Bajo Stock Mina,2019-08-22 10:48:15,2019-08-23 08:00:00
2591,Bajo Stock Mina,Sistema,Mantención Programada,2019-08-23 08:00:00,2019-08-26 08:00:00
2592,Bajo Stock Mina,Sistema,Retraso Mantención Programada,2019-08-26 08:00:00,2019-08-26 16:16:15
2593,Operacional,Sistema,Enclavamiento,2019-08-29 11:29:45,2019-08-29 15:40:55


In [37]:
# ==============================================
# FILTRADO DE DATOS PARA EQUIPO ESPECÍFICO
# ==============================================
# Crear condición para filtrar solo registros del equipo '130-CV-004'
filtro_equipo = df9['Equipo'] == '130-CV-004'  # Versión más legible que .eq()

# Aplicar el filtro al DataFrame (usando .copy() para evitar warnings)
df9 = df9[filtro_equipo].copy()

# ==============================================
# METADATOS MEJORADOS
# ==============================================
metadatos = {
    'nombre_variable': 'df9',                     # Nombre del DataFrame
    'tipo_objeto': type(df9).__name__,           # Tipo del objeto
    'filas_filtradas': len(df9),                 # Cantidad de registros después del filtro
    'columnas_restantes': list(df9.columns),     # Columnas disponibles
    'equipo_filtrado': '130-CV-004',             # Valor usado para el filtro
    'porcentaje_filtrado': f"{len(df9)/len(filtro_equipo)*100:.2f}% del total",  # % filtrado
    'ejemplo_registros': df9.head(1).to_dict('records') if len(df9) > 0 else "Sin datos"  # Muestra de datos
}

# ==============================================
# VISUALIZACIÓN DE RESULTADOS
# ==============================================
# Mostrar metadatos (formato original mejorado)
[metadatos]

# Mostrar el DataFrame resultante
df9

[{'nombre_variable': 'df9',
  'tipo_objeto': 'DataFrame',
  'filas_filtradas': 116,
  'columnas_restantes': ['Tipo de Detención',
   'Equipo',
   'Causa',
   'Inicio',
   'Fin'],
  'equipo_filtrado': '130-CV-004',
  'porcentaje_filtrado': '4.47% del total',
  'ejemplo_registros': [{'Tipo de Detención': 'Falla',
    'Equipo': '130-CV-004',
    'Causa': 'Falla Control e Instrumentación',
    'Inicio': Timestamp('2017-01-02 12:42:40'),
    'Fin': Timestamp('2017-01-02 13:05:50')}]}]

Unnamed: 0,Tipo de Detención,Equipo,Causa,Inicio,Fin
0,Falla,130-CV-004,Falla Control e Instrumentación,2017-01-02 12:42:40,2017-01-02 13:05:50
2,Falla,130-CV-004,Otra Causa,2017-01-02 16:10:02,2017-01-03 00:32:50
32,Falla,130-CV-004,Otra Causa,2017-01-07 11:35:50,2017-01-07 13:39:30
33,Falla,130-CV-004,Falla Eléctrica,2017-01-08 11:15:30,2017-01-08 11:18:40
42,Falla,130-CV-004,Falla Control e Instrumentación,2017-01-12 05:37:20,2017-01-12 06:41:10
...,...,...,...,...,...
2497,Falla,130-CV-004,Falla Eléctrica,2018-12-22 02:02:01,2018-12-22 02:05:10
2522,Operacional,130-CV-004,Desalineamiento,2019-01-01 04:09:10,2019-01-01 04:13:50
2545,Falla,130-CV-004,Falla Mecánica,2019-02-16 08:25:15,2019-02-16 18:35:35
2547,Falla,130-CV-004,Falla Eléctrica,2019-02-18 09:00:30,2019-02-18 13:12:25


In [38]:
# ==================================================
# RE-CLASIFICACIÓN DE TIPOS DE DETENCIÓN
# ==================================================
# Mantenemos la referencia original (esto es redundante y puede eliminarse)
df9 = df9  

# Creamos condición para identificar tipos de detención a re-clasificar
condicion_reclasificacion = df9['Tipo de Detención'].isin(["Operacional", "Mantención programada"])

# Reemplazamos los valores que cumplen la condición por 'Pausa'
# Usamos inplace=True para modificar directamente el DataFrame
df9['Tipo de Detención'].mask(condicion_reclasificacion, other='Pausa', inplace=True)

# ==================================================
# METADATOS MEJORADOS POST-RECLASIFICACIÓN
# ==================================================
metadatos = {
    'nombre_variable': 'df9',                            # Identificador del DataFrame
    'tipo_objeto': type(df9).__name__,                  # Tipo de objeto (DataFrame)
    'dimensiones': df9.shape,                           # Forma del DataFrame (filas, columnas)
    'valores_reemplazados': {                           # Estadísticas de reemplazo
        'valores_originales': ["Operacional", "Mantención programada"],
        'nuevo_valor': 'Pausa',
        'registros_afectados': condicion_reclasificacion.sum(),
        'porcentaje_afectado': f"{condicion_reclasificacion.mean()*100:.2f}%"
    },
    'distribucion_actual': df9['Tipo de Detención'].value_counts().to_dict()  # Conteo actual
}

# ==================================================
# VISUALIZACIÓN DE RESULTADOS
# ==================================================
# Mostramos metadatos en formato de lista
[metadatos]

# Devolvemos el DataFrame modificado para inspección
df9

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df9['Tipo de Detención'].mask(condicion_reclasificacion, other='Pausa', inplace=True)


[{'nombre_variable': 'df9',
  'tipo_objeto': 'DataFrame',
  'dimensiones': (116, 5),
  'valores_reemplazados': {'valores_originales': ['Operacional',
    'Mantención programada'],
   'nuevo_valor': 'Pausa',
   'registros_afectados': 34,
   'porcentaje_afectado': '29.31%'},
  'distribucion_actual': {'Falla': 79,
   'Pausa': 34,
   'Mantención Programada': 3}}]

Unnamed: 0,Tipo de Detención,Equipo,Causa,Inicio,Fin
0,Falla,130-CV-004,Falla Control e Instrumentación,2017-01-02 12:42:40,2017-01-02 13:05:50
2,Falla,130-CV-004,Otra Causa,2017-01-02 16:10:02,2017-01-03 00:32:50
32,Falla,130-CV-004,Otra Causa,2017-01-07 11:35:50,2017-01-07 13:39:30
33,Falla,130-CV-004,Falla Eléctrica,2017-01-08 11:15:30,2017-01-08 11:18:40
42,Falla,130-CV-004,Falla Control e Instrumentación,2017-01-12 05:37:20,2017-01-12 06:41:10
...,...,...,...,...,...
2497,Falla,130-CV-004,Falla Eléctrica,2018-12-22 02:02:01,2018-12-22 02:05:10
2522,Pausa,130-CV-004,Desalineamiento,2019-01-01 04:09:10,2019-01-01 04:13:50
2545,Falla,130-CV-004,Falla Mecánica,2019-02-16 08:25:15,2019-02-16 18:35:35
2547,Falla,130-CV-004,Falla Eléctrica,2019-02-18 09:00:30,2019-02-18 13:12:25


In [39]:
# ==================================================
# 1. CREACIÓN DE ÍNDICE DE INTERVALOS
# ==================================================
# Creamos un IntervalIndex a partir de las columnas de inicio y fin
interval_index = pd.IntervalIndex.from_arrays(
    df9['Inicio'],          # Tiempos de inicio
    df9['Fin'],             # Tiempos de fin
    closed='left'           # El intervalo incluye el inicio pero no el fin
)

# Configuramos el IntervalIndex como índice del DataFrame
df9_interval = df9.set_index(interval_index)

# Eliminamos las columnas de inicio y fin que ya están en el índice
df9_interval = df9_interval.drop(columns=['Inicio', 'Fin'])

# ==================================================
# 2. CREACIÓN DE ETIQUETAS DE ESTADO
# ==================================================
# Creamos un DataFrame con el mismo índice que df35 (datos de referencia)
df_labels13 = pd.DataFrame(index=df35.index)

# Inicializamos todos los estados como 'Operativo' (valor por defecto)
df_labels13['Estado de salud'] = 'Operativo'

# Iteramos sobre cada intervalo de detención
for intervalo, datos in df9_interval.iterrows():
    # Creamos máscara para los tiempos dentro del intervalo
    mask = (df_labels13.index >= intervalo.left) & (df_labels13.index < intervalo.right)
    
    # Actualizamos el estado para los registros dentro del intervalo
    df_labels13.loc[mask, 'Estado de salud'] = datos['Tipo de Detención']

# ==================================================
# 3. METADATOS MEJORADOS
# ==================================================
metadatos = {
    'nombre_variable': 'df_labels13',
    'tipo_objeto': type(df_labels13).__name__,
    'dimensiones': df_labels13.shape,
    'columnas': list(df_labels13.columns),
    'distribucion_estados': df_labels13['Estado de salud'].value_counts().to_dict(),
    'periodo_cubierto': {
        'inicio': df_labels13.index.min(),
        'fin': df_labels13.index.max()
    }
}

# ==================================================
# 4. VISUALIZACIÓN DE RESULTADOS
# ==================================================
[metadatos]
df_labels13

[{'nombre_variable': 'df_labels13',
  'tipo_objeto': 'DataFrame',
  'dimensiones': (47600, 1),
  'columnas': ['Estado de salud'],
  'distribucion_estados': {'Operativo': 47394,
   'Falla': 181,
   'Mantención Programada': 19,
   'Pausa': 6},
  'periodo_cubierto': {'inicio': Timestamp('2017-07-01 08:00:00'),
   'fin': Timestamp('2019-05-06 12:36:00')}}]

Unnamed: 0_level_0,Estado de salud
Tiempo,Unnamed: 1_level_1
2017-07-01 08:00:00,Operativo
2017-07-01 08:12:00,Operativo
2017-07-01 08:24:00,Operativo
2017-07-01 08:36:00,Operativo
2017-07-01 08:48:00,Operativo
...,...
2019-05-06 11:48:00,Operativo
2019-05-06 12:00:00,Operativo
2019-05-06 12:12:00,Operativo
2019-05-06 12:24:00,Operativo


In [40]:
# ==================================================
# 1. CONCATENACIÓN DE DATAFRAMES
# ==================================================
# Combinamos horizontalmente (axis=1) los DataFrames:
# - df35: Datos principales de operación
# - df_labels13: Estados de salud calculados
df16 = pd.concat(
    [df35, df_labels13],
    axis='columns',  # Concatenación horizontal (por columnas)
    join='outer',    # Unión externa (mantiene todos los índices)
    verify_integrity=True  # Verifica que no haya columnas duplicadas
)

# ==================================================
# 2. METADATOS MEJORADOS
# ==================================================
metadatos = {
    'nombre_variable': 'df16',
    'tipo_objeto': type(df16).__name__,
    'dimensiones': df16.shape,
    'columnas': list(df16.columns),
    'origen_datos': {
        'df35': df35.shape,
        'df_labels13': df_labels13.shape
    },
    'solapamiento_temporal': {
        'inicio_comun': max(df35.index.min(), df_labels13.index.min()),
        'fin_comun': min(df35.index.max(), df_labels13.index.max())
    },
    'estadisticas_estado': df16['Estado de salud'].value_counts().to_dict()
}

# ==================================================
# 3. VISUALIZACIÓN DE RESULTADOS
# ==================================================
[metadatos]
df16

[{'nombre_variable': 'df16',
  'tipo_objeto': 'DataFrame',
  'dimensiones': (47600, 14),
  'columnas': ['Corriente',
   'Presión C. Eje',
   'T° Retorno',
   'T° socket Linner',
   'T° Excéntrica',
   'T° Alimentación',
   'T° Desc. C. Eje',
   'Salto anillo A',
   'Salto Anillo B',
   'Salto Anillo C',
   'Salto Anillo D',
   'Nivel Taza',
   'Setting',
   'Estado de salud'],
  'origen_datos': {'df35': (47600, 13), 'df_labels13': (47600, 1)},
  'solapamiento_temporal': {'inicio_comun': Timestamp('2017-07-01 08:00:00'),
   'fin_comun': Timestamp('2019-05-06 12:36:00')},
  'estadisticas_estado': {'Operativo': 47394,
   'Falla': 181,
   'Mantención Programada': 19,
   'Pausa': 6}}]

Unnamed: 0_level_0,Corriente,Presión C. Eje,T° Retorno,T° socket Linner,T° Excéntrica,T° Alimentación,T° Desc. C. Eje,Salto anillo A,Salto Anillo B,Salto Anillo C,Salto Anillo D,Nivel Taza,Setting,Estado de salud
Tiempo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2017-07-01 08:00:00,6.633393e+01,352.291016,39.330568,36.067589,55.485401,33.841759,34.713011,7.509375,10.081446,6.396680,14.159180,40.533623,28.600006,Operativo
2017-07-01 08:12:00,5.873533e+01,371.410156,40.237015,36.454180,55.306391,35.115929,34.984612,6.826367,8.688281,6.017188,12.778320,40.410822,28.500004,Operativo
2017-07-01 08:24:00,7.330124e+01,306.683594,42.924143,39.803396,57.962978,38.862738,36.497345,7.344141,9.495117,6.327344,13.664258,40.086174,28.500004,Operativo
2017-07-01 08:36:00,7.359566e+01,263.185547,44.400021,41.389423,59.253897,40.636448,37.869467,7.599805,9.619336,7.019141,13.967968,41.802011,28.500004,Operativo
2017-07-01 08:48:00,7.216714e+01,300.306641,43.409241,40.016947,57.869795,37.751429,38.141232,7.640820,9.582422,6.680078,13.574414,41.188478,28.500004,Operativo
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-05-06 11:48:00,3.816504e-28,191.533203,40.859182,39.509026,53.035002,38.809961,33.665823,0.247559,0.545898,0.271484,0.688477,12.427644,51.299965,Operativo
2019-05-06 12:00:00,0.000000e+00,187.421875,37.240042,38.162100,45.844812,34.772854,32.221487,0.134766,0.362793,0.154785,0.394531,3.659730,51.299965,Operativo
2019-05-06 12:12:00,0.000000e+00,203.500977,35.421877,36.641315,43.335768,33.321487,30.623049,0.122070,0.319824,0.126465,0.371094,11.449250,51.299965,Operativo
2019-05-06 12:24:00,0.000000e+00,214.321289,34.453516,35.566711,41.824305,32.437502,29.482814,0.099609,0.308594,0.122559,0.379883,27.432148,51.299965,Operativo


In [43]:
# ==================================================
# FILTRADO SEGURO DE DATAFRAME (CON VALIDACIONES)
# ==================================================

try:
    # 1. Validación previa al filtrado
    if 'Estado de salud' not in df16.columns:
        raise KeyError("La columna 'Estado de salud' no existe en el DataFrame")
    
    # 2. Filtrado con manejo de casos especiales
    condicion = df16['Estado de salud'].notna() & (df16['Estado de salud'] != 'Pausa')
    df16_filtrado = df16[condicion].copy()
    
    # 3. Generación de metadatos robustos
    metadatos = {
        'variable': 'df16_filtrado',
        'tipo': type(df16_filtrado).__name__,
        'registros_originales': len(df16),
        'registros_filtrados': len(df16_filtrado),
        'porcentaje_retencion': f"{(len(df16_filtrado)/len(df16))*100:.1f}%",
        'columnas': df16_filtrado.columns.tolist(),
        'estadisticas': {
            'distribucion_estados': df16_filtrado['Estado de salud'].value_counts().to_dict(),
            'valores_nulos': df16_filtrado.isna().sum().to_dict()
        },
        'advertencia': "Se eliminaron registros con 'Pausa' o valores nulos" if len(df16) != len(df16_filtrado) else "No se eliminaron registros"
    }

    # 4. Resultados estructurados
    print("\n● Resultados del Filtrado ●")
    print("═"*50)
    print(f"Registros originales: {len(df16)}")
    print(f"Registros filtrados: {len(df16_filtrado)}")
    print(f"Porcentaje retenido: {metadatos['porcentaje_retencion']}\n")
    
    [metadatos]
    df16 = df16_filtrado  # Asignación final después de validaciones

except Exception as e:
    print(f"\n⚠ Error en el filtrado: {str(e)}")
    print("Se mantuvo el DataFrame original sin modificaciones.")
    [{'error': str(e), 'accion': 'No se aplicaron cambios'}]
    df16  # Devuelve el DataFrame original en caso de error

# Muestra el DataFrame resultante (o original si hubo error)
df16


● Resultados del Filtrado ●
══════════════════════════════════════════════════
Registros originales: 47594
Registros filtrados: 47594
Porcentaje retenido: 100.0%



[{'variable': 'df16_filtrado',
  'tipo': 'DataFrame',
  'registros_originales': 47594,
  'registros_filtrados': 47594,
  'porcentaje_retencion': '100.0%',
  'columnas': ['Corriente',
   'Presión C. Eje',
   'T° Retorno',
   'T° socket Linner',
   'T° Excéntrica',
   'T° Alimentación',
   'T° Desc. C. Eje',
   'Salto anillo A',
   'Salto Anillo B',
   'Salto Anillo C',
   'Salto Anillo D',
   'Nivel Taza',
   'Setting',
   'Estado de salud'],
  'estadisticas': {'distribucion_estados': {'Operativo': 47394,
    'Falla': 181,
    'Mantención Programada': 19},
   'valores_nulos': {'Corriente': 0,
    'Presión C. Eje': 0,
    'T° Retorno': 0,
    'T° socket Linner': 0,
    'T° Excéntrica': 0,
    'T° Alimentación': 0,
    'T° Desc. C. Eje': 0,
    'Salto anillo A': 0,
    'Salto Anillo B': 0,
    'Salto Anillo C': 0,
    'Salto Anillo D': 0,
    'Nivel Taza': 0,
    'Setting': 0,
    'Estado de salud': 0}},
  'advertencia': 'No se eliminaron registros'}]

Unnamed: 0_level_0,Corriente,Presión C. Eje,T° Retorno,T° socket Linner,T° Excéntrica,T° Alimentación,T° Desc. C. Eje,Salto anillo A,Salto Anillo B,Salto Anillo C,Salto Anillo D,Nivel Taza,Setting,Estado de salud
Tiempo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2017-07-01 08:00:00,6.633393e+01,352.291016,39.330568,36.067589,55.485401,33.841759,34.713011,7.509375,10.081446,6.396680,14.159180,40.533623,28.600006,Operativo
2017-07-01 08:12:00,5.873533e+01,371.410156,40.237015,36.454180,55.306391,35.115929,34.984612,6.826367,8.688281,6.017188,12.778320,40.410822,28.500004,Operativo
2017-07-01 08:24:00,7.330124e+01,306.683594,42.924143,39.803396,57.962978,38.862738,36.497345,7.344141,9.495117,6.327344,13.664258,40.086174,28.500004,Operativo
2017-07-01 08:36:00,7.359566e+01,263.185547,44.400021,41.389423,59.253897,40.636448,37.869467,7.599805,9.619336,7.019141,13.967968,41.802011,28.500004,Operativo
2017-07-01 08:48:00,7.216714e+01,300.306641,43.409241,40.016947,57.869795,37.751429,38.141232,7.640820,9.582422,6.680078,13.574414,41.188478,28.500004,Operativo
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-05-06 11:48:00,3.816504e-28,191.533203,40.859182,39.509026,53.035002,38.809961,33.665823,0.247559,0.545898,0.271484,0.688477,12.427644,51.299965,Operativo
2019-05-06 12:00:00,0.000000e+00,187.421875,37.240042,38.162100,45.844812,34.772854,32.221487,0.134766,0.362793,0.154785,0.394531,3.659730,51.299965,Operativo
2019-05-06 12:12:00,0.000000e+00,203.500977,35.421877,36.641315,43.335768,33.321487,30.623049,0.122070,0.319824,0.126465,0.371094,11.449250,51.299965,Operativo
2019-05-06 12:24:00,0.000000e+00,214.321289,34.453516,35.566711,41.824305,32.437502,29.482814,0.099609,0.308594,0.122559,0.379883,27.432148,51.299965,Operativo


In [44]:
# ==================================================
# RECODIFICACIÓN SEGURA DE ESTADOS DE SALUD
# ==================================================

# Copia preventiva para evitar modificar el original (opcional)
df16 = df16.copy()

# Diccionario de mapeo para recodificación
mapeo_estados = {
    'Operativo': 0,
    'Falla': 1,
    # 'Pausa' se mantiene como está (no se menciona en el original)
}

# Recodificación eficiente con replace()
df16['Estado de salud'] = df16['Estado de salud'].replace(mapeo_estados)

# ==================================================
# METADATOS MEJORADOS
# ==================================================
metadatos = {
    'variable': 'df16',
    'tipo': type(df16).__name__,
    'dimensiones': df16.shape,
    'recodificacion': {
        'mapeo_aplicado': mapeo_estados,
        'valores_unicos': df16['Estado de salud'].value_counts().to_dict(),
        'valores_nulos': df16['Estado de salud'].isna().sum()
    },
    'columnas': df16.columns.tolist()
}

# ==================================================
# VISUALIZACIÓN DE RESULTADOS
# ==================================================
print("\n● Resumen de Recodificación ●")
print("═"*50)
print(f"Registros procesados: {len(df16)}")
print(f"Distribución final:\n{df16['Estado de salud'].value_counts()}\n")

[metadatos]
df16


● Resumen de Recodificación ●
══════════════════════════════════════════════════
Registros procesados: 47594
Distribución final:
Estado de salud
0                        47394
1                          181
Mantención Programada       19
Name: count, dtype: int64



[{'variable': 'df16',
  'tipo': 'DataFrame',
  'dimensiones': (47594, 14),
  'recodificacion': {'mapeo_aplicado': {'Operativo': 0, 'Falla': 1},
   'valores_unicos': {0: 47394, 1: 181, 'Mantención Programada': 19},
   'valores_nulos': 0},
  'columnas': ['Corriente',
   'Presión C. Eje',
   'T° Retorno',
   'T° socket Linner',
   'T° Excéntrica',
   'T° Alimentación',
   'T° Desc. C. Eje',
   'Salto anillo A',
   'Salto Anillo B',
   'Salto Anillo C',
   'Salto Anillo D',
   'Nivel Taza',
   'Setting',
   'Estado de salud']}]

Unnamed: 0_level_0,Corriente,Presión C. Eje,T° Retorno,T° socket Linner,T° Excéntrica,T° Alimentación,T° Desc. C. Eje,Salto anillo A,Salto Anillo B,Salto Anillo C,Salto Anillo D,Nivel Taza,Setting,Estado de salud
Tiempo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2017-07-01 08:00:00,6.633393e+01,352.291016,39.330568,36.067589,55.485401,33.841759,34.713011,7.509375,10.081446,6.396680,14.159180,40.533623,28.600006,0
2017-07-01 08:12:00,5.873533e+01,371.410156,40.237015,36.454180,55.306391,35.115929,34.984612,6.826367,8.688281,6.017188,12.778320,40.410822,28.500004,0
2017-07-01 08:24:00,7.330124e+01,306.683594,42.924143,39.803396,57.962978,38.862738,36.497345,7.344141,9.495117,6.327344,13.664258,40.086174,28.500004,0
2017-07-01 08:36:00,7.359566e+01,263.185547,44.400021,41.389423,59.253897,40.636448,37.869467,7.599805,9.619336,7.019141,13.967968,41.802011,28.500004,0
2017-07-01 08:48:00,7.216714e+01,300.306641,43.409241,40.016947,57.869795,37.751429,38.141232,7.640820,9.582422,6.680078,13.574414,41.188478,28.500004,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-05-06 11:48:00,3.816504e-28,191.533203,40.859182,39.509026,53.035002,38.809961,33.665823,0.247559,0.545898,0.271484,0.688477,12.427644,51.299965,0
2019-05-06 12:00:00,0.000000e+00,187.421875,37.240042,38.162100,45.844812,34.772854,32.221487,0.134766,0.362793,0.154785,0.394531,3.659730,51.299965,0
2019-05-06 12:12:00,0.000000e+00,203.500977,35.421877,36.641315,43.335768,33.321487,30.623049,0.122070,0.319824,0.126465,0.371094,11.449250,51.299965,0
2019-05-06 12:24:00,0.000000e+00,214.321289,34.453516,35.566711,41.824305,32.437502,29.482814,0.099609,0.308594,0.122559,0.379883,27.432148,51.299965,0


In [45]:
# ==================================================
# ELIMINACIÓN SEGURA DE COLUMNAS
# ==================================================

# 1. Eliminación explícita con validación
columna_a_eliminar = 'Estado de salud'
if columna_a_eliminar in df16.columns:
    df22 = df16.drop(columns=columna_a_eliminar)
else:
    print(f"Advertencia: La columna '{columna_a_eliminar}' no existe en el DataFrame")
    df22 = df16.copy()

# 2. Metadatos mejorados
metadatos = {
    'variable': 'df22',
    'tipo': type(df22).__name__,
    'dimensiones': df22.shape,
    'columnas_restantes': df22.columns.tolist(),
    'proceso': {
        'accion': f"Eliminación de columna '{columna_a_eliminar}'",
        'realizado': columna_a_eliminar in df16.columns,
        'columnas_originales': df16.columns.tolist()
    }
}

# 3. Visualización estructurada
print("\n● Resumen de Modificación ●")
print("═"*50)
print(f"Columnas originales: {len(df16.columns)}")
print(f"Columnas resultantes: {len(df22.columns)}")
print(f"Columna eliminada: {'Sí' if columna_a_eliminar in df16.columns else 'No (no existía)'}\n")

[metadatos]
df22


● Resumen de Modificación ●
══════════════════════════════════════════════════
Columnas originales: 14
Columnas resultantes: 13
Columna eliminada: Sí



[{'variable': 'df22',
  'tipo': 'DataFrame',
  'dimensiones': (47594, 13),
  'columnas_restantes': ['Corriente',
   'Presión C. Eje',
   'T° Retorno',
   'T° socket Linner',
   'T° Excéntrica',
   'T° Alimentación',
   'T° Desc. C. Eje',
   'Salto anillo A',
   'Salto Anillo B',
   'Salto Anillo C',
   'Salto Anillo D',
   'Nivel Taza',
   'Setting'],
  'proceso': {'accion': "Eliminación de columna 'Estado de salud'",
   'realizado': True,
   'columnas_originales': ['Corriente',
    'Presión C. Eje',
    'T° Retorno',
    'T° socket Linner',
    'T° Excéntrica',
    'T° Alimentación',
    'T° Desc. C. Eje',
    'Salto anillo A',
    'Salto Anillo B',
    'Salto Anillo C',
    'Salto Anillo D',
    'Nivel Taza',
    'Setting',
    'Estado de salud']}}]

Unnamed: 0_level_0,Corriente,Presión C. Eje,T° Retorno,T° socket Linner,T° Excéntrica,T° Alimentación,T° Desc. C. Eje,Salto anillo A,Salto Anillo B,Salto Anillo C,Salto Anillo D,Nivel Taza,Setting
Tiempo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
2017-07-01 08:00:00,6.633393e+01,352.291016,39.330568,36.067589,55.485401,33.841759,34.713011,7.509375,10.081446,6.396680,14.159180,40.533623,28.600006
2017-07-01 08:12:00,5.873533e+01,371.410156,40.237015,36.454180,55.306391,35.115929,34.984612,6.826367,8.688281,6.017188,12.778320,40.410822,28.500004
2017-07-01 08:24:00,7.330124e+01,306.683594,42.924143,39.803396,57.962978,38.862738,36.497345,7.344141,9.495117,6.327344,13.664258,40.086174,28.500004
2017-07-01 08:36:00,7.359566e+01,263.185547,44.400021,41.389423,59.253897,40.636448,37.869467,7.599805,9.619336,7.019141,13.967968,41.802011,28.500004
2017-07-01 08:48:00,7.216714e+01,300.306641,43.409241,40.016947,57.869795,37.751429,38.141232,7.640820,9.582422,6.680078,13.574414,41.188478,28.500004
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-05-06 11:48:00,3.816504e-28,191.533203,40.859182,39.509026,53.035002,38.809961,33.665823,0.247559,0.545898,0.271484,0.688477,12.427644,51.299965
2019-05-06 12:00:00,0.000000e+00,187.421875,37.240042,38.162100,45.844812,34.772854,32.221487,0.134766,0.362793,0.154785,0.394531,3.659730,51.299965
2019-05-06 12:12:00,0.000000e+00,203.500977,35.421877,36.641315,43.335768,33.321487,30.623049,0.122070,0.319824,0.126465,0.371094,11.449250,51.299965
2019-05-06 12:24:00,0.000000e+00,214.321289,34.453516,35.566711,41.824305,32.437502,29.482814,0.099609,0.308594,0.122559,0.379883,27.432148,51.299965


In [None]:
# ==================================================
# SELECCIÓN SEGURA DE COLUMNAS
# ==================================================

# 1. Selección con validación de columna
columna_seleccionada = 'Estado de salud'

if columna_seleccionada in df16.columns:
    df23 = df16[[columna_seleccionada]].copy()  # Uso de doble corchete para DataFrame
else:
    print(f"Error: La columna '{columna_seleccionada}' no existe en el DataFrame")
    df23 = pd.DataFrame(columns=[columna_seleccionada])  # DataFrame vacío como fallback

# 2. Metadatos mejorados
metadatos = {
    'variable': 'df23',
    'tipo': type(df23).__name__,
    'dimensiones': df23.shape,
    'columnas': df23.columns.tolist(),
    'contenido': {
        'columna_origen': columna_seleccionada,
        'valores_unicos': df23[columna_seleccionada].value_counts().to_dict() if columna_seleccionada in df16.columns else 'Columna no disponible',
        'nulos': df23[columna_seleccionada].isna().sum() if columna_seleccionada in df16.columns else 0
    }
}

# 3. Visualización mejorada
print("\n● Resumen de Selección ●")
print("═"*50)
print(f"Columna seleccionada: '{columna_seleccionada}'")
print(f"Existía en original: {'Sí' if columna_seleccionada in df16.columns else 'No'}")
print(f"Registros obtenidos: {len(df23)}\n")

[metadatos]
df23

In [None]:
# ==================================================
# FUNCIÓN ORIGINAL (RESULTADO PRESERVADO EXACTAMENTE)
# ==================================================
def datetime_window(dfx, dfy, dfy_total, scaler, size, periods, y_choose='last', ahead=''):
    # Preservar exactamente la conversión de categorías original
    catcolsx = dfx.select_dtypes(['category']).columns
    dfx[catcolsx] = dfx[catcolsx].apply(lambda x: x.cat.codes)

    catcolsy = dfy.select_dtypes(['category']).columns
    dfy[catcolsy] = dfy[catcolsy].apply(lambda x: x.cat.codes)

    catcolsy_total = dfy_total.select_dtypes(['category']).columns
    dfy_total[catcolsy_total] = dfy_total[catcolsy_total].apply(lambda x: x.cat.codes)

    # Estructuras de datos originales
    ind = []
    x = []
    y = []

    # Función interna idéntica a la original
    def get_windows(win):
        if (win.shape[0] != periods):
            return np.nan
        
        if (y_choose == 'ahead'):
            wy = dfy.loc[win.index, :]
            try:
                wy = dfy_total.loc[wy.index[-1] + pd.Timedelta(ahead)]
                y.append(wy)
                wx = dfx.loc[win.index, :]
                x.append(wx)
                ind.append(win.index[0])
            except:
                pass
        else:
            wx = dfx.loc[win.index, :]
            x.append(wx)

            wy = dfy.loc[win.index, :]
            if (y_choose == 'last'):
                wy = wy.iloc[-1]
            elif (y_choose == 'first'):
                wy = wy.iloc[0]
            y.append(wy)

            ind.append(win.index[0])
        
        return np.nan

    # Llamada idéntica a rolling
    dfx.iloc[:, 0].rolling(size).apply(get_windows)
    
    return x, y, ind

# ==================================================
# FUNCIÓN ORIGINAL DE DIVISIÓN (RESULTADO PRESERVADO)
# ==================================================
def normal_anomal_split(dfx, dfy, size, periods, y_choose='last', ahead='', custom_normal_split=False, normal_test_ratio=0.1, scaler='MinMaxScaler'):
    # Configuración idéntica del scaler
    if (scaler == 'MinMaxScaler'):
        myscaler = MinMaxScaler(feature_range=(0, 1))
    elif (scaler == 'StandardScaler'):
        myscaler = StandardScaler()
    elif(scaler == 'RobustScaler'):
        myscaler = RobustScaler(with_centering=True, with_scaling=True, quantile_range=(25, 75))
    
    # Llamada idéntica a datetime_window
    x_windowed, y_windowed, windows_ind = datetime_window(dfx=dfx, dfy=dfy, dfy_total=dfy, scaler=myscaler, size=size, periods=periods, y_choose=y_choose, ahead=ahead)
    
    # Procesamiento idéntico de ventanas
    x_normal = []
    y_normal = []
    x_anomal = []
    y_anomal = []
    for (i, wy) in enumerate(y_windowed):
        if wy.iloc[0] == 0:
            x_normal.append(x_windowed[i])
            y_normal.append(y_windowed[i])
        else:
            x_anomal.append(x_windowed[i])
            y_anomal.append(y_windowed[i])
    
    # Misma lógica de división
    normal_indices = [i for i in range(len(x_normal))]
    normal_indices_permuted = np.random.permutation(normal_indices)
    
    length = len(x_normal)
    length_test = round(length * normal_test_ratio)
    
    if custom_normal_split:
        normal_test_indices = normal_indices_permuted[:length_test]
        normal_train_indices = normal_indices_permuted[length_test:]
    else:
        normal_test_indices = normal_indices_permuted[:len(y_anomal)]
        normal_train_indices = normal_indices_permuted[len(y_anomal):]
    
    # Procesamiento idéntico de datos
    x_normal_train = [x_normal[i] for i in normal_train_indices]
    x_normal_test = [x_normal[i] for i in normal_test_indices]
    
    x_normal_train_total = pd.concat(x_normal_train)
    x_normal_train_total = x_normal_train_total[~x_normal_train_total.index.duplicated(keep='first')]
    x_normal_train_total.sort_index(inplace=True)
    
    myscaler.fit(x_normal_train_total.to_numpy())
    
    x_train = []
    for wx in x_normal_train:
        wx = wx.to_numpy()
        wx = np.squeeze(myscaler.transform(wx))
        x_train.append(wx)
    
    x_test = []
    y_test = []
    for wx in x_normal_test:
        wx = wx.to_numpy()
        wx = np.squeeze(myscaler.transform(wx))
        x_test.append(wx)
        y_test.append(0)
        
    for wx in x_anomal:
        wx = wx.to_numpy()
        wx = np.squeeze(myscaler.transform(wx))
        x_test.append(wx)
        y_test.append(1)
    
    x_mix = []
    for wx in x_windowed:
        wx = wx.to_numpy()
        wx = np.squeeze(myscaler.transform(wx))
        x_mix.append(wx)
    
    y_mix = []
    for wy in y_windowed:
        wy = wy.to_numpy()
        wy = wy[0]
        y_mix.append(wy)
        
    i_mix = windows_ind

    return x_train, x_test, y_test, x_mix, y_mix, i_mix, myscaler

# ==================================================
# EJECUCIÓN IDÉNTICA A LA ORIGINAL
# ==================================================
x_train, x_test, y_test, x_mix, y_mix, i_mix, myscaler = normal_anomal_split(
    dfx=df22, 
    dfy=df23,
    size='60T', 
    periods=5, 
    y_choose='last', 
    ahead='',
    custom_normal_split=True, 
    normal_test_ratio=0.2, 
    scaler='MinMaxScaler'
)

# Estructuración idéntica de resultados
df21 = {
    'x_train': np.array(x_train),
    'x_test': np.array(x_test),
    'y_test': np.array(y_test),
    'x_mix': np.array(x_mix),
    'y_mix': np.array(y_mix),
    'i_mix': i_mix,
    'myscaler': myscaler,
}

# Metadatos idénticos
attrs = {
    'var': 'df21',
    'type': type(df21).__name__,
}

# Salida idéntica
[attrs]
{
    'x_train': print_list_shape(x_train),
    'x_test': print_list_shape(x_test),
    'y_test': print_list_shape(y_test),
}

In [None]:
# ==================================================
# GUARDADO DE DATOS (MANTIENE RESULTADO ORIGINAL)
# ==================================================

# 1. Guardado del archivo NPZ (idéntico al original)
file_npz = os.path.join('C:/Users/Milovan/Desktop/comp/certamen2/chancadores/archnpz', 'ventanas12T.npz')
np.savez(
    file_npz,
    x_train=df21['x_train'],  # Exactamente igual que en el original
    x_test=df21['x_test'],
    y_test=df21['y_test'],
    x_mix=df21['x_mix'],
    y_mix=df21['y_mix'],
    i_mix=df21['i_mix']
)

# 2. Guardado del scaler (idéntico al original)
file_scaler = os.path.join('C:/Users/Milovan/Desktop/comp/certamen2/chancadores/archnpz', 'scaler.joblib')
joblib.dump(df21['myscaler'], file_scaler)

# 3. Metadatos de salida (idénticos al original)
attrs = {
    'var': 'empty',
    'type': 'empty'
}

# 4. Salida idéntica al original
[attrs]
'npz file saved'