## Load libraries

In [33]:
import os
import requests
import pandas as pd
import numpy as np
#import matplotlib.pyplot as plt # Pendiente instalar

import tensorflow as tf
import tensorflow_data_validation as tfdv
from tensorflow_metadata.proto.v0 import schema_pb2

from tfx.components import CsvExampleGen
from tfx.components import ExampleValidator
from tfx.components import SchemaGen
from tfx.components import StatisticsGen
from tfx.components import Transform

from tfx.orchestration.experimental.interactive.interactive_context import InteractiveContext
from google.protobuf.json_format import MessageToDict
import pprint
from sklearn.feature_selection import SelectKBest, chi2
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.preprocessing import OneHotEncoder

## Load Data

In [15]:
## download the dataset
# Directory of the raw data files
_data_root = './data/covertype'
# Path to the raw training data
_data_filepath = os.path.join(_data_root, 'covertype_train.csv')
# Download data
os.makedirs(_data_root, exist_ok=True)
if not os.path.isfile(_data_filepath):
 #https://archive.ics.uci.edu/ml/machine-learning-databases/covtype/
 url = 'https://docs.google.com/uc?export= \
 download&confirm={{VALUE}}&id=1lVF1BCWLH4eXXV_YOJzjR7xZjj-wAGj9'
 r = requests.get(url, allow_redirects=True, stream=True)
 open(_data_filepath, 'wb').write(r.content)

# Read the CSV file into a DataFrame
if os.path.exists(_data_filepath):
    df = pd.read_csv(_data_filepath)
    print("Dataframe was loaded")  # Display the first few rows
else:
    print(f"File not found: {_data_filepath}")

Dataframe was loaded


## Feature Selection

### Data Exploration

In [16]:
# Se identifica el tipo de dato de cada una de las variables
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 116203 entries, 0 to 116202
Data columns (total 13 columns):
 #   Column                              Non-Null Count   Dtype 
---  ------                              --------------   ----- 
 0   Elevation                           116203 non-null  int64 
 1   Aspect                              116203 non-null  int64 
 2   Slope                               116203 non-null  int64 
 3   Horizontal_Distance_To_Hydrology    116203 non-null  int64 
 4   Vertical_Distance_To_Hydrology      116203 non-null  int64 
 5   Horizontal_Distance_To_Roadways     116203 non-null  int64 
 6   Hillshade_9am                       116203 non-null  int64 
 7   Hillshade_Noon                      116203 non-null  int64 
 8   Hillshade_3pm                       116203 non-null  int64 
 9   Horizontal_Distance_To_Fire_Points  116203 non-null  int64 
 10  Wilderness_Area                     116203 non-null  object
 11  Soil_Type                           116

In [17]:
# Se generan dos listas segun las caracteristicas de las variables para facilitar la exploracion
quantitative_variables = ['Elevation', 'Aspect', 'Slope', 'Horizontal_Distance_To_Hydrology',
       'Vertical_Distance_To_Hydrology', 'Horizontal_Distance_To_Roadways',
       'Hillshade_9am', 'Hillshade_Noon', 'Hillshade_3pm',
       'Horizontal_Distance_To_Fire_Points']

categorical_variables = ['Wilderness_Area', 'Soil_Type',
       'Cover_Type']

#### Quantitative Variables

In [18]:
# Se exploran las variables cuantitativas mediantes metricas de estadistica basica
df[quantitative_variables].describe()

Unnamed: 0,Elevation,Aspect,Slope,Horizontal_Distance_To_Hydrology,Vertical_Distance_To_Hydrology,Horizontal_Distance_To_Roadways,Hillshade_9am,Hillshade_Noon,Hillshade_3pm,Horizontal_Distance_To_Fire_Points
count,116203.0,116203.0,116203.0,116203.0,116203.0,116203.0,116203.0,116203.0,116203.0,116203.0
mean,2958.442553,155.611869,14.126572,268.815297,46.54089,2344.471993,212.08011,223.328537,142.583117,1974.181002
std,280.264645,111.826494,7.517868,212.401029,58.650981,1559.288208,26.956645,19.796919,38.365816,1316.012073
min,1860.0,0.0,0.0,0.0,-166.0,0.0,0.0,0.0,0.0,0.0
25%,2808.0,59.0,9.0,108.0,7.0,1101.0,198.0,213.0,119.0,1024.0
50%,2996.0,127.0,13.0,218.0,29.0,1986.0,218.0,226.0,143.0,1705.0
75%,3163.0,261.0,18.0,384.0,69.0,3311.0,231.0,237.0,168.0,2543.0
max,3858.0,360.0,66.0,1397.0,598.0,7116.0,254.0,254.0,253.0,7168.0


In [19]:
corr_matrix = df[quantitative_variables].corr()
# Plot the heatmap
#plt.figure(figsize=(12, 8))
#sns.heatmap(corr_matrix, annot=True, cmap="coolwarm", fmt=".2f", linewidths=0.5)
#plt.title("Correlation Matrix of Quantitative Variables")
#plt.show()

"""
No se identifican correlaciones directas o inversas mayores a 0.8, por consiguiente, no se descartan variables cuantitativas por correlacion
altamente significativa
"""
corr_matrix

Unnamed: 0,Elevation,Aspect,Slope,Horizontal_Distance_To_Hydrology,Vertical_Distance_To_Hydrology,Horizontal_Distance_To_Roadways,Hillshade_9am,Hillshade_Noon,Hillshade_3pm,Horizontal_Distance_To_Fire_Points
Elevation,1.0,0.013638,-0.246626,0.306427,0.089746,0.368821,0.118341,0.20927,0.056217,0.153658
Aspect,0.013638,1.0,0.08056,0.01755,0.070517,0.024409,-0.576595,0.335843,0.646172,-0.111924
Slope,-0.246626,0.08056,1.0,-0.011202,0.276691,-0.21841,-0.333211,-0.527148,-0.170408,-0.185317
Horizontal_Distance_To_Hydrology,0.306427,0.01755,-0.011202,1.0,0.610122,0.074821,-0.026795,0.04638,0.052295,0.05177
Vertical_Distance_To_Hydrology,0.089746,0.070517,0.276691,0.610122,1.0,-0.044835,-0.170377,-0.111661,0.038072,-0.070214
Horizontal_Distance_To_Roadways,0.368821,0.024409,-0.21841,0.074821,-0.044835,1.0,0.037724,0.190562,0.104052,0.331464
Hillshade_9am,0.118341,-0.576595,-0.333211,-0.026795,-0.170377,0.037724,1.0,0.01368,-0.780337,0.135143
Hillshade_Noon,0.20927,0.335843,-0.527148,0.04638,-0.111661,0.190562,0.01368,1.0,0.590911,0.056642
Hillshade_3pm,0.056217,0.646172,-0.170408,0.052295,0.038072,0.104052,-0.780337,0.590911,1.0,-0.050605
Horizontal_Distance_To_Fire_Points,0.153658,-0.111924,-0.185317,0.05177,-0.070214,0.331464,0.135143,0.056642,-0.050605,1.0


In [20]:
for i in categorical_variables:
    print(f"variable: {i}")
    print(df[i].value_counts())
    print("--------------------")
"""
Las variables categoricas estan altamente desbalaceadas, en especial "Wilderness_Area" y "Cover_Type" se concentran principalmente en 2 variables.
Por su parte, "Soil_Type" se concentra principalmente en 4 variables, por la cantidad de categorias, hay algunas con pocos valores.
"""
    

variable: Wilderness_Area
Rawah        52006
Commanche    50759
Cache         7525
Neota         5913
Name: Wilderness_Area, dtype: int64
--------------------
variable: Soil_Type
C7745    22912
C7202    11560
C7756    10539
C7757     9178
C7201     6690
C4703     6541
C7746     6068
C4744     5961
C7755     5130
C7700     4244
C4758     3443
C8771     3113
C8772     2720
C2705     2488
C4704     2396
C7102     1881
C8776     1729
C2703     1478
C2717     1378
C2704      977
C7101      787
C6102      690
C2702      592
C6101      565
C7702      539
C8703      385
C6731      375
C7790      324
C2706      322
C4201      257
C7709      242
C7710      194
C7103      156
C5101      123
C7701      101
C8708       63
C3502       24
C3501       19
C8707       18
C5151        1
Name: Soil_Type, dtype: int64
--------------------
variable: Cover_Type
1    56720
0    42307
2     7228
6     4045
5     3478
4     1892
3      533
Name: Cover_Type, dtype: int64
--------------------


'\nLas variables categoricas estan altamente desbalaceadas, en especial "Wilderness_Area" y "Cover_Type" se concentran principalmente en 2 variables.\nPor su parte, "Soil_Type" se concentra principalmente en 4 variables, por la cantidad de categorias, hay algunas con pocos valores.\n'

In [21]:
encoder = OneHotEncoder(sparse_output=False)
one_hot_encoded = encoder.fit_transform(df[categorical_variables])

In [22]:
# Convertir el resultado en un DataFrame
column_names = encoder.get_feature_names_out(df[categorical_variables].columns)
onehot_df = pd.DataFrame(one_hot_encoded, columns=column_names)

# Mostrar el resultado
print(onehot_df)

        Wilderness_Area_Cache  Wilderness_Area_Commanche  \
0                         0.0                        1.0   
1                         0.0                        1.0   
2                         0.0                        0.0   
3                         0.0                        0.0   
4                         0.0                        0.0   
...                       ...                        ...   
116198                    0.0                        1.0   
116199                    0.0                        0.0   
116200                    0.0                        0.0   
116201                    0.0                        0.0   
116202                    0.0                        1.0   

        Wilderness_Area_Neota  Wilderness_Area_Rawah  Soil_Type_C2702  \
0                         0.0                    0.0              0.0   
1                         0.0                    0.0              0.0   
2                         0.0                    1.0        

# Configurar el contexto iterativo

In [23]:
from tfx.orchestration.experimental.interactive.interactive_context import InteractiveContext
from tfx.components import CsvExampleGen, StatisticsGen, SchemaGen,ExampleValidator
from tfx.v1.components import ImportSchemaGen

# Especifica la ruta al archivo de base de datos de metadatos SQLite#
metadata_db_uri = 'metadata'

context = InteractiveContext(pipeline_root = metadata_db_uri)



## Generando un ejemplo con los datos

In [24]:
example_gen = CsvExampleGen(input_base=_data_root)
context.run(example_gen)

0,1
.execution_id,13
.component,"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } CsvExampleGen at 0x79b0d6769630.inputs{}.outputs['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x79b0d676ad10.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: metadata/CsvExampleGen/examples/13) at 0x79b14d185360.type<class 'tfx.types.standard_artifacts.Examples'>.urimetadata/CsvExampleGen/examples/13.span0.split_names[""train"", ""eval""].version0.exec_properties['input_base']./data/covertype['input_config']{  ""splits"": [  {  ""name"": ""single_split"",  ""pattern"": ""*""  }  ] }['output_config']{  ""split_config"": {  ""splits"": [  {  ""hash_buckets"": 2,  ""name"": ""train""  },  {  ""hash_buckets"": 1,  ""name"": ""eval""  }  ]  } }['output_data_format']6['output_file_format']5['custom_config']None['range_config']None['span']0['version']None['input_fingerprint']split:single_split,num_files:1,total_bytes:6405459,xor_checksum:1740538253,sum_checksum:1740538253"
.component.inputs,{}
.component.outputs,"['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x79b0d676ad10.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: metadata/CsvExampleGen/examples/13) at 0x79b14d185360.type<class 'tfx.types.standard_artifacts.Examples'>.urimetadata/CsvExampleGen/examples/13.span0.split_names[""train"", ""eval""].version0"

0,1
.inputs,{}
.outputs,"['examples'] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x79b0d676ad10.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: metadata/CsvExampleGen/examples/13) at 0x79b14d185360.type<class 'tfx.types.standard_artifacts.Examples'>.urimetadata/CsvExampleGen/examples/13.span0.split_names[""train"", ""eval""].version0"
.exec_properties,"['input_base']./data/covertype['input_config']{  ""splits"": [  {  ""name"": ""single_split"",  ""pattern"": ""*""  }  ] }['output_config']{  ""split_config"": {  ""splits"": [  {  ""hash_buckets"": 2,  ""name"": ""train""  },  {  ""hash_buckets"": 1,  ""name"": ""eval""  }  ]  } }['output_data_format']6['output_file_format']5['custom_config']None['range_config']None['span']0['version']None['input_fingerprint']split:single_split,num_files:1,total_bytes:6405459,xor_checksum:1740538253,sum_checksum:1740538253"

0,1
['examples'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x79b0d676ad10.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: metadata/CsvExampleGen/examples/13) at 0x79b14d185360.type<class 'tfx.types.standard_artifacts.Examples'>.urimetadata/CsvExampleGen/examples/13.span0.split_names[""train"", ""eval""].version0"

0,1
.type_name,Examples
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: metadata/CsvExampleGen/examples/13) at 0x79b14d185360.type<class 'tfx.types.standard_artifacts.Examples'>.urimetadata/CsvExampleGen/examples/13.span0.split_names[""train"", ""eval""].version0"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: metadata/CsvExampleGen/examples/13) at 0x79b14d185360.type<class 'tfx.types.standard_artifacts.Examples'>.urimetadata/CsvExampleGen/examples/13.span0.split_names[""train"", ""eval""].version0"

0,1
.type,<class 'tfx.types.standard_artifacts.Examples'>
.uri,metadata/CsvExampleGen/examples/13
.span,0
.split_names,"[""train"", ""eval""]"
.version,0

0,1
['input_base'],./data/covertype
['input_config'],"{  ""splits"": [  {  ""name"": ""single_split"",  ""pattern"": ""*""  }  ] }"
['output_config'],"{  ""split_config"": {  ""splits"": [  {  ""hash_buckets"": 2,  ""name"": ""train""  },  {  ""hash_buckets"": 1,  ""name"": ""eval""  }  ]  } }"
['output_data_format'],6
['output_file_format'],5
['custom_config'],
['range_config'],
['span'],0
['version'],
['input_fingerprint'],"split:single_split,num_files:1,total_bytes:6405459,xor_checksum:1740538253,sum_checksum:1740538253"

0,1
['examples'],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Channel of type 'Examples' (1 artifact) at 0x79b0d676ad10.type_nameExamples._artifacts[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: metadata/CsvExampleGen/examples/13) at 0x79b14d185360.type<class 'tfx.types.standard_artifacts.Examples'>.urimetadata/CsvExampleGen/examples/13.span0.split_names[""train"", ""eval""].version0"

0,1
.type_name,Examples
._artifacts,"[0] function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: metadata/CsvExampleGen/examples/13) at 0x79b14d185360.type<class 'tfx.types.standard_artifacts.Examples'>.urimetadata/CsvExampleGen/examples/13.span0.split_names[""train"", ""eval""].version0"

0,1
[0],"function toggleTfxObject(element) {  var objElement = element.parentElement;  if (objElement.classList.contains('collapsed')) {  objElement.classList.remove('collapsed');  objElement.classList.add('expanded');  } else {  objElement.classList.add('collapsed');  objElement.classList.remove('expanded');  } } Artifact of type 'Examples' (uri: metadata/CsvExampleGen/examples/13) at 0x79b14d185360.type<class 'tfx.types.standard_artifacts.Examples'>.urimetadata/CsvExampleGen/examples/13.span0.split_names[""train"", ""eval""].version0"

0,1
.type,<class 'tfx.types.standard_artifacts.Examples'>
.uri,metadata/CsvExampleGen/examples/13
.span,0
.split_names,"[""train"", ""eval""]"
.version,0


## Cargando estadísticas del ejemplo

In [25]:
# Dataset statistics
statistics_gen = StatisticsGen(
      examples=example_gen.outputs['examples']
      )
context.run(statistics_gen)
context.show(statistics_gen.outputs['statistics'])

## Inferir el esquema

In [26]:
# Infer schema from the statistics output
schema_gen = SchemaGen(
    statistics=statistics_gen.outputs['statistics'])
context.run(schema_gen)
context.show(schema_gen.outputs['schema'])

Unnamed: 0_level_0,Type,Presence,Valency,Domain
Feature name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
'Aspect',INT,required,,-
'Cover_Type',INT,required,,-
'Elevation',INT,required,,-
'Hillshade_3pm',INT,required,,-
'Hillshade_9am',INT,required,,-
'Hillshade_Noon',INT,required,,-
'Horizontal_Distance_To_Fire_Points',INT,required,,-
'Horizontal_Distance_To_Hydrology',INT,required,,-
'Horizontal_Distance_To_Roadways',INT,required,,-
'Slope',INT,required,,-


Unnamed: 0_level_0,Values
Domain,Unnamed: 1_level_1
'Soil_Type',"'C2702', 'C2703', 'C2704', 'C2705', 'C2706', 'C2717', 'C3501', 'C3502', 'C4201', 'C4703', 'C4704', 'C4744', 'C4758', 'C5101', 'C6101', 'C6102', 'C6731', 'C7101', 'C7102', 'C7103', 'C7201', 'C7202', 'C7700', 'C7701', 'C7702', 'C7709', 'C7710', 'C7745', 'C7746', 'C7755', 'C7756', 'C7757', 'C7790', 'C8703', 'C8707', 'C8708', 'C8771', 'C8772', 'C8776', 'C5151'"
'Wilderness_Area',"'Cache', 'Commanche', 'Neota', 'Rawah'"


## Curar el esquema

In [38]:
schema = tfdv.load_schema_text('metadata/SchemaGen/schema/5/schema.pbtxt')
tfdv.set_domain(schema, "Hillshade_9am", schema_pb2.IntDomain(min=0,max=255))
tfdv.set_domain(schema, "Hillshade_Noon", schema_pb2.IntDomain(min=0,max=255))
tfdv.set_domain(schema, "Slope", schema_pb2.IntDomain(min=0,max=90))
tfdv.set_domain(schema, "Cover_Type", schema_pb2.StringDomain(value =['0','1','2','3','4','5','6']))
tfdv.display_schema(schema=schema)

Unnamed: 0_level_0,Type,Presence,Valency,Domain
Feature name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
'Aspect',INT,required,,-
'Cover_Type',INT,required,,'Cover_Type_domain'
'Elevation',INT,required,,-
'Hillshade_3pm',INT,required,,-
'Hillshade_9am',INT,required,,min: 0; max: 255
'Hillshade_Noon',INT,required,,min: 0; max: 255
'Horizontal_Distance_To_Fire_Points',INT,required,,-
'Horizontal_Distance_To_Hydrology',INT,required,,-
'Horizontal_Distance_To_Roadways',INT,required,,-
'Slope',INT,required,,min: 0; max: 90


Unnamed: 0_level_0,Values
Domain,Unnamed: 1_level_1
'Soil_Type',"'C2702', 'C2703', 'C2704', 'C2705', 'C2706', 'C2717', 'C3501', 'C3502', 'C4201', 'C4703', 'C4704', 'C4744', 'C4758', 'C5101', 'C6101', 'C6102', 'C6731', 'C7101', 'C7102', 'C7103', 'C7201', 'C7202', 'C7700', 'C7701', 'C7702', 'C7709', 'C7710', 'C7745', 'C7746', 'C7755', 'C7756', 'C7757', 'C7790', 'C8703', 'C8707', 'C8708', 'C8771', 'C8772', 'C8776', 'C5151'"
'Wilderness_Area',"'Cache', 'Commanche', 'Neota', 'Rawah'"
'Cover_Type_domain',"'0', '1', '2', '3', '4', '5', '6'"


In [None]:
schema_gen.default_environment.append('TRAINING')
schema_gen.default_environment.append('SERVING')

# 5.1 Metadatos de aprendizaje automático

In [18]:
from ml_metadata import metadata_store
from ml_metadata.proto import metadata_store_pb2

In [17]:
# Ruta al almacen de metadatos
metadata_connection_config = context.metadata_connection_config

In [19]:
# Creación de una instancia de MetadataStore
store = metadata_store.MetadataStore(metadata_connection_config)

In [20]:
# Recuperar y mostrar todos los tipos de artefactos
artifact_types = store.get_artifact_types()
for artifact_type in artifact_types:
    print(artifact_type.name)

Examples
ExampleStatistics
Schema


In [21]:
# Obtener artefactos de tipo 'Schema'
schema_artifacts = store.get_artifacts_by_type('Schema')

# Mostrar detalles de los artefactos Schema
for artifact in schema_artifacts:
    print(f"Artifact ID: {artifact.id}")
    print(f"URI: {artifact.uri}")

Artifact ID: 3
URI: metadata/SchemaGen/schema/3


In [22]:
if schema_artifacts:
    first_schema_artifact = schema_artifacts[0]
    print(f"Name: {first_schema_artifact.name}")
    print(f"State: {first_schema_artifact.state}")
    print(f"Type: {first_schema_artifact.type_id}")

Name: schema:2025-02-26T02:19:48.957331
State: 2
Type: 18


# 5.2 Seguimiento de artefactos

In [23]:
def get_artifacts_details(store, type_name):
    artifacts = store.get_artifacts_by_type(type_name)
    data = []
    
    for artifact in artifacts:
        data.append({
            'Artifact ID': artifact.id,
            'Type': type_name,
            'URI': artifact.uri
        })

    df = pd.DataFrame(data)
    return df

# 5.3 Obtener artefactos principales

In [25]:
# Se obtienen los artefactos principales para 'Examples'
get_artifacts_details(store, 'Examples')

Unnamed: 0,Artifact ID,Type,URI
0,1,Examples,metadata/CsvExampleGen/examples/1


In [26]:
# Se obtienen los artefactos principales para 'Schema'
get_artifacts_details(store, 'Schema')

Unnamed: 0,Artifact ID,Type,URI
0,3,Schema,metadata/SchemaGen/schema/3
