# Creating the Covertype schema

In [1]:
import tensorflow_data_validation as tfdv
from google.protobuf.json_format import MessageToDict, MessageToJson, ParseDict
from tensorflow_metadata.proto.v0 import schema_pb2

### Define the schema as a JSON-style dictionary

In [4]:
# Schema expressed as a plain dictionary: a list of features plus the string
# domains that enumerate the allowed values of the BYTES features. Each BYTES
# feature references the string domain that carries the same name.
schema_dict = {
    'feature': [
        # FLOAT features: only a name and a type.
        *(
            {'name': float_feature, 'type': 'FLOAT'}
            for float_feature in (
                'Elevation',
                'Aspect',
                'Slope',
                'Horizontal_Distance_To_Hydrology',
                'Vertical_Distance_To_Hydrology',
                'Horizontal_Distance_To_Roadways',
                'Hillshade_9am',
                'Hillshade_Noon',
                'Hillshade_3pm',
                'Horizontal_Distance_To_Fire_Points',
            )
        ),
        # BYTES features: the domain name equals the feature name.
        *(
            {'name': bytes_feature, 'type': 'BYTES', 'domain': bytes_feature}
            for bytes_feature in ('Wilderness_Area', 'Soil_Type')
        ),
    ],
    'stringDomain': [
        {
            'name': 'Soil_Type',
            'value': [
                '2702', '2703', '2704', '2705', '2706', '2717', '3501', '3502',
                '4201', '4703', '4704', '4744', '4758', '5101', '5151', '6101',
                '6102', '6731', '7101', '7102', '7103', '7201', '7202', '7700',
                '7701', '7702', '7709', '7710', '7745', '7746', '7755', '7756',
                '7757', '7790', '8703', '8707', '8708', '8771', '8772', '8776',
            ],
        },
        {
            'name': 'Wilderness_Area',
            'value': ['Cache', 'Commanche', 'Neota', 'Rawah'],
        },
    ],
}

### Convert the schema dictionary to a protocol buffer

In [5]:
# ParseDict populates the message it is given and returns that same message,
# so the empty Schema can be created and bound in a single expression.
schema = ParseDict(schema_dict, schema_pb2.Schema())
schema

feature {
  name: "Elevation"
  type: FLOAT
}
feature {
  name: "Aspect"
  type: FLOAT
}
feature {
  name: "Slope"
  type: FLOAT
}
feature {
  name: "Horizontal_Distance_To_Hydrology"
  type: FLOAT
}
feature {
  name: "Vertical_Distance_To_Hydrology"
  type: FLOAT
}
feature {
  name: "Horizontal_Distance_To_Roadways"
  type: FLOAT
}
feature {
  name: "Hillshade_9am"
  type: FLOAT
}
feature {
  name: "Hillshade_Noon"
  type: FLOAT
}
feature {
  name: "Hillshade_3pm"
  type: FLOAT
}
feature {
  name: "Horizontal_Distance_To_Fire_Points"
  type: FLOAT
}
feature {
  name: "Wilderness_Area"
  type: BYTES
  domain: "Wilderness_Area"
}
feature {
  name: "Soil_Type"
  type: BYTES
  domain: "Soil_Type"
}
string_domain {
  name: "Soil_Type"
  value: "2702"
  value: "2703"
  value: "2704"
  value: "2705"
  value: "2706"
  value: "2717"
  value: "3501"
  value: "3502"
  value: "4201"
  value: "4703"
  value: "4704"
  value: "4744"
  value: "4758"
  value: "5101"
  value: "5151"
  value: "6101"
  v

### Save the schema as a text-format protobuf file

In [6]:
# Relative file name, so the schema is written to the current working directory.
schema_file = 'schema.pbtxt'
tfdv.write_schema_text(schema=schema, output_path=schema_file)

### Upload the schema to GCS

In [7]:
# Destination prefix in Cloud Storage for the schema file.
# NOTE(review): the bucket name is hard-coded; point it at a bucket you can
# write to before running this cell.
gcs_location = 'gs://mlops-dev-workspace/drift_monitor/schema/'

# IPython substitutes {schema_file} and {gcs_location} with the Python values
# before handing the command to the shell.
!gsutil cp {schema_file} {gcs_location}

Copying file://schema.pbtxt [Content-Type=application/octet-stream]...

Operation completed over 1 objects/1.5 KiB.                                      


In [8]:
# Print the uploaded object to confirm the copy. gcs_location ends with '/',
# so appending schema_file directly yields the full object URL.
!gsutil cat {gcs_location}{schema_file}

feature {
  name: "Elevation"
  type: FLOAT
}
feature {
  name: "Aspect"
  type: FLOAT
}
feature {
  name: "Slope"
  type: FLOAT
}
feature {
  name: "Horizontal_Distance_To_Hydrology"
  type: FLOAT
}
feature {
  name: "Vertical_Distance_To_Hydrology"
  type: FLOAT
}
feature {
  name: "Horizontal_Distance_To_Roadways"
  type: FLOAT
}
feature {
  name: "Hillshade_9am"
  type: FLOAT
}
feature {
  name: "Hillshade_Noon"
  type: FLOAT
}
feature {
  name: "Hillshade_3pm"
  type: FLOAT
}
feature {
  name: "Horizontal_Distance_To_Fire_Points"
  type: FLOAT
}
feature {
  name: "Wilderness_Area"
  type: BYTES
  domain: "Wilderness_Area"
}
feature {
  name: "Soil_Type"
  type: BYTES
  domain: "Soil_Type"
}
string_domain {
  name: "Soil_Type"
  value: "2702"
  value: "2703"
  value: "2704"
  value: "2705"
  value: "2706"
  value: "2717"
  value: "3501"
  value: "3502"
  value: "4201"
  value: "4703"
  value: "4704"
  value: "4744"
  value: "4758"
  value: "5101"
  value: "5151"
  value: "6101"
  v