In [1]:
from dotenv import load_dotenv

In [2]:
# Load the project's local env file. The return value is kept in `env_loaded`
# and echoed as the cell output (True in the recorded run).
env_loaded = load_dotenv(dotenv_path='../../.env-local')
env_loaded

True

In [3]:
from datadocai.models import CurrentTable

# Location of the table to document, addressed as catalog.schema.table.
TRINO_CATALOG = 'postgres'
TRINO_SCHEMA = 'public'
TRINO_TABLE = 'house_pricing'

# Describe the target table; `ct` is handed to the exporter and to the
# metadata manager in later cells.
ct = CurrentTable(
    trino_catalog=TRINO_CATALOG,
    trino_schema=TRINO_SCHEMA,
    trino_table=TRINO_TABLE,
)

In [4]:
import os
from datadocai.database import DatabaseClient

# Open the Trino connection. Host, port, user and password are read from
# environment variables; os.getenv returns None for any variable that is unset.
# NOTE(review): TRINO_PORT reaches DatabaseClient as a string — confirm it accepts that.
dc = DatabaseClient(
    host=os.getenv('TRINO_HOST'),
    port=os.getenv('TRINO_PORT'),
    user=os.getenv('TRINO_USER'),
    password=os.getenv('TRINO_PASSWORD'),
    certificate="../../docker/trino/certificate.pem",
)

In [5]:
from langchain_ollama import ChatOllama

# Address of the local Ollama server and the model tag shared by both clients.
api_base = "http://localhost:11434"
model = "mistral-nemo:12b-instruct-2407-q4_0"

# ChatOllama takes the server address as `base_url`. The previous `api_base=`
# keyword is not a ChatOllama field, so the endpoint configured above never
# took effect and the client fell back to its default host.
llm = ChatOllama(base_url=api_base, model=model, temperature=0, verbose=True)

# Same model and settings, but with format='json'; passed to the metadata
# manager as its `output_llm`.
output_llm = ChatOllama(base_url=api_base, model=model, format='json', temperature=0, verbose=True)

In [6]:
from datadocai.metadata import TableMetadataManager
from datadocai.metadata.exporter.trino import MetadataTrinoExporter

# Exporter bound to the same table and connection as the manager below.
metadata_exporter = MetadataTrinoExporter(current_table=ct, database_client=dc)

# Wire everything together: target table, Trino connection, the two LLM
# clients and the exporter.
tmm = TableMetadataManager(
    current_table=ct,
    database_client=dc,
    llm=llm,
    output_llm=output_llm,
    local_llm=True,
    metadata_exporter=metadata_exporter,
)

In [7]:
# Run the metadata manager. process() returns two values, unpacked here as
# `result` and `exporter_result`; in the recorded run it also logs the
# documentation it sets for the table and for each column.
result, exporter_result = tmm.process()

  prompt = loads(json.dumps(prompt_object.manifest))


postgres.public.house_pricing Set Table Documentation: The `house_pricing` table stores comprehensive data about real estate properties, including their prices, locations, features, and listing status. This information is valuable for market analysis, trend identification, property valuation, investment decisions, and other related purposes.
postgres.public.house_pricing Set Documentation for column id: description='A unique identifier assigned to each record in the table. It serves as a primary key for indexing and referencing purposes.'
postgres.public.house_pricing Set Documentation for column address: description='The street address of the house.'
postgres.public.house_pricing Set Documentation for column city: description='The city where the house is located.'
postgres.public.house_pricing Set Documentation for column state: description='The state abbreviation where the house is located (e.g., CA, NY).'
postgres.public.house_pricing Set Documentation for column zip_code: descripti

## Check that the descriptions were set

In [9]:
# Show the table definition (SHOW CREATE TABLE) so the COMMENT attached to the
# table's columns can be inspected.
cursor = dc.conn.cursor()
try:
    cursor.execute(
        f"SHOW CREATE TABLE {ct.trino_catalog}.{ct.trino_schema}.{ct.trino_table}")
    data = cursor.fetchall()
    # Captured while the cursor is still open; not used to build the output below.
    description = cursor.description
finally:
    # Release the cursor even if the query or the fetch fails.
    cursor.close()

# Two leading blank lines, then one line per returned row with its values
# joined by ", ".
output = "\n\n" + "".join(
    ", ".join(str(value) for value in row) + "\n" for row in data
)

print(output)



CREATE TABLE postgres.public.house_pricing (
   id integer NOT NULL COMMENT 'A unique identifier assigned to each record in the table. It serves as a primary key for indexing and referencing purposes.',
   address varchar(255) COMMENT 'The street address of the house.',
   city varchar(100) COMMENT 'The city where the house is located.',
   state varchar(100) COMMENT 'The state abbreviation where the house is located (e.g., CA, NY).',
   zip_code varchar(20) COMMENT 'The ZIP code of the house''s location.',
   square_feet integer COMMENT 'The total square footage of the house. Larger homes generally command higher prices.',
   bedrooms integer COMMENT 'The number of bedrooms in the house. Bedroom count significantly influences house pricing, with more bedrooms generally commanding higher prices.',
   bathrooms decimal(2, 1) COMMENT 'The number of bathrooms (full and half) in the house. Bathroom count also affects house price, with additional bathrooms often increasing property value.