### Import libraries

In [1]:
import os
import openai
from sqlalchemy import create_engine, MetaData
import pandas as pd
import os
from sqlalchemy.engine import URL

### OpenAI Instance using Ollama

In [2]:
# Point the OpenAI SDK at the local OpenAI-compatible endpoint.
# The key is a placeholder string, also stored on the module for reuse below.
openai.api_key = "ollama"
client = openai.OpenAI(base_url="http://localhost:11434/v1", api_key=openai.api_key)

### Call to Model API to Generate Docs

In [3]:
def classify_columns(column, model="llama3", api_client=None):
    """Ask the chat model for a brief description of one database column.

    Args:
        column: Column name; sent verbatim as the user message.
        model: Name of the chat model to query. Defaults to "llama3".
        api_client: Object exposing
            ``chat.completions.create(model=..., messages=...)``. When None,
            the notebook-level ``client`` is used.

    Returns:
        The model's reply, whitespace-stripped and passed through
        ``str.title()``, or the string "Process Failed" if anything raises
        while making or reading the request.
    """
    try:
        # Resolved inside the try block so a missing client is reported
        # through the same best-effort path as any other failure.
        active_client = api_client if api_client is not None else client
        response = active_client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": """
                    You are a helpful assistant designed to generate description for columns in database table. 
                    Must respond with brief description for the provided column.
                    
                    ### Database Schema
                    Use the following Postgres database schema.
                    CREATE TABLE dimproduct (
                            product_key text primary key,
                            productid bigint,
                            product_name text,
                            productnumber text,
                            color text,
                            daystomanufacture bigint,
                            safetystocklevel bigint,
                            standardcost double precision,
                            product_subcategory_name text,
                            product_category_name text,
                            sellstartdate text,
                            sellenddate text
                            );

                    """
                },
                {"role": "user", "content": column} 
            ]
        )
        description = response.choices[0].message.content.strip()
        print(f"Column: '{column}' -> description: '{description}'")
        # NOTE(review): str.title() capitalizes every word (and letters that
        # follow apostrophes/backticks), so the returned text differs from the
        # printed one. Kept as-is for backward compatibility -- confirm intent.
        return description.title()
    except Exception as e:
        # Deliberate best-effort: report the problem and return a sentinel so
        # a loop over many columns keeps going.
        print(f"Error in classifying the column: {e}")
        return "Process Failed"

### Get Table Columns from DB

In [4]:
# Read the database credentials from environment variables.
# Indexing os.environ raises KeyError if PGPASS or PGUID is not set.
pwd = os.environ['PGPASS']
uid = os.environ['PGUID']
# SQL database connection details
server = "192.168.1.39"
# NOTE(review): this value ends with ';' and the connection URL built in the
# next cell hard-codes the database name instead of reading this variable --
# confirm whether the semicolon is intended before reusing `database`.
database = "adventureworks;"

In [5]:
# Build the URL from its components rather than by string interpolation, so
# credentials containing characters such as '@', ':', '/' or '%' are escaped
# correctly instead of corrupting the host/port part of the URL.
DB_URL = URL.create(
    "postgresql",
    username=uid,
    password=pwd,
    host=server,
    port=5432,
    database="adventureworks",
)
# Pass "-csearch_path=sales" as a connection option so unqualified table names
# are looked up in the `sales` schema.
engine = create_engine(DB_URL, connect_args={'options': '-csearch_path={}'.format('sales')})

In [6]:
# Pull a ten-row sample of the product table and preview it.
# NOTE(review): the prompt in classify_columns describes the table as
# `dimproduct`, while this query reads `product` -- confirm they match.
df = pd.read_sql_query("select * from product limit 10", con=engine)
df.head()

Unnamed: 0,product_key,productid,product_name,productnumber,color,daystomanufacture,safetystocklevel,standardcost,product_subcategory_name,product_category_name,sellstartdate,sellenddate
0,7b13b2203029ed80337f27127a9f1d28,993,"Mountain-500 Black, 52",BK-M18B-52,Black,4,100,294.5797,Mountain Bikes,Bikes,2013-05-30T00:00:00.000000Z,
1,860320be12a1c050cd7731794e231bd3,992,"Mountain-500 Black, 48",BK-M18B-48,Black,4,100,294.5797,Mountain Bikes,Bikes,2013-05-30T00:00:00.000000Z,
2,692f93be8c7a41525c0baf2076aecfb4,991,"Mountain-500 Black, 44",BK-M18B-44,Black,4,100,294.5797,Mountain Bikes,Bikes,2013-05-30T00:00:00.000000Z,
3,4fac9ba115140ac4f1c22da82aa0bc7f,990,"Mountain-500 Black, 42",BK-M18B-42,Black,4,100,294.5797,Mountain Bikes,Bikes,2013-05-30T00:00:00.000000Z,
4,a1140a3d0df1c81e24ae954d935e8926,989,"Mountain-500 Black, 40",BK-M18B-40,Black,4,100,294.5797,Mountain Bikes,Bikes,2013-05-30T00:00:00.000000Z,


In [7]:
# Show the column labels of the sampled frame; these are the names that get described below.
df.columns

Index(['product_key', 'productid', 'product_name', 'productnumber', 'color',
       'daystomanufacture', 'safetystocklevel', 'standardcost',
       'product_subcategory_name', 'product_category_name', 'sellstartdate',
       'sellenddate'],
      dtype='object')

### Test the Model API & Generate Docs

In [8]:
# Smoke-test the helper on a single column name before looping over all columns.
# Despite its name, `category` holds the returned description string.
category = classify_columns("product_key")

Column: 'product_key' -> description: 'Unique identifier for a product in the database.'


In [9]:
# Collect one generated description per column into two parallel lists.
data = {'column': [], 'description': []}
for column in df.columns:
    print(column)
    # Request the description first so both lists only grow together.
    description = classify_columns(column)
    data['column'].append(column)
    data['description'].append(description)

product_key
Column: 'product_key' -> description: 'Unique identifier for each product, serving as the primary key in the table.'
productid
Column: 'productid' -> description: 'Unique identifier for each product.'
product_name
Column: 'product_name' -> description: 'Product name: A brief description that identifies the specific product.'
productnumber
Column: 'productnumber' -> description: 'Unique identifier assigned to each product variant.'
color
Column: 'color' -> description: 'The `color` column represents the color or finish of a product.'
daystomanufacture
Column: 'daystomanufacture' -> description: 'The "daystomanufacture" column in the dimproduct table represents the number of days it takes to manufacture a particular product.'
safetystocklevel
Column: 'safetystocklevel' -> description: 'The `safetystocklevel` column represents the minimum stock level for a product that is considered safe to ensure timely fulfillment of customer orders.'
standardcost
Column: 'standardcost' -> d