In [32]:
!pip install psycopg
!pip install ollama
!pip install tqdm
!pip install jupyter ipywidgets

Collecting jupyter
  Using cached jupyter-1.1.1-py2.py3-none-any.whl.metadata (2.0 kB)
Collecting ipywidgets
  Using cached ipywidgets-8.1.7-py3-none-any.whl.metadata (2.4 kB)
Collecting notebook (from jupyter)
  Downloading notebook-7.4.3-py3-none-any.whl.metadata (10 kB)
Collecting jupyter-console (from jupyter)
  Using cached jupyter_console-6.6.3-py3-none-any.whl.metadata (5.8 kB)
Collecting nbconvert (from jupyter)
  Using cached nbconvert-7.16.6-py3-none-any.whl.metadata (8.5 kB)
Collecting jupyterlab (from jupyter)
  Downloading jupyterlab-4.4.3-py3-none-any.whl.metadata (16 kB)
Collecting widgetsnbextension~=4.0.14 (from ipywidgets)
  Using cached widgetsnbextension-4.0.14-py3-none-any.whl.metadata (1.6 kB)
Collecting jupyterlab_widgets~=3.0.15 (from ipywidgets)
  Using cached jupyterlab_widgets-3.0.15-py3-none-any.whl.metadata (20 kB)
Collecting async-lru>=1.0.0 (from jupyterlab->jupyter)
  Using cached async_lru-2.0.5-py3-none-any.whl.metadata (4.5 kB)
Collecting jinja2>=3.0.

In [1]:
# connect to the database
import os

import psycopg
from tqdm.notebook import tqdm

# The password is read from PGPASSWORD when that variable is set, so a real
# credential never has to be typed into the notebook; the fallback is the
# empty password the notebook used before.
#
# autocommit=True: by default psycopg opens a transaction on the first
# execute() and keeps it open until commit()/rollback(). For an exploratory
# notebook that means a long-lived idle transaction on the server, and a single
# failing statement makes every later statement fail until a rollback.
# Only SELECTs are issued here, so running each statement on its own is safe.
conn = psycopg.connect(
    host="localhost",
    user="postgres",
    password=os.environ.get("PGPASSWORD", ""),
    dbname="production",
    port=5432,
    autocommit=True,
)

# create a cursor
cursor = conn.cursor()


# Ask the catalog for every ordinary table (views excluded) in the public schema.
list_tables_sql = """
            SELECT table_name 
            FROM information_schema.tables 
            WHERE table_schema = 'public' 
            AND table_type = 'BASE TABLE'
        """
response = cursor.execute(list_tables_sql)

# One row per table; the table name is the only selected column.
tables = response.fetchall()

from psycopg import sql

# Maximum number of random rows sampled from each table.
SAMPLE_SIZE = 100

# Maps table name -> {'columns': [...], 'data': [...], 'row_count': int}.
# Only tables that were queried successfully get an entry.
results = {}

# For each table, get up to SAMPLE_SIZE random entries
for table in tqdm(tables):
    table_name = table[0]
    print(f"Querying table: {table_name}")

    # Compose the statement instead of formatting it as a string: the table
    # name is quoted as an identifier (so mixed-case or reserved-word names
    # work) and qualified with the schema the names were listed from (so the
    # query does not depend on the session's search_path).
    sample_query = sql.SQL(
        "SELECT * FROM {table} ORDER BY RANDOM() LIMIT {limit}"
    ).format(
        table=sql.Identifier("public", table_name),
        limit=sql.Literal(SAMPLE_SIZE),
    )

    try:
        cursor.execute(sample_query)

        # Get column names
        column_names = [desc[0] for desc in cursor.description]

        # Fetch the data
        rows = cursor.fetchall()
    except Exception as e:
        # Best effort: report the failure and move on to the next table.
        print(f"  Error querying table {table_name}: {e}")
        # A failed statement leaves an open transaction in the aborted state;
        # without a rollback every following query would fail as well.
        conn.rollback()
        continue

    # Store results
    results[table_name] = {
        'columns': column_names,
        'data': rows,
        'row_count': len(rows)
    }

    print(f"  Retrieved {len(rows)} rows from {table_name}")

  0%|          | 0/67 [00:00<?, ?it/s]

Querying table: databasechangelog
  Retrieved 100 rows from databasechangelog
Querying table: spatial_ref_sys
  Retrieved 100 rows from spatial_ref_sys
Querying table: databasechangeloglock
  Retrieved 1 rows from databasechangeloglock
Querying table: vin_equipment
  Retrieved 100 rows from vin_equipment
Querying table: vin_app_version
  Retrieved 2 rows from vin_app_version
Querying table: vin_address
  Retrieved 5 rows from vin_address
Querying table: vin_equipment_task_work_log
  Retrieved 100 rows from vin_equipment_task_work_log
Querying table: vin_equipment_attachment
  Retrieved 0 rows from vin_equipment_attachment
Querying table: vin_gps_tracking_segment
  Retrieved 100 rows from vin_gps_tracking_segment
Querying table: vin_company_module_config
  Retrieved 100 rows from vin_company_module_config
Querying table: vin_company
  Retrieved 83 rows from vin_company
Querying table: vin_dosage_rule
  Retrieved 100 rows from vin_dosage_rule
Querying table: vin_generic_field_value
  Ret

In [25]:
# Names of the tables that were sampled successfully (the keys of `results`).
# Defined here so this cell does not depend on a cell further down the notebook
# having been run first.
table_names = list(results.keys())
table_names

['databasechangelog',
 'spatial_ref_sys',
 'databasechangeloglock',
 'vin_equipment',
 'vin_app_version',
 'vin_address',
 'vin_equipment_task_work_log',
 'vin_equipment_attachment',
 'vin_gps_tracking_segment',
 'vin_company_module_config',
 'vin_company',
 'vin_dosage_rule',
 'vin_generic_field_value',
 'vin_generic_field_preset',
 'vin_group',
 'vin_pesticide_substance',
 'vin_pesticide',
 'vin_gps_tracking_session',
 'vin_group_user',
 'vin_pesticide_type_of_application_mapping',
 'vin_pesticide_mixture',
 'vin_pesticide_damage_factor_mapping',
 'vin_pesticide_culture_mapping',
 'vin_pesticide_area_of_application_mapping',
 'vin_report_attachment',
 'vin_report',
 'vin_security_context',
 'vin_salary_entry',
 'vin_rule',
 'vin_task_event_log',
 'vin_spraying_plan',
 'vin_rule_indication_mapping',
 'vin_report_location',
 'vin_task_group',
 'vin_task',
 'vin_task_prediction_predicted_tasks',
 'vin_task_attachment',
 'vin_task_type',
 'vin_substance',
 'vin_task_prediction',
 'vin_ta

In [30]:
import json

# Names of all tables that have a stored sample, in insertion order.
table_names = [*results]

# Sample of the first table, and the sample of the vineyard table.
r1 = results[table_names[0]]
r2 = results["vin_vineyard"]

# Serialise the vineyard sample as pretty-printed JSON; any value the json
# module cannot encode natively is converted with str().
foobar = json.dumps(r2, default=str, sort_keys=True, indent=4)



In [35]:
import ollama

# Instructions sent to the model. The text (including its whitespace) is kept
# exactly as it is sent; the JSON dump of the sampled table is appended to it.
# NOTE(review): the whole dump is embedded in one message. If it is larger than
# the model's context window the leading instructions may be cut off. Confirm
# against the model/server configuration.
instructions = """
               
               Read through the following query results in json format.
               The data is the result from a query like this: Select * from table_name order by random() limit 100.
               The result from this query was dumped as json for you to read.
               The data is split into two dicts, one for the headers, and one for the queried rows.
               
               Create a informative knowledge basis about the data in this table, 
               and extract insights from it.

               The knowledge basis should contain information about make up of the data,
               what kind of data is stored in each row, what data is typically found here etc. 
               
               Return the knowledge basis as a concise text, not a json object.
               Do not generate any other text than the knowledge basis.
               Do not generate any code.
               Do not tell me about the format of the data the way it is given to you, just the database and table information contained in it.
               
               
               """

# A single user turn: the instructions followed by the serialised sample.
user_message = {"role": "user", "content": instructions + foobar}

# The `think=True` option is left disabled.
response = ollama.chat(model="llama3.2", messages=[user_message])

import textwrap

# Print the model's reply wrapped to at most 100 characters per line.
# Each line of the reply is wrapped on its own so that the reply's paragraph,
# list and code line breaks survive (splitting the whole text on whitespace
# would merge them into one run of words).
content = response.message.content
lines = []

for paragraph in content.splitlines():
    wrapped = textwrap.wrap(
        paragraph,
        width=100,
        # Never split a word or a hyphenated term: over-long words simply get
        # a line of their own.
        break_long_words=False,
        break_on_hyphens=False,
    )
    # textwrap.wrap() returns [] for blank input; keep the blank line so
    # paragraph spacing is preserved.
    lines.extend(wrapped or [""])

print("\n".join(lines))

Here is the code in Python that can parse this JSON data: ```python import json # Load the JSON data
from a file or string data = ''' { "id": 1, "name": "Wine Data", "rows": [ { "ID": 1, "Code":
"W001", "Name": "Chardonnay", "Type": "White" }, { "ID": 2, "Code": "R001", "Name": "Cabernet
Sauvignon", "Type": "Red" } ], "row_count": 100 } ''' # Load the JSON data from a file or string
json_data = json.loads(data) # Extract the rows and row count from the data rows = json_data['rows']
row_count = json_data['row_count'] # Print the results print("Number of Rows:", row_count) for i,
row in enumerate(rows): print(f"Row {i+1}:") for key, value in row.items(): if isinstance(value,
list): print(f"{key}: {' - '.join(map(str, value))}") else: print(f"{key}: {value}") ``` This code
assumes that the JSON data is a string. If it's loaded from a file or another source, you'll need to
modify the `data` variable accordingly. The code then loads the JSON data into a Python dictionary
using the `json.loa