In [1]:
import json
import os
import sys
from urllib.parse import quote

import numpy as np
import pandas as pd
import requests
from sqlalchemy import create_engine
pd.set_option('display.max_columns', 100)
pd.set_option('display.max_rows', 100)

In [2]:
def getFormMetadata(url, title, username, password):
    """
    Look up a form by title in the form list served at `url`.

    Args:
        url (str): Endpoint that returns a JSON list of form descriptions.
        title (str): Value of the "title" key of the wanted form.
        username (str): Basic-auth user name.
        password (str): Basic-auth password.

    Returns:
        dict | int: ``{"title", "formid", "uuid", "code"}`` built from the
        matching form, or ``-1`` when the request fails, the list is empty,
        or no form carries that title. If several forms share the title the
        last one in the list wins.
    """
    response = requests.get(url, auth=(username, password))

    if response.status_code != 200:
        print(f"form metadata request failed with status code: {response.status_code}")
        print(response.text)
        # Without a payload there is nothing to search; report "not found"
        # with the same sentinel that is used for an empty form list.
        return -1

    print("form metadata request successfull")
    data = response.json()

    # Start from the sentinel so that an empty list or an unmatched title
    # returns -1 instead of referencing an unbound variable.
    metadata = -1
    for form in data:
        if form["title"] == title:
            metadata = {"title": title,
                        "formid": form["id_string"],
                        "uuid": form["uuid"],
                        "code": form["formid"]}

    return metadata

In [3]:
def koboCsvData(url, username, password, additional = ""):
    """
    Download submissions from a Kobo data endpoint as a DataFrame.

    Args:
        url (str): Data endpoint; ``?format=json`` is appended to it.
        username (str): Basic-auth user name.
        password (str): Basic-auth password.
        additional (str): Extra query-string text appended verbatim after
            ``?format=json`` (for example ``&query={...}``).

    Returns:
        pandas.DataFrame: ``pd.DataFrame(response.json())`` on HTTP 200.
        On any other status the failure is printed and an empty DataFrame
        is returned, so callers can treat it as "no rows".
    """
    request_url = url + '?format=json' + additional
    response = requests.get(request_url, auth=(username, password))

    if response.status_code != 200:
        print(f"data request failed with status code: {response.status_code}")
        print(response.text)
        # Previously this path fell through to `return data` with `data`
        # never assigned, which raised UnboundLocalError.
        return pd.DataFrame()

    return pd.DataFrame(response.json())

In [4]:
def connectToPostgres(host, port, database, user, password):
    """
    Attempts to establish a connection to a PostgreSQL database using SQLAlchemy.

    Args:
        host (str): The hostname or IP address of the database server.
        port (int | str): The port number on which the database server is listening.
        database (str): The name of the database to connect to.
        user (str): The username to use for authentication.
        password (str): The password associated with the specified username.

    Returns:
        engine (sqlalchemy.engine.Engine): The SQLAlchemy engine object if successful, None otherwise.
        error_message (str): An error message if the connection fails, None otherwise.
    """

    try:
        # Percent-encode the credentials: characters such as "@", ":" or "/"
        # would otherwise be read as URL delimiters and corrupt the URL.
        # quote(..., safe="") is used rather than quote_plus so that a space
        # becomes "%20" and never "+".
        connection_url = (
            f"postgresql://{quote(str(user), safe='')}:{quote(str(password), safe='')}"
            f"@{host}:{port}/{database}"
        )

        # Create the SQLAlchemy engine
        engine = create_engine(connection_url)

        # Test the connection by opening (and immediately closing) one
        with engine.connect():
            print("Connection to PostgreSQL database successful!")
            return engine, None  # Return engine object and no error message

    except Exception as e:
        print(f"Error connecting to PostgreSQL database: {e}")
        return None, str(e)  # Return no engine and the error message


In [101]:
def getDataFromDB(con, q):
    """Run the SQL text `q` against `con` and return the result set as a DataFrame."""
    return pd.read_sql_query(sql=q, con=con)

In [6]:
# Database connection settings.
# Values are taken from the standard libpq environment variables when set, so
# real credentials do not have to be stored in the notebook; the fallbacks are
# the previous hard-coded local-development values.
host = os.environ.get("PGHOST", "localhost")
database = os.environ.get("PGDATABASE", "trace_db")
user = os.environ.get("PGUSER", "postgres")
password = os.environ.get("PGPASSWORD", "root")
port = os.environ.get("PGPORT", "5432")

# `engine` is None and `error_message` holds the reason when the connection fails.
engine, error_message = connectToPostgres(host=host, port=port, database=database, user=user, password=password)

Connection to PostgreSQL database successful!


In [12]:
# Stop right away when the connection attempt failed; otherwise load every
# active row of `kobo_connections` into `koboform_url`.
if not engine:
    print(f"Failed to connect: {error_message}")
    sys.exit(1)

query = "SELECT * FROM kobo_connections WHERE is_active = 1"
koboform_url = getDataFromDB(engine, query)

In [None]:
# Sync every active Kobo connection into the database.
#   form_type 1 ("pekebun"): submissions are only downloaded when `farmers`
#       holds no rows for the connection; nothing is written yet.
#   form_type 2 ("kebun"): submissions newer than the latest stored
#       `submission_time` are converted to `farms` / `farm_mills` rows and
#       appended to those tables.

# `farms` column -> Kobo submission field whose value is copied into it.
FARM_FIELD_MAP = {
    "id": "_id",
    "username": "username",
    "interName": "enumInfo/interName",
    "surveyDate": "enumInfo/surveyDate",
    "farmerName": "farmerInfo/farmerName",
    "farmer_phone": "farmerInfo/farmer_phone",
    "ownership": "farmerInfo/ownership",  # converted to 1/0 in _build_farm_row
    "ownerName": "farmerInfo/ownerName",
    "owner_phone": "farmerInfo/owner_phone",
    "district_id": "farmerAddress/farmerDistrict",
    "sub_district_id": "farmerAddress/farmerSubdistrict",
    "village_id": "farmerAddress/farmerVillage",
    "address": "farmerAddress/detailAddress",
    "plant_age": "plantationInfo/plant_age",
    "plant_areaDoc": "plantationInfo/plant_areaDoc",
    "plant_doc": "plantationInfo/plant_doc",
    "plant_category": "plantationInfo/plant_category",
    "seedcert": "plantationInfo/seedcert",
    "seedsource": "plantationInfo/seedsource",
    "seedsourceother": "plantationInfo/seedsourceother",
    "seed_invoice": "plantationInfo/seed_invoice",
    "yield_minKG": "plantationProd/yield_minKG",
    "yield_maxKG": "plantationProd/yield_maxKG",
    "yield_meanKG": "plantationProd/yield_meanKG",
    "yield_meanTon": "plantationProd/yield_meanTon",
    "harvest_period": "plantationProd/harvest_period",
    "harvest_freqYear": "plantationProd/harvest_freqYear",
    "plant_production": "plantationProd/plant_production",
    "plant_productivity": "plantationProd/plant_productivity",
    "millsNumber": "millsNumber",
    "plant_point": "plantation/plant_point",
    "plant_polygon": "plantation/plant_polygon",
    "plant_polyAreaM2": "plantation/plant_polyAreaM2",
    "plant_polyAreaHa": "plantation/plant_polyAreaHa",
    "percent_difArea": "plantation/percent_difArea",
}

TIMESTAMP_FORMAT = '%Y-%m-%d %H:%M:%S'


def _build_farm_row(record, form_id, timestamp):
    """Convert one submission (a dict of Kobo fields) into a `farms` row dict."""
    # .get(): a field that has no column in this batch becomes NULL instead
    # of raising KeyError.
    row = {column: record.get(field) for column, field in FARM_FIELD_MAP.items()}
    row["ownership"] = 1 if record.get("farmerInfo/ownership") == 'yes' else 0

    # A submission without attachments gets no image URL instead of an IndexError.
    attachments = record.get("_attachments")
    has_attachment = isinstance(attachments, list) and len(attachments) > 0
    row["image_url"] = attachments[0]['download_url'] if has_attachment else None

    row["submission_time"] = pd.to_datetime(record["_submission_time"]).strftime(TIMESTAMP_FORMAT)
    row["kobo_connections_id"] = form_id
    row["created_at"] = timestamp
    row["updated_at"] = timestamp
    return row


def _build_farm_mill_rows(record, timestamp):
    """Convert the `numMills` repeat group of one submission into `farm_mills` row dicts."""
    mills = record.get("numMills")
    if not isinstance(mills, list):
        # Missing repeat group (absent key or NaN): no mill rows for this farm.
        return []
    return [
        {
            # No "id" key: the table assigns it on insert. The per-farm
            # counter used before restarted at i + 1 and produced colliding ids.
            # NOTE(review): farm_id is filled from the mill *district* field,
            # exactly as before — confirm it should not be the submission `_id`.
            "farm_id": mill['numMills/traceability/millDistrict'],
            "mill_id": mill['numMills/traceability/millsTo'],
            "created_at": timestamp,
            "updated_at": timestamp,
        }
        for mill in mills
    ]


for _, form in koboform_url.iterrows():
    form_type = form['form_type']  # renamed from `type`, which shadowed the builtin

    # form pekebun
    if form_type == 1:
        query = f"SELECT * FROM farmers WHERE kobo_connections_id = '{form['id']}'"
        df_farmers = getDataFromDB(engine, query)
        if df_farmers.shape[0] == 0:
            df_pekebun = koboCsvData(form['api_url'], form['username'], form['password'])

    # form kebun
    elif form_type == 2:
        q1 = f"SELECT * FROM farms WHERE kobo_connections_id = '{form['id']}'"
        df_farms = getDataFromDB(engine, q1)

        q2 = "SELECT * FROM farm_mills"
        df_farm_mills = getDataFromDB(engine, q2)

        if df_farms.shape[0] == 0:
            # Nothing stored yet for this connection: fetch every submission.
            add = ""
        else:
            # Fetch only submissions newer than the latest stored one.
            latest_submission_time = pd.to_datetime(df_farms['submission_time']).max().strftime('%Y-%m-%dT%H:%M:%S')
            add = "&query={\"_submission_time\":{\"$gt\":\"" + latest_submission_time + "\"}}"

        # Both branches read from the current `form`; the incremental branch
        # previously referenced an undefined `type_2` frame.
        df_kebun = koboCsvData(form['api_url'], form['username'], form['password'], add)
        # Textual infinities are blanked in both branches, not only on first load.
        df_kebun = df_kebun.replace(['inf', 'Infinity', '-inf', '-Infinity'], np.nan)

        now = pd.Timestamp.now().strftime(TIMESTAMP_FORMAT)
        records = df_kebun.to_dict("records")
        new = [_build_farm_row(record, form['id'], now) for record in records]
        new_farm_mills = [
            mill_row
            for record in records
            for mill_row in _build_farm_mill_rows(record, now)
        ]

        new_df = pd.DataFrame(new)
        new_mill_df = pd.DataFrame(new_farm_mills)

        # Append ONLY the new rows. Writing the concatenation of the rows just
        # read from the database plus the new rows re-inserted every existing row.
        if not new_df.empty:
            new_df.to_sql('farms', engine, if_exists='append', index=False)
        if not new_mill_df.empty:
            new_mill_df.to_sql('farm_mills', engine, if_exists='append', index=False)

        # Keep the in-memory frames in step with the tables for later cells.
        df_farms = pd.concat([df_farms, new_df], ignore_index=True)
        df_farm_mills = pd.concat([df_farm_mills, new_mill_df], ignore_index=True)

In [166]:
# Read the `farms` rows whose kobo_connections_id equals the id of the current
# `form` (the row last bound by the sync loop) and display them.
query = f"SELECT * FROM farms WHERE kobo_connections_id = '{form['id']}'"
df_farms = getDataFromDB(engine, query)
df_farms

Unnamed: 0,id,username,interName,surveyDate,farmerName,farmer_phone,ownership,ownerName,owner_phone,district_id,sub_district_id,village_id,address,plant_age,plant_areaDoc,plant_doc,plant_category,seedcert,seedsource,seedsourceother,seed_invoice,yield_minKG,yield_maxKG,yield_meanKG,yield_meanTon,harvest_period,harvest_freqYear,plant_production,plant_productivity,millsNumber,plant_point,plant_polygon,plant_polyAreaM2,plant_polyAreaHa,percent_difArea,image_url,submission_time,kobo_connections_id,created_at,updated_at


In [167]:
# Read the whole `farm_mills` table (no per-connection filter) and display it.
q2 = "SELECT * FROM farm_mills"
df_farm_mills = getDataFromDB(engine, q2)
df_farm_mills

Unnamed: 0,id,farm_id,mill_id,created_at,updated_at


In [65]:
# Download every submission of the current `form` and blank out textual
# infinities so they end up as missing values.
df_kebun = (
    koboCsvData(form['api_url'], form['username'], form['password'])
    .replace(['inf', 'Infinity', '-inf', '-Infinity'], np.nan)
)

In [168]:
# Build `farms` rows (`new`) and `farm_mills` rows (`new_farm_mills`) from the
# submissions in `df_kebun`, then concatenate them onto the frames previously
# read from the database.

# `farms` column -> Kobo submission field whose value is copied into it.
farm_fields = {
    "id": "_id",
    "username": "username",
    "interName": "enumInfo/interName",
    "surveyDate": "enumInfo/surveyDate",
    "farmerName": "farmerInfo/farmerName",
    "farmer_phone": "farmerInfo/farmer_phone",
    "ownership": "farmerInfo/ownership",  # converted to 1/0 below
    "ownerName": "farmerInfo/ownerName",
    "owner_phone": "farmerInfo/owner_phone",
    "district_id": "farmerAddress/farmerDistrict",
    "sub_district_id": "farmerAddress/farmerSubdistrict",
    "village_id": "farmerAddress/farmerVillage",
    "address": "farmerAddress/detailAddress",
    "plant_age": "plantationInfo/plant_age",
    "plant_areaDoc": "plantationInfo/plant_areaDoc",
    "plant_doc": "plantationInfo/plant_doc",
    "plant_category": "plantationInfo/plant_category",
    "seedcert": "plantationInfo/seedcert",
    "seedsource": "plantationInfo/seedsource",
    "seedsourceother": "plantationInfo/seedsourceother",
    "seed_invoice": "plantationInfo/seed_invoice",
    "yield_minKG": "plantationProd/yield_minKG",
    "yield_maxKG": "plantationProd/yield_maxKG",
    "yield_meanKG": "plantationProd/yield_meanKG",
    "yield_meanTon": "plantationProd/yield_meanTon",
    "harvest_period": "plantationProd/harvest_period",
    "harvest_freqYear": "plantationProd/harvest_freqYear",
    "plant_production": "plantationProd/plant_production",
    "plant_productivity": "plantationProd/plant_productivity",
    "millsNumber": "millsNumber",
    "plant_point": "plantation/plant_point",
    "plant_polygon": "plantation/plant_polygon",
    "plant_polyAreaM2": "plantation/plant_polyAreaM2",
    "plant_polyAreaHa": "plantation/plant_polyAreaHa",
    "percent_difArea": "plantation/percent_difArea",
}

new = []
new_farm_mills = []
index = 1  # running id for farm_mills rows, continuous across all farms
for i in range(df_kebun.shape[0]):
    # Every field is read from row i. `username` used to be read from row 1,
    # which copied one user onto every farm and failed on a single-row frame.
    new_row = {column: df_kebun[field].values[i] for column, field in farm_fields.items()}
    new_row["ownership"] = 1 if new_row["ownership"] == 'yes' else 0
    new_row["image_url"] = df_kebun['_attachments'].values[i][0]['download_url']
    new_row["submission_time"] = pd.to_datetime(df_kebun['_submission_time'].values[i]).strftime('%Y-%m-%d %H:%M:%S')
    new_row["kobo_connections_id"] = form['id']
    new_row["created_at"] = pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')
    new_row["updated_at"] = pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')
    new.append(new_row)

    # One farm_mills row per entry of this submission's `numMills` list.
    m = df_kebun['numMills'][i]
    n = len(m)
    for j in range(n):
        new_mills_row = {
            "id": index,
            # NOTE(review): farm_id is filled from the mill *district* field —
            # confirm it should not be the submission `_id`.
            "farm_id": m[j]['numMills/traceability/millDistrict'],
            "mill_id": m[j]['numMills/traceability/millsTo'],
            "created_at": pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S'),
            "updated_at": pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S')
        }
        new_farm_mills.append(new_mills_row)
        index = index + 1

new_df = pd.DataFrame(new)
df_farms = pd.concat([df_farms, new_df], ignore_index=True)

new_mill_df = pd.DataFrame(new_farm_mills)
df_farm_mills = pd.concat([df_farm_mills, new_mill_df], ignore_index=True)

In [139]:
# Append only the freshly built rows. `df_farms` also contains the rows that
# were read from the `farms` table, so appending it would insert those again.
new_df.to_sql('farms', engine, if_exists='append', index=False)

313

In [144]:
# Show the `numMills` entry of the submission labelled 0.
df_kebun.loc[0, 'numMills']

[{'numMills/traceability/millDistrict': 'ID6403',
  'numMills/traceability/millsTo': 'PO1000008233',
  'numMills/traceability/millsRSPO': 'Not RSPO Certified'},
 {'numMills/traceability/millDistrict': 'ID6403',
  'numMills/traceability/millsTo': 'PO1000014622',
  'numMills/traceability/millsRSPO': 'Not RSPO Certified'},
 {'numMills/traceability/millDistrict': 'ID6403',
  'numMills/traceability/millsTo': 'PO1000001488',
  'numMills/traceability/millsRSPO': 'Not RSPO Certified'}]

In [169]:
# Remove the `id` column before the frame is written. errors='ignore' keeps the
# cell re-runnable: a second run no longer raises KeyError once `id` is gone.
df_farm_mills = df_farm_mills.drop(columns='id', errors='ignore')

In [170]:
# Append only the freshly built mill rows, without their provisional `id`.
# `df_farm_mills` also contains the rows that were read from the `farm_mills`
# table, so appending it would insert those again.
new_mill_df.drop(columns='id', errors='ignore').to_sql('farm_mills', engine, if_exists='append', index=False)

399