# Generate Source Definitions

## Libraries

In [4]:
import os
import datetime
import traceback
import logging
import sqlite3
import sys

from logging.handlers import TimedRotatingFileHandler
from dotenv import load_dotenv
from sqlalchemy import event
from sqlalchemy import create_engine

import pandas as pd

## Environment Settings

In [5]:
# Pull settings from a local .env file into the process environment.
load_dotenv()

# os.getenv returns the raw string (or None), so a value such as "0" or
# "false" would be truthy if used directly. Only an unset/blank value or an
# explicit negative disables SQLite; any other value keeps enabling it.
use_sqlite = (os.getenv('USE_SQLITE3') or "").strip().lower() not in {"", "0", "false", "no", "off"}

# The file handler opens its log file as soon as it is constructed, so the
# target folder has to exist beforehand.
os.makedirs("logs", exist_ok=True)

# Rotate the log file at midnight; rotated files get a YYYYMMDD suffix.
fhandler = TimedRotatingFileHandler("logs/log_meta.log", when="midnight", interval=1)
fhandler.suffix = "%Y%m%d"
logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s",
        # Send every record both to the rotating file and to stdout.
        handlers=[fhandler, logging.StreamHandler(sys.stdout)],
    )
logger = logging.getLogger(__name__)

# Folder (relative to the working directory) that holds the SQLite files.
database_folder = "database"

# Per-layer generation settings, keyed by layer name.
#   database_name      - SQLite file opened when SQLite is in use
#   file_folder/_name  - location of the YAML file that gets written
#   file_table_name_1  - line template for a table entry ({0} = table name)
#   file_table_name_2  - literal line that opens the table's column list
#   file_table_columns - line template for a column entry ({0} = column name)
#   file_content       - whole-file template ({0} = all generated lines)
# The leading spaces inside the templates are the YAML indentation and must
# be kept exactly as they are.
files = dict(
  source=dict(
    database_name=database_folder + "/source.db",
    file_folder="../dbt_model/models/source",
    file_name="schema.yml",
    file_table_name_1="      - name: {0}",
    file_table_name_2="        columns:",
    file_table_columns="         - name: {0}",
    file_content="\n".join((
      "version: 2",
      "",
      "sources:",
      "  - name: source",
      "    tables:",
      "{0}",
    )),
  ),
  staging=dict(
    database_name=database_folder + "/staging.db",
    file_folder="../dbt_model/models/staging",
    # NOTE(review): the "?" in this file name looks unintended (and is not a
    # legal file-name character on Windows) - confirm before changing it.
    file_name="schema?_staging.yml",
    file_table_name_1="  - name: {0}",
    file_table_name_2="    columns:",
    file_table_columns="     - name: {0}",
    file_content="\n".join((
      "version: 2",
      "",
      "models:",
      "{0}",
    )),
  ),
)


## Connecting to the Database and Generating the Schema File

In [6]:
# Layer whose schema file is generated; must be one of the keys of `files`.
target = 'staging'

my_conn = None
list_tables = []

try:
  if use_sqlite:
    my_conn = sqlite3.connect(files[target]["database_name"])
    # Restrict to real user tables: the schema table also lists indexes,
    # views and triggers, and names starting with "sqlite_" are internal.
    # `sqlite_master` is used because it works on every SQLite version.
    sql_tables = "select name from sqlite_master where type = 'table' and substr(name, 1, 7) <> 'sqlite_'"
  else:
    my_conn = create_engine(os.getenv("PG_DATA_CONN"))
    sql_tables = f"select table_name as name from information_schema.tables WHERE table_schema = '{target}' and table_type = 'BASE TABLE'"

  tables = pd.read_sql(sql_tables, my_conn)

  for table_name in tables['name']:
    list_tables.append(files[target]["file_table_name_1"].format(table_name.lower()))
    list_tables.append(files[target]["file_table_name_2"])

    # The table name is embedded as a SQL string literal, so double any
    # single quote it contains.
    quoted_name = table_name.replace("'", "''")

    if use_sqlite:
      sql_cols = f"PRAGMA table_info('{quoted_name}')"
    else:
      # Filter on the schema as well, otherwise a same-named table in another
      # schema contributes its columns; keep the declared column order.
      sql_cols = (
        "select column_name as name from information_schema.columns "
        f"where table_schema = '{target}' and table_name = '{quoted_name}' "
        "order by ordinal_position"
      )

    cols = pd.read_sql(sql_cols, my_conn)

    for raw_name in cols['name']:
      # Drop non-ASCII characters and lower-case the column name.
      name = raw_name.encode('ascii', 'ignore').decode().lower()

      # Special case kept as-is: this one auto-generated name is shortened.
      if name == "unnamed: 90":
        name = "unnamed"

      # Columns whose name starts with "202" are all reported under one name.
      if name.startswith('202'):
        name = "extract date"

      list_tables.append(files[target]["file_table_columns"].format(name))
finally:
  # Release the database resource even when a query fails: sqlite3
  # connections are closed, SQLAlchemy engines dispose of their pool.
  if my_conn is not None:
    if use_sqlite:
      my_conn.close()
    else:
      my_conn.dispose()

output_path = os.path.join(files[target]["file_folder"], files[target]["file_name"])

if not list_tables:
  logger.warning("No tables found for target '%s'; %s will contain no entries", target, output_path)

with open(output_path, "w", encoding='utf-8') as fw:
  fw.write(files[target]["file_content"].format('\n'.join(list_tables)))

logger.info("Wrote %d table definition(s) for target '%s' to %s", len(tables), target, output_path)