In [1]:
import yaml
import pandas as pd
import os
import logging
from database.manager import DatabaseManager
from preprocessing.database_preprocessing import DatabasePreprocessor
from utils.schema_reader import SchemaReader


# Configure logging
logging.basicConfig(level=logging.INFO)


def read_params_file(file_path):
    """
    Reads a YAML parameters file and returns the content as a dictionary.

    The file is decoded explicitly as UTF-8 instead of relying on the
    platform's default encoding, so the same file parses identically on
    every operating system. A file that holds no YAML document (for example
    an empty file) yields an empty dictionary rather than ``None``, which
    keeps the documented return type.

    Args:
        file_path (str): The path to the YAML parameters file.

    Returns:
        dict: A dictionary containing the parameters read from the file
            (empty when the file contains no YAML document).

    Raises:
        OSError: If the file cannot be opened.
        yaml.YAMLError: If the content is not valid YAML.
    """
    with open(file_path, encoding='utf-8') as yaml_file:
        params = yaml.safe_load(yaml_file)
    # safe_load returns None when the stream contains no document at all;
    # only that case is normalised, any other parsed value is returned as-is.
    if params is None:
        return {}
    return params


def connect_to_database(params):
    """
    Builds a DatabaseManager from the connection settings stored in ``params``.

    Args:
        params (dict): Parameters whose ``['database']['config']`` entry holds
            the ``dbname``, ``host``, ``user``, ``port`` and ``password`` values.

    Returns:
        DatabaseManager: The manager instance constructed from those settings.
    """
    db_config = params['database']['config']

    # Only these five settings are forwarded, in the constructor's keyword order;
    # a missing one raises KeyError for the first absent key.
    connection_kwargs = {
        key: db_config[key]
        for key in ('dbname', 'host', 'user', 'port', 'password')
    }

    return DatabaseManager(**connection_kwargs)



In [2]:

# Load the run parameters; the path is resolved relative to the current
# working directory (one level up, then config/params.yaml).
params_path = os.path.join('..', 'config', 'params.yaml')
params = read_params_file(params_path)

# Create the database manager from the loaded parameters
db_manager = connect_to_database(params)

# Process training dataset
########################################################################
# Name of the table to read, taken from the parameters file
train_table_name = params['database']['train_table_name']

In [5]:
# Query the table named by train_table_name; as the last expression of the
# cell, the returned value is rendered as the cell output.
# NOTE(review): no row limit is passed here, so the full result is displayed —
# consider showing only a small slice to keep the notebook output readable.
db_manager.select_from_table(table_name=train_table_name)

[(49,
  'blue-collar',
  'married',
  'basic.9y',
  'unknown',
  'no',
  'no',
  'cellular',
  'nov',
  'wed',
  227,
  4,
  999,
  0,
  'nonexistent',
  'no'),
 (37,
  'entrepreneur',
  'married',
  'university.degree',
  'no',
  'no',
  'no',
  'telephone',
  'nov',
  'wed',
  202,
  2,
  999,
  1,
  'failure',
  'no'),
 (78,
  'retired',
  'married',
  'basic.4y',
  'no',
  'no',
  'no',
  'cellular',
  'jul',
  'mon',
  1148,
  1,
  999,
  0,
  'nonexistent',
  'yes'),
 (36,
  'admin.',
  'married',
  'university.degree',
  'no',
  'yes',
  'no',
  'telephone',
  'may',
  'mon',
  120,
  2,
  999,
  0,
  'nonexistent',
  'no'),
 (59,
  'retired',
  'divorced',
  'university.degree',
  'no',
  'no',
  'no',
  'cellular',
  'jun',
  'tue',
  368,
  2,
  999,
  0,
  'nonexistent',
  'no'),
 (29,
  'admin.',
  'single',
  'university.degree',
  'no',
  'no',
  'no',
  'cellular',
  'aug',
  'wed',
  256,
  2,
  999,
  0,
  'nonexistent',
  'no'),
 (26,
  'student',
  'single',
  'basic