<a href="https://colab.research.google.com/github/dekatesakshi677-dotcom/OIBSIP/blob/main/ingestion_db.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import os
from sqlalchemy import create_engine
import logging
import time

# Make sure the log directory exists before logging is configured.
# exist_ok=True creates it when missing and is a no-op when it is already
# there, without the race of a separate exists() check followed by makedirs().
log_dir = 'logs'
os.makedirs(log_dir, exist_ok=True)

# Append DEBUG-and-above records to <log_dir>/ingestion_db.log.
logging.basicConfig(
    filename=os.path.join(log_dir, "ingestion_db.log"),
    level=logging.DEBUG,
    format="%(asctime)s - %(levelname)s - %(message)s",
    filemode="a"
)

# Engine for the SQLite database file inventory.db; used by load_raw_data().
engine = create_engine('sqlite:///inventory.db')

def ingest_db(df, table_name, engine):
    """Write ``df`` to ``table_name`` through ``engine``.

    An existing table with the same name is replaced, and the dataframe
    index is not written as a column.
    """
    write_options = {
        "con": engine,
        "if_exists": "replace",
        "index": False,
    }
    df.to_sql(table_name, **write_options)

def load_raw_data(data_directory='/content/'):
    """Load every CSV file in ``data_directory`` into the database.

    Each regular file whose name ends in ``.csv`` is read with pandas and
    written, via ``ingest_db`` and the module-level ``engine``, to a table
    named after the file with its extension stripped. A file that fails is
    logged with its traceback and skipped so the remaining files still load.

    Args:
        data_directory: Directory to scan for CSV files. Defaults to
            ``/content/``.

    Returns:
        None. Progress, errors and the elapsed time are written to the log.
    """
    start = time.time()

    # isdir (rather than exists) also rejects a path that points at a regular
    # file, which os.listdir would otherwise raise on.
    if not os.path.isdir(data_directory):
        logging.error(f"Data directory not found: {data_directory}")
        return

    for file_name in os.listdir(data_directory):
        full_file_path = os.path.join(data_directory, file_name)

        # Skip sub-directories and anything that is not a CSV file.
        if not (os.path.isfile(full_file_path) and file_name.endswith('.csv')):
            continue

        try:
            df = pd.read_csv(full_file_path)
            table_name = os.path.splitext(file_name)[0]
            logging.info(f'Ingesting {file_name} into table {table_name} in db')
            ingest_db(df, table_name, engine)
        except Exception as e:
            # Best-effort ingestion: record the failure together with its
            # traceback and carry on with the next file.
            logging.exception(f"Error processing file {file_name}: {e}")

    end = time.time()
    total_time = (end - start)/60
    logging.info('--------------Ingestion Complete------------')
    logging.info(f'\nTotal Time Taken: {total_time} minutes')

# Run the ingestion only when this file is executed as a script, not on import.
if __name__ == "__main__":
    load_raw_data()