In [16]:
import logging
import os

import pandas as pd

This notebook is an example ETL (extract, transform, load) process using Python and a pandas DataFrame.

Set up a logger that writes to both the console and a log file

In [17]:
# Set up logging to console and file
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Build the file handler that saves log messages to a file
file_handler = logging.FileHandler('etl_log.log')
file_handler.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
file_handler.setFormatter(formatter)

# getLogger() returns the same logger object every time this cell runs, so
# handlers added by earlier executions are still attached. Detach and close
# any previous handler for the same log file first; otherwise each re-run
# would make every message appear one more time in etl_log.log.
for _previous in list(logger.handlers):
    if (isinstance(_previous, logging.FileHandler)
            and _previous.baseFilename == file_handler.baseFilename):
        logger.removeHandler(_previous)
        _previous.close()
logger.addHandler(file_handler)

# Function to extract data from a CSV file

In [18]:
def extract(file_path):
    """Read the CSV file at ``file_path`` and return it as a DataFrame.

    An INFO message naming the source file is logged before reading.
    The path is passed straight to ``pandas.read_csv`` with default options.
    """
    logger.info(f"Extracting data from CSV file: {file_path}")
    return pd.read_csv(filepath_or_buffer=file_path)

# Function to clean and transform data

In [19]:
def transform(data):
    """Clean the extracted frame: parse dates and drop duplicate IDs.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing at least the 'Date' and 'ID' columns. It is left
        unmodified; the result is a new frame.

    Returns
    -------
    pandas.DataFrame
        Copy of ``data`` in which 'Date' is converted to datetime (values
        that cannot be parsed become NaT) and only the first row for each
        'ID' value is kept. Index labels of the surviving rows are preserved.

    Raises
    ------
    KeyError
        If the 'Date' or 'ID' column is missing.
    """
    logger.info("Transforming and cleaning data")
    # Convert 'Date' to datetime on a new frame (assign) instead of writing
    # into the caller's frame, so the raw data stays intact and the cell can
    # be re-run without depending on previously mutated input.
    parsed = data.assign(Date=pd.to_datetime(data['Date'], errors='coerce'))
    # Drop duplicate rows based on the 'ID' column only; the first
    # occurrence of each ID is kept.
    cleaned_data = parsed.drop_duplicates(subset=['ID'])
    return cleaned_data


# Function to load data into the destination (here a CSV file; it could also be, e.g., a database)

In [20]:
def load(data, destination_path):
    """Write ``data`` to ``destination_path`` as a CSV file without the index.

    Logs an INFO message naming the destination before writing and a
    second INFO message once the write has returned. Returns ``None``.
    """
    start_message = f"Loading data into destination: {destination_path}"
    logger.info(start_message)

    # index=False keeps the DataFrame index out of the output file.
    data.to_csv(path_or_buf=destination_path, index=False)

    logger.info("Data loaded successfully")

Set the source file path and the destination path for the ETL output

In [21]:
# Directory holding the input CSV and receiving the cleaned output.
# Set the ETL_DATA_DIR environment variable to run the notebook on another
# machine; when it is unset, the original location is used.
DATA_DIR = os.environ.get('ETL_DATA_DIR', '/home/christopher/Documents/interview_prep')
file_path = os.path.join(DATA_DIR, 'dummy_data.csv')
destination_path = os.path.join(DATA_DIR, 'cleaned_dummy_data.csv')

In [22]:
# ETL Process
# Runs the three stages in order; each stage consumes the previous result.
# Extract: read the source CSV at file_path into a DataFrame.
data = extract(file_path)
# Transform: parse the 'Date' column and drop rows with duplicate 'ID' values.
cleaned_data = transform(data)
# Load: write the cleaned frame to destination_path as CSV (index omitted).
load(cleaned_data, destination_path)

2023-11-10 20:39:53,506 - INFO - Extracting data from CSV file: /home/christopher/Documents/interview_prep/dummy_data.csv
2023-11-10 20:39:53,512 - INFO - Transforming and cleaning data
2023-11-10 20:39:53,517 - INFO - Loading data into destination: /home/christopher/Documents/interview_prep/cleaned_dummy_data.csv
2023-11-10 20:39:53,525 - INFO - Data loaded successfully
