In [24]:
import pandas as pd
import glob
import xml.etree.ElementTree as ET
from datetime import datetime

# Destination CSV for the transformed records; extract() skips this name when globbing.
target_file = 'transformed_data.csv'
# Text log that log_progress() appends timestamped entries to.
log_file = 'log_file.txt'

def extract_from_csv(file_to_process: str) -> pd.DataFrame:
    """Read one CSV file into a DataFrame.

    A progress line naming the file is printed before the file is parsed
    with ``pandas.read_csv`` using its default options.

    Args:
        file_to_process: Path of the CSV file to read.

    Returns:
        The parsed contents of the file.
    """
    print(f"Processing {file_to_process}...")
    return pd.read_csv(file_to_process)

def extract_from_json(file_to_process: str) -> pd.DataFrame:
    """Read one JSON file into a DataFrame.

    A progress line naming the file is printed before the file is parsed
    with ``pandas.read_json`` using its default options.

    Args:
        file_to_process: Path of the JSON file to read.

    Returns:
        The parsed contents of the file.
    """
    print(f"Processing {file_to_process}...")
    return pd.read_json(file_to_process)

def extract_from_xml(file_to_process: str) -> pd.DataFrame:
    """Read one XML file of person records into a DataFrame.

    Every direct child of the document root is treated as one record and
    must contain ``name``, ``height`` and ``weight`` child elements.
    ``height`` and ``weight`` are parsed as floats; ``name`` is kept as text.

    Args:
        file_to_process: Path of the XML file to read.

    Returns:
        A DataFrame with the columns ``name``, ``height`` and ``weight``,
        one row per record in document order. A root with no children
        yields an empty frame that still has those three columns.

    Raises:
        AttributeError: If a record lacks one of the three child elements.
        ValueError: If a height or weight text cannot be parsed as a float.
    """
    print(f"Processing {file_to_process}...")
    root = ET.parse(file_to_process).getroot()

    # Collect plain dicts and build the frame once: concatenating a one-row
    # frame per record re-copies all previous rows on every iteration.
    records = [
        {
            "name": person.find("name").text,
            "height": float(person.find("height").text),
            "weight": float(person.find("weight").text),
        }
        for person in root
    ]
    # Passing columns explicitly keeps the schema even when there are no records.
    return pd.DataFrame(records, columns=["name", "height", "weight"])

def transform(data: pd.DataFrame) -> pd.DataFrame:
    """Convert the height and weight columns to metric units.

    Heights are converted from inches to meters and weights from pounds to
    kilograms, each rounded to two decimals. The input frame is left
    untouched so the caller's extracted data is not silently converted and
    calling this function again on the same input gives the same result.

    Args:
        data: Frame with numeric ``height`` (inches) and ``weight`` (pounds)
            columns. Any other columns are carried over unchanged.

    Returns:
        A new DataFrame with ``height`` in meters and ``weight`` in kilograms.
    """
    return data.assign(
        # 1 inch is 0.0254 meters.
        height=(data["height"] * 0.0254).round(2),
        # 1 pound is 0.45359237 kilograms.
        weight=(data["weight"] * 0.45359237).round(2),
    )

def extract() -> pd.DataFrame:
    """Collect the records from every CSV, JSON and XML file in the working directory.

    Files are read in the order CSV, JSON, XML, and alphabetically within
    each type so that the row order does not depend on the order in which
    the filesystem happens to list them. The CSV named by ``target_file``
    is skipped so the pipeline's own output is not read back in as a source.

    Returns:
        One DataFrame holding the rows of all source files with a fresh
        0..n-1 index. If no source file is found, an empty frame with the
        columns ``name``, ``height`` and ``weight`` is returned.
    """
    frames = []

    # process all csv files, except the target file
    for csvfile in sorted(glob.glob("*.csv")):
        if csvfile != target_file:
            frames.append(extract_from_csv(csvfile))

    # process all json files
    for jsonfile in sorted(glob.glob("*.json")):
        frames.append(extract_from_json(jsonfile))

    # process all xml files
    for xmlfile in sorted(glob.glob("*.xml")):
        frames.append(extract_from_xml(xmlfile))

    if not frames:
        # pd.concat raises on an empty list; keep the documented schema instead.
        return pd.DataFrame(columns=["name", "height", "weight"])

    # Concatenate once: re-concatenating inside the loops copies every
    # previously read row again for each additional file.
    return pd.concat(frames, ignore_index=True)

def load_data(target_file: str, transformed_data: pd.DataFrame) -> None:
    """Write the transformed records to a CSV file.

    The frame is saved with the default ``DataFrame.to_csv`` options, so the
    row index is written as the first, unnamed column.

    Args:
        target_file: Path of the CSV file to create or overwrite.
        transformed_data: Frame to save.
    """
    transformed_data.to_csv(path_or_buf=target_file)

def log_progress(message: str) -> None:
    """Append a timestamped entry to the log file.

    Each entry is one line of the form ``<timestamp>,<message>``, written to
    the file named by the module-level ``log_file``. The file is opened in
    append mode, so earlier entries are kept.

    Args:
        message: Text to record after the timestamp.
    """
    # %b is the documented directive for the abbreviated month name; %h is a
    # platform extension of the C library and is not guaranteed to be supported.
    timestamp_format = "%Y-%b-%d-%H:%M:%S"  # Year-Monthname-Day-Hour:Minute:Second
    timestamp = datetime.now().strftime(timestamp_format)
    with open(log_file, "a") as f:
        f.write(f"{timestamp},{message}\n")

In [26]:
# ETL driver: the whole job and each phase are bracketed by start/end log entries.
log_progress("ETL Job Started")

# Extract: gather the raw records from the source files.
log_progress("Extract phase Started")
extracted_data = extract()
log_progress("Extract phase Ended")

# Transform: convert the measurements and show the result.
log_progress("Transform phase Started")
transformed_data = transform(extracted_data)
print("Transformed Data", transformed_data, sep="\n")
log_progress("Transform phase Ended")

# Load: save the transformed records to the target CSV.
log_progress("Load phase Started")
load_data(target_file, transformed_data)
log_progress("Load phase Ended")

log_progress("ETL Job Ended")

Processing source1.csv...
Processing source2.csv...
Processing source3.csv...
Processing source1.json...
Processing source2.json...
Processing source3.json...
Processing source1.xml...
Processing source2.xml...
Processing source3.xml...
Transformed Data
     name  height  weight
0    alex    1.67   51.25
1    ajay    1.82   61.91
2   alice    1.76   69.41
3    ravi    1.73   64.56
4     joe    1.72   65.45
5    alex    1.67   51.25
6    ajay    1.82   61.91
7   alice    1.76   69.41
8    ravi    1.73   64.56
9     joe    1.72   65.45
10   alex    1.67   51.25
11   ajay    1.82   61.91
12  alice    1.76   69.41
13   ravi    1.73   64.56
14    joe    1.72   65.45
15   jack    1.74   55.93
16    tom    1.77   64.18
17  tracy    1.78   61.90
18   john    1.72   50.97
19   jack    1.74   55.93
20    tom    1.77   64.18
21  tracy    1.78   61.90
22   john    1.72   50.97
23   jack    1.74   55.93
24    tom    1.77   64.18
25  tracy    1.78   61.90
26   john    1.72   50.97
27  simon    1.72 