# Data loader

# Loading CSV, XML, YML, and JSON data

In [1]:
import pandas as pd
import yaml
import json
import xml.etree.ElementTree as ET

# YAML Loader

In [2]:
def load_yaml(file_path):
    """Loads data from a YAML file into a Pandas DataFrame.

    Args:
        file_path: Path of the YAML file to read.

    Returns:
        The DataFrame built by passing the parsed YAML content to
        ``pd.DataFrame``.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding (e.g. cp1252 on Windows).
    with open(file_path, "r", encoding="utf-8") as file:
        data = yaml.safe_load(file)
    return pd.DataFrame(data)


# Load and display YAML dataframe

In [3]:
# Read the people records from the YAML file and preview the first rows.
people_yml_df = load_yaml("data/people.yml")
display(people_yml_df.head())

Unnamed: 0,Android,Desktop,Iphone,city,email,id,name,phone
0,1,0,0,"Montreal, Canada",Jamie.Bright@example.com,1,Jamie Bright,533-849-3913
1,1,1,1,"Toronto, Canada",Yusra.Fletcher@example.com,3,Yusra Fletcher,385-702-8874
2,1,0,0,"Paris, France",Beatrix.Everett@example.com,5,Beatrix Everett,194-640-2758
3,1,0,0,"Toronto, Canada",Asa.Vazquez@example.com,9,Asa Vazquez,504-421-4485
4,1,1,0,"Toronto, Canada",Carmen.Burton@example.com,19,Carmen Burton,527-368-7429


# JSON Loader

In [4]:
def load_json(file_path):
    """Loads data from a JSON file into a Pandas DataFrame.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The DataFrame built by passing the parsed JSON content to
        ``pd.DataFrame``.
    """
    # JSON interchange files are UTF-8; decode explicitly instead of relying
    # on the platform's default locale encoding.
    with open(file_path, "r", encoding="utf-8") as file:
        data = json.load(file)
    return pd.DataFrame(data)

# Load and display JSON dataframe

In [5]:
# Read the people records from the JSON file and preview the first rows.
people_json_df = load_json("data/people.json")
display(people_json_df.head())

Unnamed: 0,id,first_name,last_name,telephone,email,devices,location
0,1,Jamie,Bright,533-849-3913,Jamie.Bright@example.com,[Android],"{'City': 'Montreal', 'Country': 'Canada'}"
1,2,Arabella,Knox,652-272-9539,Arabella.Knox@example.com,"[Android, Iphone]","{'City': 'Los Angeles', 'Country': 'USA'}"
2,3,Yusra,Fletcher,385-702-8874,Yusra.Fletcher@example.com,"[Android, Iphone, Desktop]","{'City': 'Toronto', 'Country': 'Canada'}"
3,4,Esme,Nielsen,621-797-5367,Esme.Nielsen@example.com,"[Android, Iphone]","{'City': 'Los Angeles', 'Country': 'USA'}"
4,6,Dean,Lewis,243-955-3782,Dean.Lewis@example.com,"[Android, Iphone, Desktop]","{'City': 'San Diego', 'Country': 'USA'}"


# CSV Loader

In [6]:
def load_csv(file_path):
    """Read the CSV file at ``file_path`` and return it as a Pandas DataFrame."""
    csv_frame = pd.read_csv(file_path)
    return csv_frame

# Load and display CSV dataframe

In [7]:
# Read the transfer records from the CSV file and preview the first rows.
transfers_df = load_csv("data/transfers.csv")
display(transfers_df.head())

Unnamed: 0,sender_id,recipient_id,amount,date
0,926,531,70.47,2022-01-07
1,280,735,21.88,2022-01-10
2,571,737,44.68,2022-01-13
3,958,945,39.94,2022-01-17
4,373,817,6.98,2022-01-17


# XML Transactions Parser

# The XML is nested, so each transaction must be iterated over to collect its items into a list

In [8]:
def _child_text(parent, tag, default=None):
    """Returns the text of the first ``tag`` child of ``parent``, or ``default`` if there is no such child."""
    child = parent.find(tag)
    return child.text if child is not None else default


def parse_transactions_xml(file_path):
    """Parses transaction data from an XML file into a Pandas DataFrame.

    Every ``<transaction>`` element directly under the document root becomes
    one row. The ``id`` attribute and the text of the ``<phone>`` and
    ``<store>`` children are copied as-is (``None`` when missing). The entries
    under ``<items>`` are flattened into a list of ``"name (xquantity)"``
    strings, using ``"Unknown"`` for a missing name and ``"1"`` for a missing
    quantity.

    Args:
        file_path: Source of the XML document, passed straight to ``ET.parse``.

    Returns:
        A DataFrame with the columns ``id``, ``phone``, ``store`` and
        ``items`` (zero rows when the document holds no transactions), or
        ``None`` if reading or parsing fails. In that case the error is
        printed rather than raised.
    """
    columns = ["id", "phone", "store", "items"]

    try:
        root = ET.parse(file_path).getroot()
        transactions = []

        for transaction in root.findall("transaction"):
            # Extract items as a list
            items_list = []
            items_tag = transaction.find("items")
            if items_tag is not None:
                for item in items_tag.findall("item"):
                    # The product name lives in a nested <item> child of each entry.
                    item_name = _child_text(item, "item", "Unknown")
                    quantity = _child_text(item, "quantity", "1")
                    items_list.append(f"{item_name} (x{quantity})")

            transactions.append({
                "id": transaction.attrib.get("id"),
                "phone": _child_text(transaction, "phone"),
                "store": _child_text(transaction, "store"),
                "items": items_list,
            })

        # Passing the columns explicitly keeps the schema stable even when the
        # document has no transactions; without it the empty frame would have
        # no "items" column at all.
        return pd.DataFrame(transactions, columns=columns)

    except Exception as e:
        # Deliberate best-effort: report the problem and signal failure with None.
        print(f"❌ Error parsing transactions XML: {e}")
        return None

# Load and display XML dataframe

In [9]:
# Parse the transactions from the XML file and preview the first rows.
transactions_df = parse_transactions_xml("data/transactions.xml")
display(transactions_df.head())

Unnamed: 0,id,phone,store,items
0,1000,233-159-4158,Trader Tales,"[Krafty Cheddar (x1), Popsi (x1), Oreoz (x2)]"
1,1001,725-427-2794,PetPals Mart,[Colgatex (x1)]
2,1002,659-190-9378,Trader Tales,"[Colgatex (x1), Dovee (x1), Flixnet (x1)]"
3,1003,334-436-6254,Urban Outfitters Loft,"[Krafty Cheddar (x2), RedCow (x1), Dovee (x2)]"
4,1004,615-102-4849,PetPals Mart,"[Popsi (x1), Flixnet (x1)]"


In [12]:
# Export this notebook as a plain Python script using nbconvert.
!jupyter nbconvert --to script data_loader.ipynb

[NbConvertApp] Converting notebook data_loader.ipynb to script
[NbConvertApp] Writing 2954 bytes to data_loader.py
