# Load SpaceParts data
- SpaceParts is a fictional company that sells... spaceship parts.
- The SQL database is a free, public training database for learning data tools like Fabric or Power BI.
- This notebook gives you a simple, convenient way to load data from the SpaceParts database so that you can either practice with it in the notebook or write it to a Fabric data item for further use.

## Note: The SpaceParts database is the intellectual property of Tabular Editor
- You may use this database only for non-commercial purposes.
- Respect the terms and conditions of the license agreement.

### Import libraries

In [1]:
import re

import pandas as pd
import pyodbc
from pyspark.sql import SparkSession

StatementMeta(, 21964a4d-c9bf-4958-b7ac-1b6f1c0b9019, 3, Finished, Available, Finished)

### Function to load data from a single table
- Used in a later function to load all tables

In [2]:
def load_data_spark(table_name, server, database, username, password, schema=None):
    """Load a single SQL Server table into a Spark DataFrame through the JDBC reader.

    Args:
        table_name: Name of the source table; may contain spaces.
        server: Host name of the SQL Server instance, placed in the JDBC URL.
        database: Name of the database, placed in the JDBC URL.
        username: Login passed to the JDBC reader as ``user``.
        password: Password passed to the JDBC reader as ``password``.
        schema: Optional schema name. When falsy, the table name is used
            without a schema qualifier.

    Returns:
        The DataFrame returned by ``spark.read.format("jdbc")...load()``.
    """
    def _quote_identifier(identifier):
        # Square brackets allow spaces in names; a literal "]" inside a
        # bracket-delimited T-SQL identifier has to be doubled to "]]".
        return "[" + str(identifier).replace("]", "]]") + "]"

    spark = SparkSession.builder.getOrCreate()

    # Build "[schema].[table]" or just "[table]" when no schema was given
    if schema:
        full_table_name = f"{_quote_identifier(schema)}.{_quote_identifier(table_name)}"
    else:
        full_table_name = _quote_identifier(table_name)

    # Create connection string with encryption settings.
    # NOTE(review): trustServerCertificate=true makes the driver skip validation
    # of the server's TLS certificate; acceptable for a training database, but
    # reconsider before pointing this at anything sensitive.
    jdbc_url = f"jdbc:sqlserver://{server};databaseName={database};encrypt=true;trustServerCertificate=true;"

    df = spark.read \
        .format("jdbc") \
        .option("url", jdbc_url) \
        .option("dbtable", full_table_name) \
        .option("user", username) \
        .option("password", password) \
        .load()

    return df

StatementMeta(, 21964a4d-c9bf-4958-b7ac-1b6f1c0b9019, 4, Finished, Available, Finished)

### Functions to standardize column / table names
- Convert names to lowercase and replace spaces with underscores after loading the tables; column names also drop any other special characters.

In [3]:
def standardize_column_names(df):
    """Return ``df`` with every column renamed to a lowercase, underscore-separated name.

    Each column name is lowercased, its spaces are replaced with underscores,
    and any remaining character that is not an ASCII letter, digit, or
    underscore is removed.

    Args:
        df: A DataFrame exposing ``columns`` and ``withColumnRenamed(old, new)``.

    Returns:
        The DataFrame obtained by applying ``withColumnRenamed`` once per
        original column.

    Note:
        Distinct source names can normalise to the same result (for example
        "Net Sales" and "net_sales"); this function does not de-duplicate them.
    """
    # Characters to strip once spaces have been turned into underscores
    disallowed = re.compile(r'[^a-zA-Z0-9_]')
    for column in df.columns:
        # Convert to lowercase, replace spaces with underscores, and remove any special characters
        new_column = disallowed.sub('', column.lower().replace(' ', '_'))
        df = df.withColumnRenamed(column, new_column)
    return df

def convert_table_names_for_lakehouse(table_name):
    """Return ``table_name`` lowercased, with every space turned into an underscore."""
    lowered = table_name.lower()
    # Splitting on a single space and re-joining with "_" swaps each space one-for-one
    return "_".join(lowered.split(" "))

StatementMeta(, 21964a4d-c9bf-4958-b7ac-1b6f1c0b9019, 5, Finished, Available, Finished)

### Function to load all tables from the SpaceParts database
- Alternatively, you can load one table at a time.

In [4]:
def load_spaceparts(server, database, username, password):
    """Load every known SpaceParts table and return them keyed by a short name.

    Each table is loaded with ``load_data_spark``. A table that fails to load
    is reported with a printed message and left out of the result, so the
    returned dictionary may hold fewer entries than the list below.

    Args:
        server: Host name passed through to ``load_data_spark``.
        database: Database name passed through to ``load_data_spark``.
        username: Login passed through to ``load_data_spark``.
        password: Password passed through to ``load_data_spark``.

    Returns:
        dict mapping each result key (e.g. ``'customers'``) to the loaded DataFrame.
    """
    # (result key, source schema, source table), in load order
    sources = [
        # DimView tables
        ('brands', 'DimView', 'Brands'),
        ('budget_rate', 'DimView', 'Budget Rate'),
        ('customers', 'DimView', 'Customers'),
        ('employees', 'DimView', 'Employees'),
        ('exchange_rate', 'DimView', 'Exchange Rate'),
        ('invoice_doc_type', 'DimView', 'Invoice Document Type'),
        ('order_doc_type', 'DimView', 'Order Document Type'),
        ('order_status', 'DimView', 'Order Status'),
        ('products', 'DimView', 'Products'),
        ('regions', 'DimView', 'Regions'),

        # FactView tables
        ('budget', 'FactView', 'Budget'),
        ('forecast', 'FactView', 'Forecast'),
        ('invoices', 'FactView', 'Invoices'),
        ('orders', 'FactView', 'Orders'),
    ]

    loaded = {}

    for key, schema, table in sources:
        try:
            frame = load_data_spark(table, server, database, username, password, schema)
        except Exception as e:
            # Best effort: report the failure and carry on with the remaining tables
            print(f"Error loading {schema}.{table}: {str(e)}")
        else:
            loaded[key] = frame
            print(f"Successfully loaded {schema}.{table}")

    return loaded

StatementMeta(, 21964a4d-c9bf-4958-b7ac-1b6f1c0b9019, 6, Finished, Available, Finished)

### Function to load data to lakehouse
- Assumes you have already created and connected to the lakehouse in the notebook explorer 


In [5]:
def write_tables_to_lakehouse(all_tables, target_schema="SpaceParts"):
    """Save each DataFrame in ``all_tables`` as the table ``<target_schema>.<key>``.

    Tables are written in overwrite mode with the ``overwriteSchema`` option
    set to ``"true"``. A failure on one table is printed and does not stop the
    remaining tables from being written.

    Args:
        all_tables: dict mapping a table key to the DataFrame to write.
        target_schema: Prefix placed before each key in the target table name.
    """
    for table_key, frame in all_tables.items():
        try:
            # Target name is the schema prefix plus the dictionary key
            target_table = "{}.{}".format(target_schema, table_key)

            # Write to Lakehouse, replacing both data and schema of any existing table
            writer = frame.write.mode("overwrite")
            writer = writer.option("overwriteSchema", "true")
            writer.saveAsTable(target_table)

            print(f"Successfully wrote table to {target_table}")
        except Exception as e:
            print(f"Error writing table {table_key} to Lakehouse: {str(e)}")

StatementMeta(, 21964a4d-c9bf-4958-b7ac-1b6f1c0b9019, 7, Finished, Available, Finished)

## Loading tables

In [6]:
# Usage example: load every SpaceParts table into the all_tables dictionary.
# Connection parameters
# NOTE(review): credentials are hard-coded here; presumably this is the shared
# read-only training login — confirm, and never hard-code credentials for a
# private database.
server = "te3-training-eu.database.windows.net"
database = "SpacePartsCoDW"
username = "dwreader" 
password = "TE3#reader!"

# Load all tables through the Spark JDBC reader (load_spaceparts calls
# load_data_spark for each table); pyodbc is not used for this step.
all_tables = load_spaceparts(server, database, username, password)

StatementMeta(, 21964a4d-c9bf-4958-b7ac-1b6f1c0b9019, 8, Finished, Available, Finished)

Successfully loaded DimView.Brands
Successfully loaded DimView.Budget Rate
Successfully loaded DimView.Customers
Successfully loaded DimView.Employees
Successfully loaded DimView.Exchange Rate
Successfully loaded DimView.Invoice Document Type
Successfully loaded DimView.Order Document Type
Successfully loaded DimView.Order Status
Successfully loaded DimView.Products
Successfully loaded DimView.Regions
Successfully loaded FactView.Budget
Successfully loaded FactView.Forecast
Successfully loaded FactView.Invoices
Successfully loaded FactView.Orders


## Viewing a table

In [7]:
# Access individual tables by their key in the dictionary returned by load_spaceparts
customers_df = all_tables['customers']
# Render the DataFrame in the notebook output
display(customers_df)

StatementMeta(, 21964a4d-c9bf-4958-b7ac-1b6f1c0b9019, 9, Finished, Available, Finished)

SynapseWidget(Synapse.DataFrame, cc25c126-3e9b-4b1a-a0a9-334ab4df2ac2)

## Writing SpaceParts tables to a lakehouse
- You must first attach a lakehouse to the notebook from the "Explorer" pane.

In [None]:
# Write every loaded table as SpaceParts.<key>, overwriting any existing table of that name
write_tables_to_lakehouse(all_tables, target_schema="SpaceParts")