# **[Tutorial: From Excel workbook to a Power BI report in Microsoft Teams](https://learn.microsoft.com/en-us/power-bi/create-reports/service-from-excel-to-stunning-report)**

<img src='https://learn.microsoft.com/en-us/power-bi/create-reports/media/service-from-excel-to-stunning-report/power-bi-financial-report-service.png'>

### **[Sample data? Download here](https://github.com/microsoft/powerbi-desktop-samples/blob/main/AdventureWorks%20Sales%20Sample/AdventureWorks%20Sales.xlsx)**

>> #### 3. Window 환경에세 python으로 다운 받는 방법

In [118]:
## LInux가 아닌 Window 환경에서 다운받는 법
import subprocess

# 다운로드할 파일의 URL
url = "https://github.com/microsoft/powerbi-desktop-samples/raw/main/AdventureWorks%20Sales%20Sample/AdventureWorks%20Sales.xlsx"

# 다운로드 명령어 실행
subprocess.run(["wget", url])

CompletedProcess(args=['wget', 'https://github.com/microsoft/powerbi-desktop-samples/raw/main/AdventureWorks%20Sales%20Sample/AdventureWorks%20Sales.xlsx'], returncode=0)

In [119]:
import pandas as pd
dfs = pd.read_excel('/content/AdventureWorks Sales.xlsx', sheet_name=None)

In [120]:
dfs.keys()

dict_keys(['Sales Order_data', 'Sales Territory_data', 'Sales_data', 'Reseller_data', 'Date_data', 'Product_data', 'Customer_data'])

<img src='https://miro.medium.com/v2/resize:fit:1100/format:webp/0*9UNgxNIu8-HgaxLa.png' width=600 height=600>

## **1. Saving dataframes to Duckdb**

In [121]:
dfs['Sales Order_data'].head()

Unnamed: 0,Channel,SalesOrderLineKey,Sales Order,Sales Order Line
0,Reseller,43659001,SO43659,SO43659 - 1
1,Reseller,43659002,SO43659,SO43659 - 2
2,Reseller,43659003,SO43659,SO43659 - 3
3,Reseller,43659004,SO43659,SO43659 - 4
4,Reseller,43659005,SO43659,SO43659 - 5


# **dfs 사전형 자료의 모든 데이터프레임을 duckdb의 테이블로 injection**

## <font color='blue'> **1. for loop 사용**

In [123]:
# Specify the path directly for clarity

import os
db_file_path = os.path.join(os.getcwd(), 'ancestor9.duckdb')
print(db_file_path)

/content/ancestor9.duckdb


In [145]:
# Connect to the DuckDB database (will create if it doesn't exist)

import duckdb
con = duckdb.connect(database=db_file_path, read_only=False)

In [146]:
# 왼쪽에 숨겨진파일 보기(eye)로 확인하거나 아래 명령어
print("Files in the current directory:", os.listdir())

Files in the current directory: ['.config', 'AdventureWorks Sales.xlsx.1', 'ancestor9.duckdb', 'AdventureWorks Sales.xlsx', '.ipynb_checkpoints', 'AdventureWorks Sales.xlsx.2', 'sample_data']


In [130]:
for table_name, df in dfs.items():
    print(table_name, df.shape)
    print(f"{table_name}, ----> {table_name.replace(' ', '').replace('_data', '')}")

Sales Order_data (121253, 4)
Sales Order_data, ----> SalesOrder
Sales Territory_data (11, 4)
Sales Territory_data, ----> SalesTerritory
Sales_data (121253, 15)
Sales_data, ----> Sales
Reseller_data (702, 8)
Reseller_data, ----> Reseller
Date_data (1461, 7)
Date_data, ----> Date
Product_data (397, 9)
Product_data, ----> Product
Customer_data (18485, 7)
Customer_data, ----> Customer


In [147]:
# Loop through each DataFrame in the dictionary

tables =[]

for table_name, df in dfs.items():
    # Sanitize table names for registration and SQL queries
    sanitized_table_name = table_name.replace(' ', '').replace('_data', '')

    # Create a new table in DuckDB from the registered DataFrame, df
    con.execute(f"CREATE TABLE {sanitized_table_name} AS SELECT * FROM df;")

    tables.append(sanitized_table_name)

# Close the DuckDB connection
# con.close()

In [148]:
%whos

Variable               Type                  Data/Info
------------------------------------------------------
con                    DuckDBPyConnection    <duckdb.duckdb.DuckDBPyCo<...>object at 0x7eff5ba93b70>
db_file_path           str                   /content/ancestor9.duckdb
df                     DataFrame                    CustomerKey       <...>n[18485 rows x 7 columns]
dfs                    dict                  n=7
duckdb                 module                <module 'duckdb' from '/u<...>ages/duckdb/__init__.py'>
mp                     module                <module 'memory_profiler'<...>ages/memory_profiler.py'>
my_df                  NoneType              None
my_relation            NoneType              None
os                     module                <module 'os' from '/usr/lib/python3.10/os.py'>
path                   str                   /content
pd                     module                <module 'pandas' from '/u<...>ages/pandas/__init__.py'>
result             

In [142]:
#del new_table_name
#del sanitized_table_name

In [149]:
# Example to verify one of the tables
result = con.execute("SELECT * FROM SalesOrder;").fetchdf()
result

Unnamed: 0,Channel,SalesOrderLineKey,Sales Order,Sales Order Line
0,Reseller,43659001,SO43659,SO43659 - 1
1,Reseller,43659002,SO43659,SO43659 - 2
2,Reseller,43659003,SO43659,SO43659 - 3
3,Reseller,43659004,SO43659,SO43659 - 4
4,Reseller,43659005,SO43659,SO43659 - 5
...,...,...,...,...
121248,Internet,75122001,SO75122,SO75122 - 1
121249,Internet,75122002,SO75122,SO75122 - 2
121250,Internet,75123001,SO75123,SO75123 - 1
121251,Internet,75123002,SO75123,SO75123 - 2


In [None]:
result.shape

(121253, 4)

In [152]:
for table in tables:
    print(f"Table: {table},  {result.shape}")
    result = con.execute(f"SELECT * FROM {table};").fetchdf()
    display(result.head())

Table: SalesOrder,  (18485, 7)


Unnamed: 0,Channel,SalesOrderLineKey,Sales Order,Sales Order Line
0,Reseller,43659001,SO43659,SO43659 - 1
1,Reseller,43659002,SO43659,SO43659 - 2
2,Reseller,43659003,SO43659,SO43659 - 3
3,Reseller,43659004,SO43659,SO43659 - 4
4,Reseller,43659005,SO43659,SO43659 - 5


Table: SalesTerritory,  (121253, 4)


Unnamed: 0,SalesTerritoryKey,Region,Country,Group
0,1,Northwest,United States,North America
1,2,Northeast,United States,North America
2,3,Central,United States,North America
3,4,Southwest,United States,North America
4,5,Southeast,United States,North America


Table: Sales,  (11, 4)


Unnamed: 0,SalesOrderLineKey,ResellerKey,CustomerKey,ProductKey,OrderDateKey,DueDateKey,ShipDateKey,SalesTerritoryKey,Order Quantity,Unit Price,Extended Amount,Unit Price Discount Pct,Product Standard Cost,Total Product Cost,Sales Amount
0,43659001,676,-1,349,20170702,20170712,20170709.0,5,1,2024.994,2024.994,0,1898.0944,1898.0944,2024.994
1,43659002,676,-1,350,20170702,20170712,20170709.0,5,3,2024.994,6074.982,0,1898.0944,5694.2832,6074.982
2,43659003,676,-1,351,20170702,20170712,20170709.0,5,1,2024.994,2024.994,0,1898.0944,1898.0944,2024.994
3,43659004,676,-1,344,20170702,20170712,20170709.0,5,1,2039.994,2039.994,0,1912.1544,1912.1544,2039.994
4,43659005,676,-1,345,20170702,20170712,20170709.0,5,1,2039.994,2039.994,0,1912.1544,1912.1544,2039.994


Table: Reseller,  (121253, 15)


Unnamed: 0,ResellerKey,Reseller ID,Business Type,Reseller,City,State-Province,Country-Region,Postal Code
0,-1,[Not Applicable],[Not Applicable],[Not Applicable],[Not Applicable],[Not Applicable],[Not Applicable],[Not Applicable]
1,1,AW00000001,Value Added Reseller,A Bike Store,Seattle,Washington,United States,98104
2,2,AW00000002,Specialty Bike Shop,Progressive Sports,Renton,Washington,United States,98055
3,3,AW00000003,Warehouse,Advanced Bike Components,Irving,Texas,United States,75061
4,4,AW00000004,Value Added Reseller,Modular Cycle Systems,Austin,Texas,United States,78701


Table: Date,  (702, 8)


Unnamed: 0,DateKey,Date,Fiscal Year,Fiscal Quarter,Month,Full Date,MonthKey
0,20170701,2017-07-01,FY2018,FY2018 Q1,2017 Jul,"2017 Jul, 01",201707
1,20170702,2017-07-02,FY2018,FY2018 Q1,2017 Jul,"2017 Jul, 02",201707
2,20170703,2017-07-03,FY2018,FY2018 Q1,2017 Jul,"2017 Jul, 03",201707
3,20170704,2017-07-04,FY2018,FY2018 Q1,2017 Jul,"2017 Jul, 04",201707
4,20170705,2017-07-05,FY2018,FY2018 Q1,2017 Jul,"2017 Jul, 05",201707


Table: Product,  (1461, 7)


Unnamed: 0,ProductKey,SKU,Product,Standard Cost,Color,List Price,Model,Subcategory,Category
0,210,FR-R92B-58,"HL Road Frame - Black, 58",868.6342,Black,1431.5,HL Road Frame,Road Frames,Components
1,211,FR-R92R-58,"HL Road Frame - Red, 58",868.6342,Red,1431.5,HL Road Frame,Road Frames,Components
2,212,HL-U509-R,"Sport-100 Helmet, Red",12.0278,Red,33.6442,Sport-100,Helmets,Accessories
3,213,HL-U509-R,"Sport-100 Helmet, Red",13.8782,Red,33.6442,Sport-100,Helmets,Accessories
4,214,HL-U509-R,"Sport-100 Helmet, Red",13.0863,Red,34.99,Sport-100,Helmets,Accessories


Table: Customer,  (397, 9)


Unnamed: 0,CustomerKey,Customer ID,Customer,City,State-Province,Country-Region,Postal Code
0,-1,[Not Applicable],[Not Applicable],[Not Applicable],[Not Applicable],[Not Applicable],[Not Applicable]
1,11000,AW00011000,Jon Yang,Rockhampton,Queensland,Australia,4700
2,11001,AW00011001,Eugene Huang,Seaford,Victoria,Australia,3198
3,11002,AW00011002,Ruben Torres,Hobart,Tasmania,Australia,7001
4,11003,AW00011003,Christy Zhu,North Ryde,New South Wales,Australia,2113


## <font color='blue'> **2. try ~ finnaly 구문 사용**

In [153]:
'''

try:
    for table_name, df in dfs.items():
        sanitized_table_name = table_name.replace(' ', '').replace('_data', '')
        con.register(sanitized_table_name, df)
        con.execute(f"CREATE TABLE IF NOT EXISTS {sanitized_table_name} AS SELECT * FROM {sanitized_table_name};")

    result = con.execute("SELECT * FROM SalesOrder;").fetchdf()
    print(result)
finally:
    con.close()
'''

'\n\ntry:\n    for table_name, df in dfs.items():\n        sanitized_table_name = table_name.replace(\' \', \'\').replace(\'_data\', \'\')\n        con.register(sanitized_table_name, df)\n        con.execute(f"CREATE TABLE IF NOT EXISTS {sanitized_table_name} AS SELECT * FROM {sanitized_table_name};")\n\n    result = con.execute("SELECT * FROM SalesOrder;").fetchdf()\n    print(result)\nfinally:\n    con.close()\n'

## <font color='blue'> **3. with statement 구문 사용**

In [154]:
# New DuckDB
db_file_path = os.path.join(os.getcwd(), 'zoro.duckdb')
print(db_file_path)


/content/zoro.duckdb


In [156]:
# con = duckdb.connect(database=db_file_path, read_only=False)

with duckdb.connect(database=db_file_path, read_only=False) as con:

    for table_name, df in dfs.items():
        # Register each DataFrame with DuckDB
        con.register(table_name, df)

        # Sanitize table names for registration and SQL queries
        target_table_name = table_name.replace(' ', '').replace('_data', '')

        # Drop table if it exists
        con.execute(f'DROP TABLE IF EXISTS "{target_table_name}";')

        # Create a new table in DuckDB from the registered DataFrame, df
        con.execute(f"CREATE TABLE {target_table_name} AS SELECT * FROM df;")

        # Verify by querying the data back
        result = con.execute(f'SELECT * FROM "{target_table_name}";').fetchdf()
        print(target_table_name, result.shape)
        display(result.head())

SalesOrder (121253, 4)


Unnamed: 0,Channel,SalesOrderLineKey,Sales Order,Sales Order Line
0,Reseller,43659001,SO43659,SO43659 - 1
1,Reseller,43659002,SO43659,SO43659 - 2
2,Reseller,43659003,SO43659,SO43659 - 3
3,Reseller,43659004,SO43659,SO43659 - 4
4,Reseller,43659005,SO43659,SO43659 - 5


SalesTerritory (11, 4)


Unnamed: 0,SalesTerritoryKey,Region,Country,Group
0,1,Northwest,United States,North America
1,2,Northeast,United States,North America
2,3,Central,United States,North America
3,4,Southwest,United States,North America
4,5,Southeast,United States,North America


Sales (121253, 15)


Unnamed: 0,SalesOrderLineKey,ResellerKey,CustomerKey,ProductKey,OrderDateKey,DueDateKey,ShipDateKey,SalesTerritoryKey,Order Quantity,Unit Price,Extended Amount,Unit Price Discount Pct,Product Standard Cost,Total Product Cost,Sales Amount
0,43659001,676,-1,349,20170702,20170712,20170709.0,5,1,2024.994,2024.994,0,1898.0944,1898.0944,2024.994
1,43659002,676,-1,350,20170702,20170712,20170709.0,5,3,2024.994,6074.982,0,1898.0944,5694.2832,6074.982
2,43659003,676,-1,351,20170702,20170712,20170709.0,5,1,2024.994,2024.994,0,1898.0944,1898.0944,2024.994
3,43659004,676,-1,344,20170702,20170712,20170709.0,5,1,2039.994,2039.994,0,1912.1544,1912.1544,2039.994
4,43659005,676,-1,345,20170702,20170712,20170709.0,5,1,2039.994,2039.994,0,1912.1544,1912.1544,2039.994


Reseller (702, 8)


Unnamed: 0,ResellerKey,Reseller ID,Business Type,Reseller,City,State-Province,Country-Region,Postal Code
0,-1,[Not Applicable],[Not Applicable],[Not Applicable],[Not Applicable],[Not Applicable],[Not Applicable],[Not Applicable]
1,1,AW00000001,Value Added Reseller,A Bike Store,Seattle,Washington,United States,98104
2,2,AW00000002,Specialty Bike Shop,Progressive Sports,Renton,Washington,United States,98055
3,3,AW00000003,Warehouse,Advanced Bike Components,Irving,Texas,United States,75061
4,4,AW00000004,Value Added Reseller,Modular Cycle Systems,Austin,Texas,United States,78701


Date (1461, 7)


Unnamed: 0,DateKey,Date,Fiscal Year,Fiscal Quarter,Month,Full Date,MonthKey
0,20170701,2017-07-01,FY2018,FY2018 Q1,2017 Jul,"2017 Jul, 01",201707
1,20170702,2017-07-02,FY2018,FY2018 Q1,2017 Jul,"2017 Jul, 02",201707
2,20170703,2017-07-03,FY2018,FY2018 Q1,2017 Jul,"2017 Jul, 03",201707
3,20170704,2017-07-04,FY2018,FY2018 Q1,2017 Jul,"2017 Jul, 04",201707
4,20170705,2017-07-05,FY2018,FY2018 Q1,2017 Jul,"2017 Jul, 05",201707


Product (397, 9)


Unnamed: 0,ProductKey,SKU,Product,Standard Cost,Color,List Price,Model,Subcategory,Category
0,210,FR-R92B-58,"HL Road Frame - Black, 58",868.6342,Black,1431.5,HL Road Frame,Road Frames,Components
1,211,FR-R92R-58,"HL Road Frame - Red, 58",868.6342,Red,1431.5,HL Road Frame,Road Frames,Components
2,212,HL-U509-R,"Sport-100 Helmet, Red",12.0278,Red,33.6442,Sport-100,Helmets,Accessories
3,213,HL-U509-R,"Sport-100 Helmet, Red",13.8782,Red,33.6442,Sport-100,Helmets,Accessories
4,214,HL-U509-R,"Sport-100 Helmet, Red",13.0863,Red,34.99,Sport-100,Helmets,Accessories


Customer (18485, 7)


Unnamed: 0,CustomerKey,Customer ID,Customer,City,State-Province,Country-Region,Postal Code
0,-1,[Not Applicable],[Not Applicable],[Not Applicable],[Not Applicable],[Not Applicable],[Not Applicable]
1,11000,AW00011000,Jon Yang,Rockhampton,Queensland,Australia,4700
2,11001,AW00011001,Eugene Huang,Seaford,Victoria,Australia,3198
3,11002,AW00011002,Ruben Torres,Hobart,Tasmania,Australia,7001
4,11003,AW00011003,Christy Zhu,North Ryde,New South Wales,Australia,2113


## <font color='blue'> **4. pickle dump & load**

# **Appendix**
## **memory DB**
 - con = duckdb.connect(database=':memory:', read_only=False)

In [None]:
'''
# Create a connection to DuckDB in-memory database
con = duckdb.connect(database=':memory:', read_only=False)

# DataFrame is registered as a virtual table within your DuckDB session with DuckDB's Python API:
con.register('sales_order_data', dfs['Sales Order_data'])

# Save the DataFrame to DuckDB
# 'sales' Duckdb만들고 'salesorder' 테이블 만들기
con.execute("CREATE TABLE SalesOrder AS SELECT * FROM sales_order_data;")

# Verify by querying the data back
result = con.execute("SELECT * FROM SalesOrder;").fetchdf()

# Uncomment the following lines when ready to finalize and test the code
result
'''

In [132]:
con.close()

In [133]:
# Delete a DataFrame
my_df = None

# Delete a Relation
my_relation = None

In [136]:
! pip install memory_profiler

import memory_profiler as mp

with mp.profile():
    # Execute your code here

    # Analyze memory usage
    mp.print_profile()

Collecting memory_profiler
  Downloading memory_profiler-0.61.0-py3-none-any.whl (31 kB)
Installing collected packages: memory_profiler
Successfully installed memory_profiler-0.61.0


AttributeError: __enter__

## **duck DB 만들기 file path**

In [None]:
# 방법 1
import os
path = os.getcwd()
f'{path}/ancestor9.duckdb'


# Path to your DuckDB database file
# db_file_path = f'{path}/ancestor9.duckdb'
# db_file_path = 'ancestor9.duckdb'

# Connect to DuckDB database (will create if it doesn't exist)
# con = duckdb.connect(database=db_file_path, read_only=False)