**Non-Execution**

## With an explicit DuckDB connection (without the execute method)

In [None]:
import duckdb
import pandas as pd

# Open (or create) the on-disk DuckDB database used by this cell.
conn = duckdb.connect('inventory.db')

# Source CSV file name -> destination table name.
csv_files = {
    "begin_inventory.csv": "begin_inventory",
    "end_inventory.csv": "end_inventory",
    "purchase_prices.csv": "purchase_prices",
    "purchases.csv": "purchases",
    "sales.csv": "sales",
    "vendor_invoice.csv": "vendor_invoice"
}

# Copy every CSV that is present into its table; missing files are reported and skipped.
for file in csv_files:
    table = csv_files[file]
    try:
        df = pd.read_csv(file)
        df.to_sql(table, conn, if_exists='replace', index=False)
        print(f"✅ Imported {file} into table '{table}'")
    except FileNotFoundError:
        print(f"⚠️ Skipped {file} — file not uploaded")


def run_query(query: str) -> pd.DataFrame:
    """Run `query` on `conn` through pandas and return the result as a DataFrame."""
    frame = pd.read_sql_query(query, conn)
    return frame


# For each table: print a banner, its row count, and show the first five rows.
tables = run_query("SHOW TABLES")
divider = '-' * 50

for table in tables['name']:
    print(divider, table, divider)
    row_count = run_query(f"SELECT COUNT(*) AS count FROM {table}")
    count = row_count['count'].values[0]
    print('Count of records:', count)
    display(run_query(f"SELECT * FROM {table} LIMIT 5"))

print("🎉 All CSVs imported into DuckDB database 'inventory.db'")

## For Google Colab 👇👇

In [None]:
# ✅ Install DuckDB in Google Colab (only once)
!pip install duckdb --quiet

# ✅ Import required libraries
import duckdb
import pandas as pd
from google.colab import files  # for manual CSV uploads

# Ask the user to upload the six CSV files through the Colab file picker.
uploaded = files.upload()

# Open (or create) 'inventory.db' in the Colab filesystem.
conn = duckdb.connect('inventory.db')

# Uploaded CSV file name -> destination table name.
csv_files = {
    "begin_inventory.csv": "begin_inventory",
    "end_inventory.csv": "end_inventory",
    "purchase_prices.csv": "purchase_prices",
    "purchases.csv": "purchases",
    "sales.csv": "sales",
    "vendor_invoice.csv": "vendor_invoice"
}

# Copy every CSV that is present into its table; missing files are reported and skipped.
for file in csv_files:
    table = csv_files[file]
    try:
        df = pd.read_csv(file)
        df.to_sql(table, conn, if_exists='replace', index=False)
        print(f"✅ Imported {file} into table '{table}'")
    except FileNotFoundError:
        print(f"⚠️ Skipped {file} — file not uploaded")


def run_query(query: str) -> pd.DataFrame:
    """Run `query` on `conn` through pandas and return the result as a DataFrame."""
    frame = pd.read_sql_query(query, conn)
    return frame


# For each table: print a banner, its row count, and show the first five rows.
tables = run_query("SHOW TABLES")
divider = '-' * 50

for table in tables['name']:
    print(divider, table, divider)
    row_count = run_query(f"SELECT COUNT(*) AS count FROM {table}")
    count = row_count['count'].values[0]
    print('Count of records:', count)
    display(run_query(f"SELECT * FROM {table} LIMIT 5"))

print("🎉 All CSVs successfully imported into DuckDB database 'inventory.db'")

## With no connect 👇👇

Using `df.to_sql` instead of `duckdb.register` 👇👇👇 (VS Code)

In [None]:
import duckdb
import pandas as pd

# Source CSV file name -> destination table name.
csv_files = {
    "begin_inventory.csv": "begin_inventory",
    "end_inventory.csv": "end_inventory",
    "purchase_prices.csv": "purchase_prices",
    "purchases.csv": "purchases",
    "sales.csv": "sales",
    "vendor_invoice.csv": "vendor_invoice"
}

# A single in-memory database shared by the loader and by run_query().
# Calling duckdb.connect() inside the loop opened a brand-new, empty in-memory
# database for every CSV, so the tables were never visible to later queries.
conn = duckdb.connect()

# Copy every CSV that is present into its table; missing files are reported and skipped.
for file, table in csv_files.items():
    try:
        df = pd.read_csv(file)
        # index=False keeps the DataFrame's positional index out of the table.
        df.to_sql(table, conn, if_exists='replace', index=False)
        print(f"✅ Loaded '{file}' into DuckDB as table '{table}'")
    except FileNotFoundError:
        print(f"⚠️ Skipped '{file}' — file not found")


def run_query(query: str) -> pd.DataFrame:
    """Run `query` on the shared connection `conn` and return a DataFrame.

    The query must go through the same connection the tables were written to;
    the module-level duckdb.query() uses a different (default) connection.
    """
    return conn.query(query).to_df()


# For each table: print a banner, its row count, and show the first five rows.
tables = run_query("SHOW TABLES")

for table in tables['name']:
    print('-'*50, f'{table}', '-'*50)
    count = run_query(f"SELECT COUNT(*) AS count FROM {table}")['count'].values[0]
    print("Count of records:", count)
    display(run_query(f"SELECT * FROM {table} LIMIT 5"))

print("🎉 All CSVs loaded into in-memory DuckDB successfully!")

## In Colab 👇👇

In [None]:
# ✅ Step 1: Install DuckDB (if not already)
!pip install duckdb --quiet

# ✅ Step 2: Import required libraries
import duckdb
import pandas as pd
from google.colab import files

# Ask the user to upload the six CSV files through the Colab file picker.
uploaded = files.upload()

# Uploaded CSV file name -> destination table name.
csv_files = {
    "begin_inventory.csv": "begin_inventory",
    "end_inventory.csv": "end_inventory",
    "purchase_prices.csv": "purchase_prices",
    "purchases.csv": "purchases",
    "sales.csv": "sales",
    "vendor_invoice.csv": "vendor_invoice"
}

# A single in-memory database shared by the loader and by run_query().
# Calling duckdb.connect() inside the loop opened a brand-new, empty in-memory
# database for every CSV, so the tables were never visible to later queries.
conn = duckdb.connect()

# Copy every CSV that is present into its table; missing files are reported and skipped.
for file, table in csv_files.items():
    try:
        df = pd.read_csv(file)
        # index=False keeps the DataFrame's positional index out of the table.
        df.to_sql(table, conn, if_exists='replace', index=False)
        print(f"✅ Loaded '{file}' into DuckDB as table '{table}'")
    except FileNotFoundError:
        print(f"⚠️ Skipped '{file}' — file not found")


def run_query(query: str) -> pd.DataFrame:
    """Run `query` on the shared connection `conn` and return a DataFrame.

    The query must go through the same connection the tables were written to;
    the module-level duckdb.query() uses a different (default) connection.
    """
    return conn.query(query).to_df()


# For each table: print a banner, its row count, and show the first five rows.
tables = run_query("SHOW TABLES")

for table in tables['name']:
    print('-'*50, f'{table}', '-'*50)
    count = run_query(f"SELECT COUNT(*) AS count FROM {table}")['count'].values[0]
    print('Count of records:', count)
    display(run_query(f"SELECT * FROM {table} LIMIT 5"))

print("🎉 All uploaded CSVs loaded into in-memory DuckDB session!")

## ---Using df.to_sql instead of duckdb.register👇👇👇 VSCode

In [None]:
import duckdb
import pandas as pd

# One in-memory connection shared by the loader and the query helper
# (pass a file name such as 'inventory.duckdb' to persist instead).
conn = duckdb.connect()

# Source CSV file name -> destination table name.
csv_files = {
    "begin_inventory.csv": "begin_inventory",
    "end_inventory.csv": "end_inventory",
    "purchase_prices.csv": "purchase_prices",
    "purchases.csv": "purchases",
    "sales.csv": "sales",
    "vendor_invoice.csv": "vendor_invoice"
}

# Copy every CSV that is present into its table; missing files are reported and skipped.
for file, table in csv_files.items():
    try:
        df = pd.read_csv(file)
        # index=False: without it pandas also writes the DataFrame's positional
        # index as an extra "index" column in the table.
        df.to_sql(table, conn, if_exists='replace', index=False)
        print(f"✅ Loaded '{file}' into DuckDB as table '{table}'")
    except FileNotFoundError:
        print(f"⚠️ Skipped '{file}' — file not found")


def run_query(query: str) -> pd.DataFrame:
    """Run `query` on `conn` through pandas and return the result as a DataFrame."""
    return pd.read_sql_query(query, conn)


# For each table: print a banner, its row count, and show the first five rows.
tables = run_query("SHOW TABLES")

for table in tables['name']:
    print('-'*50, f'{table}', '-'*50)
    count = run_query(f"SELECT COUNT(*) AS count FROM {table}")['count'].values[0]
    print("Count of records:", count)
    display(run_query(f"SELECT * FROM {table} LIMIT 5"))

print("🎉 All CSVs loaded into shared DuckDB session successfully!")


## In Colab 👇👇👇

In [None]:
# ✅ Step 1: Install DuckDB (if not already installed)
!pip install duckdb --quiet

# ✅ Step 2: Import required libraries
import duckdb
import pandas as pd
from google.colab import files

# Ask the user to upload the six inventory CSVs through the Colab file picker.
uploaded = files.upload()

# One in-memory connection shared by the loader and the query helper
# (pass a file name such as 'inventory.duckdb' to persist instead).
conn = duckdb.connect()

# Uploaded CSV file name -> destination table name.
csv_files = {
    "begin_inventory.csv": "begin_inventory",
    "end_inventory.csv": "end_inventory",
    "purchase_prices.csv": "purchase_prices",
    "purchases.csv": "purchases",
    "sales.csv": "sales",
    "vendor_invoice.csv": "vendor_invoice"
}

# Copy every CSV that is present into its table; missing files are reported and skipped.
for file, table in csv_files.items():
    try:
        df = pd.read_csv(file)
        # index=False: without it pandas also writes the DataFrame's positional
        # index as an extra "index" column in the table.
        df.to_sql(table, conn, if_exists='replace', index=False)
        print(f"✅ Loaded '{file}' into DuckDB as table '{table}'")
    except FileNotFoundError:
        print(f"⚠️ Skipped '{file}' — file not found")


def run_query(query: str) -> pd.DataFrame:
    """Run `query` on `conn` through pandas and return the result as a DataFrame."""
    return pd.read_sql_query(query, conn)


# For each table: print a banner, its row count, and show the first five rows.
tables = run_query("SHOW TABLES")

for table in tables['name']:
    print('-'*50, f'{table}', '-'*50)
    count = run_query(f"SELECT COUNT(*) AS count FROM {table}")['count'].values[0]
    print("Count of records:", count)
    display(run_query(f"SELECT * FROM {table} LIMIT 5"))

print("🎉 All uploaded CSVs successfully loaded into DuckDB in Colab!")


## Single CSV 👇👇 (VS Code)

In [None]:
import duckdb
import pandas as pd

# One connection for the whole cell; in-memory by default
# (use duckdb.connect('inventory.db') to persist to a file).
conn = duckdb.connect()

# Source file and destination table.
csv_file = "begin_inventory.csv"
table_name = "begin_inventory"

# Load the CSV into DuckDB; a missing file is reported instead of raising.
try:
    df = pd.read_csv(csv_file)
    # index=False: without it pandas also writes the DataFrame's positional
    # index as an extra "index" column in the table.
    df.to_sql(table_name, conn, if_exists='replace', index=False)
    print(f"✅ Loaded '{csv_file}' into DuckDB as table '{table_name}'")
except FileNotFoundError:
    print(f"⚠️ Skipped '{csv_file}' — file not found")


def run_query(query: str) -> pd.DataFrame:
    """Run `query` on `conn` through pandas and return the result as a DataFrame."""
    return pd.read_sql_query(query, conn)


# For each table: print a banner, its row count, and show the first five rows.
tables = run_query("SHOW TABLES")

for table in tables['name']:
    print('-'*50, f'{table}', '-'*50)
    count = run_query(f"SELECT COUNT(*) AS count FROM {table}")['count'].values[0]
    print("Count of records:", count)
    display(run_query(f"SELECT * FROM {table} LIMIT 5"))

print("🎉 CSV loaded into DuckDB successfully!")


## Google Colab 👇👇

In [None]:
# ✅ Step 1: Install DuckDB
!pip install duckdb --quiet

# ✅ Step 2: Import required libraries
import duckdb
import pandas as pd
from google.colab import files

# Ask the user to upload one CSV (e.g. begin_inventory.csv) through the Colab file picker.
uploaded = files.upload()

# One connection for the whole cell; in-memory by default
# (use duckdb.connect('inventory.db') to persist to a file).
conn = duckdb.connect()

# Source file (change to the uploaded file name) and destination table.
csv_file = "begin_inventory.csv"
table_name = "begin_inventory"

# Load the CSV into DuckDB; a missing file is reported instead of raising.
try:
    df = pd.read_csv(csv_file)
    # index=False: without it pandas also writes the DataFrame's positional
    # index as an extra "index" column in the table.
    df.to_sql(table_name, conn, if_exists='replace', index=False)
    print(f"✅ Loaded '{csv_file}' into DuckDB as table '{table_name}'")
except FileNotFoundError:
    print(f"⚠️ Skipped '{csv_file}' — file not found")


def run_query(query: str) -> pd.DataFrame:
    """Run `query` on `conn` through pandas and return the result as a DataFrame."""
    return pd.read_sql_query(query, conn)


# For each table: print a banner, its row count, and show the first five rows.
tables = run_query("SHOW TABLES")

for table in tables['name']:
    print('-'*50, f'{table}', '-'*50)
    count = run_query(f"SELECT COUNT(*) AS count FROM {table}")['count'].values[0]
    print("Count of records:", count)
    display(run_query(f"SELECT * FROM {table} LIMIT 5"))

print("🎉 CSV loaded into DuckDB successfully in Colab!")


## For kaggle without execute process 👇👇

In [None]:
import duckdb
import pandas as pd

# Open (or create) inventory.db in the working directory.
conn = duckdb.connect('inventory.db')

# Folder that holds the CSVs (replace with your dataset folder name).
input_path = "../input/your-dataset-folder-name/"

# CSV file name -> destination table name.
csv_files = {
    "begin_inventory.csv": "begin_inventory",
    "end_inventory.csv": "end_inventory",
    "purchase_prices.csv": "purchase_prices",
    "purchases.csv": "purchases",
    "sales.csv": "sales",
    "vendor_invoice.csv": "vendor_invoice"
}

# Copy every CSV that is present into its table; missing files are reported and skipped.
for file in csv_files:
    table = csv_files[file]
    full_path = f"{input_path}{file}"
    try:
        df = pd.read_csv(full_path)
        df.to_sql(table, conn, if_exists='replace', index=False)
        print(f"✅ Imported '{file}' into DuckDB as table '{table}'")
    except FileNotFoundError:
        print(f"⚠️ File not found: {full_path}")


def run_query(query: str) -> pd.DataFrame:
    """Run `query` on `conn` through pandas and return the result as a DataFrame."""
    frame = pd.read_sql_query(query, conn)
    return frame


# For each table: print a banner, its row count, and show the first five rows.
tables = run_query("SHOW TABLES")
divider = '-' * 50

for table in tables['name']:
    print(divider, table, divider)
    row_count = run_query(f"SELECT COUNT(*) AS count FROM {table}")
    count = row_count['count'].values[0]
    print("Count of records:", count)
    display(run_query(f"SELECT * FROM {table} LIMIT 5"))

print("🎉 All CSVs successfully loaded into DuckDB → inventory.db")


In [None]:
import duckdb
import pandas as pd

# Open (or create) inventory.db in the working directory.
conn = duckdb.connect("inventory.db")

# Absolute CSV path -> destination table name.
csv_files = {
    "/kaggle/input/inventory-dataset/begin_inventory.csv": "begin_inventory",
    "/kaggle/input/inventory-dataset/end_inventory.csv": "end_inventory",
    "/kaggle/input/inventory-dataset/purchase_prices.csv": "purchase_prices",
    "/kaggle/input/inventory-dataset/purchases.csv": "purchases",
    "/kaggle/input/inventory-dataset/sales.csv": "sales",
    "/kaggle/input/inventory-dataset/vendor_invoice.csv": "vendor_invoice"
}

# Copy every CSV that is present into its table; missing files are reported and skipped.
for file_path in csv_files:
    table_name = csv_files[file_path]
    try:
        df = pd.read_csv(file_path)
        df.to_sql(table_name, conn, if_exists='replace', index=False)
        print(f"✅ Imported '{file_path}' as table '{table_name}'")
    except FileNotFoundError:
        print(f"⚠️ Skipped '{file_path}' — file not found")


def run_query(query: str) -> pd.DataFrame:
    """Run `query` on `conn` through pandas and return the result as a DataFrame."""
    frame = pd.read_sql_query(query, conn)
    return frame


# For each table: print a banner, its row count, and show the first five rows.
tables = run_query("SHOW TABLES")
divider = '-' * 50

for table in tables['name']:
    print(divider, table, divider)
    row_count = run_query(f"SELECT COUNT(*) AS count FROM {table}")
    count = row_count['count'].values[0]
    print("Count of records:", count)
    display(run_query(f"SELECT * FROM {table} LIMIT 5"))

print("🎉 All inventory CSVs loaded into DuckDB file: 'inventory.db'")


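Note: when given a DuckDB connection, `pd.read_sql_query` (and `df.to_sql`) show a pandas `UserWarning`. The **Execute Pattern** below avoids this by querying through the DuckDB connection directly.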


**Execute Pattern**

## In VSCODE 👇👇Execute pattern for connect db

In [None]:
import duckdb
import pandas as pd

# CSV file name -> destination table name.
csv_files = {
    "video_game_sales.csv": "video_game_sales",
    "game_reviews.csv": "game_reviews",
    "console_specs.csv": "console_specs"
}

# Persistent database file; created on first use.
conn = duckdb.connect("gaming_data.duckdb")

# Copy every CSV that is present into its table; missing files are reported and skipped.
for file, table_name in csv_files.items():
    try:
        df = pd.read_csv(file)
        # index=False: without it pandas also writes the DataFrame's positional
        # index as an extra "index" column in the table.
        df.to_sql(table_name, conn, if_exists='replace', index=False)
        print(f"✅ Saved '{file}' to DuckDB table '{table_name}'")
    except FileNotFoundError:
        print(f"⚠️ File '{file}' not found. Skipping...")

print("🎉 All CSV files successfully saved into 'gaming_data.duckdb'!")


def run_query(query: str) -> pd.DataFrame:
    """Execute `query` on `conn` and return the rows as a pandas DataFrame."""
    return conn.execute(query).fetchdf()


# Example: total global sales per platform, largest first.
query = """
SELECT Platform, SUM(Global_Sales) AS total_sales
FROM video_game_sales
GROUP BY Platform
ORDER BY total_sales DESC
"""

result = run_query(query)
print(result)

## In collab 👇👇

In [None]:
# ✅ Step 1: Install DuckDB if needed
!pip install duckdb --quiet

# ✅ Step 2: Import required libraries
import duckdb
import pandas as pd
from google.colab import files

# Ask the user to upload the CSV files through the Colab file picker.
uploaded = files.upload()

# Uploaded CSV file name -> destination table name.
csv_files = {
    "video_game_sales.csv": "video_game_sales",
    "game_reviews.csv": "game_reviews",
    "console_specs.csv": "console_specs"
}

# Persistent database file kept in the Colab session storage.
conn = duckdb.connect("gaming_data.duckdb")

# Copy every CSV that is present into its table; missing files are reported and skipped.
for file, table_name in csv_files.items():
    try:
        df = pd.read_csv(file)
        # index=False: without it pandas also writes the DataFrame's positional
        # index as an extra "index" column in the table.
        df.to_sql(table_name, conn, if_exists='replace', index=False)
        print(f"✅ Saved '{file}' to DuckDB table '{table_name}'")
    except FileNotFoundError:
        print(f"⚠️ '{file}' not found. Skipping...")

print("🎉 All uploaded CSVs saved into 'gaming_data.duckdb'")


def run_query(query: str) -> pd.DataFrame:
    """Execute `query` on `conn` and return the rows as a pandas DataFrame."""
    return conn.execute(query).fetchdf()


# Example: total global sales per platform, largest first.
query = """
SELECT Platform, SUM(Global_Sales) AS total_sales
FROM video_game_sales
GROUP BY Platform
ORDER BY total_sales DESC
"""

result = run_query(query)
print(result)

## In VS Code (execute pattern), in-memory connection without a database file
👇👇

In [None]:
import duckdb
import pandas as pd

# One in-memory connection shared by the loader and the query helper.
conn = duckdb.connect()

# CSV file name -> destination table name.
csv_files = {
    "video_game_sales.csv": "video_game_sales",
    "game_reviews.csv": "game_reviews",
    "console_specs.csv": "console_specs"
}

# Copy every CSV that is present into its table; missing files are reported and skipped.
for file, table_name in csv_files.items():
    try:
        df = pd.read_csv(file)
        # index=False: without it pandas also writes the DataFrame's positional
        # index as an extra "index" column in the table.
        df.to_sql(table_name, conn, if_exists='replace', index=False)
        print(f"✅ Loaded '{file}' as table '{table_name}'")
    except FileNotFoundError:
        print(f"⚠️ File '{file}' not found. Skipping...")


def run_query(query: str) -> pd.DataFrame:
    """Execute `query` on `conn` and return the rows as a pandas DataFrame."""
    return conn.execute(query).fetchdf()


# Example: total global sales per platform, largest first.
query = """
SELECT Platform, SUM(Global_Sales) AS total_sales
FROM video_game_sales
GROUP BY Platform
ORDER BY total_sales DESC
"""

# Last expression of the cell, so the notebook renders the DataFrame.
result = run_query(query)
result

## In Collab 👇👇

In [None]:
# ✅ Step 1: Install DuckDB
!pip install duckdb --quiet

# ✅ Step 2: Import libraries
import duckdb
import pandas as pd
from google.colab import files

# Ask the user to upload the CSV files through the Colab file picker.
uploaded = files.upload()

# Uploaded CSV file name -> destination table name.
csv_files = {
    "video_game_sales.csv": "video_game_sales",
    "game_reviews.csv": "game_reviews",
    "console_specs.csv": "console_specs"
}

# One in-memory connection shared by the loader and the query helper.
conn = duckdb.connect()

# Copy every CSV that is present into its table; missing files are reported and skipped.
for file, table_name in csv_files.items():
    try:
        df = pd.read_csv(file)
        # index=False: without it pandas also writes the DataFrame's positional
        # index as an extra "index" column in the table.
        df.to_sql(table_name, conn, if_exists='replace', index=False)
        print(f"✅ Loaded '{file}' as table '{table_name}'")
    except FileNotFoundError:
        print(f"⚠️ File '{file}' not found. Skipping...")


def run_query(query: str) -> pd.DataFrame:
    """Execute `query` on `conn` and return the rows as a pandas DataFrame."""
    return conn.execute(query).fetchdf()


# Example: total global sales per platform, largest first.
query = """
SELECT Platform, SUM(Global_Sales) AS total_sales
FROM video_game_sales
GROUP BY Platform
ORDER BY total_sales DESC
"""

# Last expression of the cell, so the notebook renders the DataFrame.
result = run_query(query)
result

## Single CSV in Google Colab (in-memory connection, no database file) 👇👇👇

In [None]:
# ✅ Step 1: Install DuckDB
!pip install duckdb --quiet

# ✅ Step 2: Import libraries
import duckdb
import pandas as pd
from google.colab import files

# Ask the user to upload one CSV (e.g. 'video_game_sales.csv') through the Colab file picker.
uploaded = files.upload()

# Source file and destination table.
file = "video_game_sales.csv"
table_name = "video_game_sales"

# In-memory connection shared by the loader and the query helper.
conn = duckdb.connect()

# Load the CSV into DuckDB; a missing file is reported instead of raising.
try:
    df = pd.read_csv(file)
    # index=False: without it pandas also writes the DataFrame's positional
    # index as an extra "index" column in the table.
    df.to_sql(table_name, conn, if_exists='replace', index=False)
    print(f"✅ Loaded '{file}' as DuckDB table '{table_name}'")
except FileNotFoundError:
    print(f"⚠️ File '{file}' not found. Skipping...")


def run_query(query: str) -> pd.DataFrame:
    """Execute `query` on `conn` and return the rows as a pandas DataFrame."""
    return conn.execute(query).fetchdf()


# Example: total global sales per platform, largest first.
query = """
SELECT Platform, SUM(Global_Sales) AS total_sales
FROM video_game_sales
GROUP BY Platform
ORDER BY total_sales DESC
"""

# Last expression of the cell, so the notebook renders the DataFrame.
result = run_query(query)
result

## In VScode 👇👇

In [None]:
# ✅ Step 1: Import necessary libraries
import duckdb
import pandas as pd

# Source file and destination table.
file = "video_game_sales.csv"
table_name = "video_game_sales"

# In-memory connection (use duckdb.connect("gaming_data.duckdb") to persist).
conn = duckdb.connect()

# Load the CSV into DuckDB; a missing file is reported instead of raising.
try:
    df = pd.read_csv(file)
    # index=False: without it pandas also writes the DataFrame's positional
    # index as an extra "index" column in the table.
    df.to_sql(table_name, conn, if_exists='replace', index=False)
    print(f"✅ Loaded '{file}' into DuckDB as table '{table_name}'")
except FileNotFoundError:
    print(f"⚠️ File '{file}' not found.")


def run_query(query: str) -> pd.DataFrame:
    """Execute `query` on `conn` and return the rows as a pandas DataFrame."""
    return conn.execute(query).fetchdf()


# Example: total global sales per platform, largest first.
query = """
SELECT Platform, SUM(Global_Sales) AS total_sales
FROM video_game_sales
GROUP BY Platform
ORDER BY total_sales DESC
"""

result = run_query(query)
print(result)

## In kaggle 👇👇

In [None]:
# ✅ Step 1: Install DuckDB (Kaggle already has it preinstalled, but safe to include)
!pip install duckdb --quiet

# ✅ Step 2: Import required libraries
import duckdb
import pandas as pd

# Source file (adjust to your dataset path) and destination table.
file = "/kaggle/input/video-game-sales-data/video_game_sales.csv"
table_name = "video_game_sales"

df = pd.read_csv(file)
print(f"✅ Loaded CSV with {len(df)} rows.")

# In-memory connection shared by the loader and the query helper.
conn = duckdb.connect()
# index=False: without it pandas also writes the DataFrame's positional
# index as an extra "index" column in the table.
df.to_sql(table_name, conn, if_exists='replace', index=False)
print(f"✅ DataFrame saved as DuckDB table '{table_name}'")


def run_query(query: str) -> pd.DataFrame:
    """Execute `query` on `conn` and return the rows as a pandas DataFrame."""
    return conn.execute(query).fetchdf()


# Example: total global sales per platform, largest first.
query = """
SELECT Platform, SUM(Global_Sales) AS total_sales
FROM video_game_sales
GROUP BY Platform
ORDER BY total_sales DESC
"""

# Last expression of the cell, so the notebook renders the first ten rows.
result = run_query(query)
result.head(10)

👇👇👇

In [None]:
# ✅ Step 1: Install DuckDB (optional in Kaggle)
!pip install duckdb --quiet

# ✅ Step 2: Import libraries
import duckdb
import pandas as pd

# Source file (change to your actual CSV path) and destination table.
file = "/kaggle/input/inventory-data/sales.csv"
table_name = "sales"

# Persistent database file created in the working directory.
conn = duckdb.connect("inventory.db")

df = pd.read_csv(file)
# index=False: without it pandas also writes the DataFrame's positional
# index as an extra "index" column in the table.
df.to_sql(table_name, conn, if_exists='replace', index=False)
print(f"✅ '{file}' loaded into 'inventory.db' as table '{table_name}'")


def run_query(query: str) -> pd.DataFrame:
    """Execute `query` on `conn` and return the rows as a pandas DataFrame."""
    return conn.execute(query).fetchdf()


# Example: total quantity sold per product, largest first.
query = """
SELECT Product, SUM(Quantity) AS total_sold
FROM sales
GROUP BY Product
ORDER BY total_sold DESC
"""

# Last expression of the cell, so the notebook renders the first rows.
result = run_query(query)
result.head()

# Multiple CSVs: 👇👇

In [None]:
# ✅ Step 1: Install DuckDB (Kaggle already has it, just in case)
!pip install duckdb --quiet

# ✅ Step 2: Import libraries
import duckdb
import pandas as pd

# Absolute CSV path -> destination table name.
csv_files = {
    "/kaggle/input/inventory-dataset/begin_inventory.csv": "begin_inventory",
    "/kaggle/input/inventory-dataset/end_inventory.csv": "end_inventory",
    "/kaggle/input/inventory-dataset/purchase_prices.csv": "purchase_prices",
    "/kaggle/input/inventory-dataset/purchases.csv": "purchases",
    "/kaggle/input/inventory-dataset/sales.csv": "sales",
    "/kaggle/input/inventory-dataset/vendor_invoice.csv": "vendor_invoice"
}

# Persistent database file.
conn = duckdb.connect("inventory.db")

# Copy each CSV into its table. A missing file raises here (no skip handling).
for file, table in csv_files.items():
    df = pd.read_csv(file)
    # index=False: without it pandas also writes the DataFrame's positional
    # index as an extra "index" column in the table.
    df.to_sql(table, conn, if_exists='replace', index=False)
    print(f"✅ Saved '{file}' to table '{table}' in inventory.db")


def run_query(query: str) -> pd.DataFrame:
    """Execute `query` on `conn` and return the rows as a pandas DataFrame."""
    return conn.execute(query).fetchdf()


# Sample query: number of rows in the sales table.
result = run_query("SELECT COUNT(*) FROM sales")
print(result)

## ✅ Final Suggestion (Clean Version Without Warning)

In [None]:
import duckdb
import pandas as pd

# In-memory connection shared by the loader and the query helper.
conn = duckdb.connect()

# CSV file name -> destination table name.
csv_files = {
    "customers.csv": "customers",
    "order_items.csv": "order_items",
    "orders.csv": "orders",
    "payments.csv": "payments",
    "products.csv": "products"
}

# Copy every CSV that is present into a real DuckDB table; missing files are skipped.
for file, table in csv_files.items():
    try:
        df = pd.read_csv(file)
        view = f"{table}_df"
        # Expose the DataFrame to SQL under a scratch name, copy it into a table...
        conn.register(view, df)
        conn.execute(f"CREATE OR REPLACE TABLE {table} AS SELECT * FROM {view}")
        # ...then drop the scratch view so it does not linger next to the table.
        conn.unregister(view)
        print(f"✅ Loaded '{file}' into DuckDB as table '{table}'")
    except FileNotFoundError:
        print(f"⚠️ Skipped '{file}' — file not found")


def run_query(query: str) -> pd.DataFrame:
    """Execute `query` on `conn` and return the rows as a pandas DataFrame."""
    return conn.execute(query).fetchdf()


# For each table: print a banner, its row count, and show the first five rows.
tables = run_query("SHOW TABLES")

for table in tables['name']:
    print('-'*50, f'{table}', '-'*50)
    count = run_query(f"SELECT COUNT(*) AS count FROM {table}")['count'].values[0]
    print("Count of records:", count)
    display(run_query(f"SELECT * FROM {table} LIMIT 5"))

print("🎉 All CSVs loaded into DuckDB with no warnings!")


## IN Collab

In [None]:
# ✅ Step 1: Install DuckDB
!pip install duckdb --quiet

# ✅ Step 2: Import libraries
import duckdb
import pandas as pd
from google.colab import files
from IPython.display import display

# Ask the user to upload the CSV files (customers.csv, orders.csv, ...) in Colab.
uploaded = files.upload()

# In-memory connection shared by the loader and the query helper.
conn = duckdb.connect()

# Uploaded CSV file name -> destination table name.
csv_files = {
    "customers.csv": "customers",
    "order_items.csv": "order_items",
    "orders.csv": "orders",
    "payments.csv": "payments",
    "products.csv": "products"
}

# Copy every CSV that is present into a real DuckDB table; missing files are skipped.
for file, table in csv_files.items():
    try:
        df = pd.read_csv(file)
        view = f"{table}_df"
        # Expose the DataFrame to SQL under a scratch name, copy it into a table...
        conn.register(view, df)
        conn.execute(f"CREATE OR REPLACE TABLE {table} AS SELECT * FROM {view}")
        # ...then drop the scratch view so it does not linger next to the table.
        conn.unregister(view)
        print(f"✅ Loaded '{file}' into DuckDB as table '{table}'")
    except FileNotFoundError:
        print(f"⚠️ Skipped '{file}' — file not found")


def run_query(query: str) -> pd.DataFrame:
    """Execute `query` on `conn` and return the rows as a pandas DataFrame."""
    return conn.execute(query).fetchdf()


# For each table: print a banner, its row count, and show the first five rows.
tables = run_query("SHOW TABLES")

for table in tables['name']:
    print('-'*50, f'{table}', '-'*50)
    count = run_query(f"SELECT COUNT(*) AS count FROM {table}")['count'].values[0]
    print("Count of records:", count)
    display(run_query(f"SELECT * FROM {table} LIMIT 5"))

print("🎉 All CSVs loaded into DuckDB with no warnings!")


## In kaggle

In [None]:
# ✅ Step 1: Install DuckDB (usually preinstalled in Kaggle, but safe to include)
!pip install duckdb --quiet

# ✅ Step 2: Import libraries
import duckdb
import pandas as pd
from IPython.display import display

# In-memory connection (pass 'your_file.duckdb' to persist instead).
conn = duckdb.connect()

# Absolute CSV path -> destination table name.
# Make sure these paths match your dataset folder in /kaggle/input/.
csv_files = {
    "/kaggle/input/your-dataset-folder/customers.csv": "customers",
    "/kaggle/input/your-dataset-folder/order_items.csv": "order_items",
    "/kaggle/input/your-dataset-folder/orders.csv": "orders",
    "/kaggle/input/your-dataset-folder/payments.csv": "payments",
    "/kaggle/input/your-dataset-folder/products.csv": "products"
}

# Copy every CSV that is present into a real DuckDB table; missing files are skipped.
for file_path, table_name in csv_files.items():
    try:
        df = pd.read_csv(file_path)
        view = f"{table_name}_df"
        # Expose the DataFrame to SQL under a scratch name, copy it into a table...
        conn.register(view, df)
        conn.execute(f"CREATE OR REPLACE TABLE {table_name} AS SELECT * FROM {view}")
        # ...then drop the scratch view so it does not linger next to the table.
        conn.unregister(view)
        print(f"✅ Loaded '{file_path}' into DuckDB as table '{table_name}'")
    except FileNotFoundError:
        print(f"⚠️ Skipped '{file_path}' — file not found")


def run_query(query: str) -> pd.DataFrame:
    """Execute `query` on `conn` and return the rows as a pandas DataFrame."""
    return conn.execute(query).fetchdf()


# For each table: print a banner, its row count, and show the first five rows.
tables = run_query("SHOW TABLES")

for table in tables['name']:
    print('-'*50, f'{table}', '-'*50)
    count = run_query(f"SELECT COUNT(*) AS count FROM {table}")['count'].values[0]
    print("Count of records:", count)
    display(run_query(f"SELECT * FROM {table} LIMIT 5"))

print("🎉 All CSVs loaded into DuckDB with no warnings!")


## Single Csv

# VSCode

In [None]:
import duckdb
import pandas as pd
from IPython.display import display

# In-memory connection (or duckdb.connect('inventory.duckdb') to persist).
conn = duckdb.connect()

# Source file and destination table (change as needed).
csv_file = "customers.csv"
table_name = "customers"

# Load the CSV into a real DuckDB table; a missing file is reported instead of raising.
try:
    df = pd.read_csv(csv_file)
    view = f"{table_name}_df"
    # Expose the DataFrame to SQL under a scratch name, copy it into a table...
    conn.register(view, df)
    conn.execute(f"CREATE OR REPLACE TABLE {table_name} AS SELECT * FROM {view}")
    # ...then drop the scratch view so it does not linger next to the table.
    conn.unregister(view)
    print(f"✅ Loaded '{csv_file}' into DuckDB as table '{table_name}'")
except FileNotFoundError:
    print(f"⚠️ Skipped '{csv_file}' — file not found")


def run_query(query: str) -> pd.DataFrame:
    """Execute `query` on `conn` and return the rows as a pandas DataFrame."""
    return conn.execute(query).fetchdf()


# For each table: print a banner, its row count, and show the first five rows.
tables = run_query("SHOW TABLES")

for table in tables['name']:
    print('-'*50, f'{table}', '-'*50)
    count = run_query(f"SELECT COUNT(*) AS count FROM {table}")['count'].values[0]
    print("Count of records:", count)
    display(run_query(f"SELECT * FROM {table} LIMIT 5"))

print("🎉 Single CSV loaded into DuckDB successfully!")



## IN collab

In [None]:
# ✅ Step 1: Install DuckDB in Colab (if not already)
!pip install duckdb --quiet

# ✅ Step 2: Import required libraries
import duckdb
import pandas as pd
from google.colab import files
from IPython.display import display

# Ask the user to upload one CSV (e.g. customers.csv) through the Colab file picker.
uploaded = files.upload()

# Source file (change to match the uploaded file name) and destination table.
csv_file = "customers.csv"
table_name = "customers"

# In-memory connection (or duckdb.connect('inventory.duckdb') to persist).
conn = duckdb.connect()

# Load the CSV into a real DuckDB table; a missing file is reported instead of raising.
try:
    df = pd.read_csv(csv_file)
    view = f"{table_name}_df"
    # Expose the DataFrame to SQL under a scratch name, copy it into a table...
    conn.register(view, df)
    conn.execute(f"CREATE OR REPLACE TABLE {table_name} AS SELECT * FROM {view}")
    # ...then drop the scratch view so it does not linger next to the table.
    conn.unregister(view)
    print(f"✅ Loaded '{csv_file}' into DuckDB as table '{table_name}'")
except FileNotFoundError:
    print(f"⚠️ File '{csv_file}' not found. Please upload the correct file.")


def run_query(query: str) -> pd.DataFrame:
    """Execute `query` on `conn` and return the rows as a pandas DataFrame."""
    return conn.execute(query).fetchdf()


# For each table: print a banner, its row count, and show the first five rows.
tables = run_query("SHOW TABLES")

for table in tables['name']:
    print('-'*50, f'{table}', '-'*50)
    count = run_query(f"SELECT COUNT(*) AS count FROM {table}")['count'].values[0]
    print("Count of records:", count)
    display(run_query(f"SELECT * FROM {table} LIMIT 5"))

print("🎉 Single CSV loaded into DuckDB successfully in Colab!")


## In kaggle

In [None]:
# ✅ Step 1: Install DuckDB (already available in Kaggle, but safe to include)
!pip install duckdb --quiet

# ✅ Step 2: Import required libraries
import duckdb
import pandas as pd
from IPython.display import display

# ✅ Step 3: Define path to your CSV file (e.g., in /kaggle/input/...)
csv_file = "/kaggle/input/your-dataset-folder/customers.csv"  # ⛔ CHANGE this to match your dataset path
table_name = "customers"

# ✅ Step 4: Connect to DuckDB (in-memory or save as .duckdb file)
conn = duckdb.connect()  # or duckdb.connect('inventory.duckdb')

# ✅ Step 5: Read the CSV, then materialise it as a DuckDB table
try:
    # Only the CSV read is guarded, so the handler cannot mask unrelated errors.
    df = pd.read_csv(csv_file)
except FileNotFoundError:
    print(f"⚠️ File '{csv_file}' not found. Make sure the path is correct.")
else:
    view_name = f"{table_name}_df"
    conn.register(view_name, df)  # expose the DataFrame to SQL under a helper name
    try:
        conn.execute(f"CREATE OR REPLACE TABLE {table_name} AS SELECT * FROM {view_name}")
    finally:
        # The helper view is only needed while the table is being created; drop it
        # afterwards so SHOW TABLES does not list it next to the real table.
        conn.unregister(view_name)
    print(f"✅ Loaded '{csv_file}' into DuckDB as table '{table_name}'")

# ✅ Step 6: Define query runner
def run_query(query):
    """Execute `query` on the shared connection `conn` and hand back `fetchdf()`'s DataFrame."""
    executed = conn.execute(query)
    frame = executed.fetchdf()
    return frame

# ✅ Step 7: Show tables and preview data
tables = run_query("SHOW TABLES")

# Walk the table names and print a banner, the record count and the first five rows.
separator = '-' * 50
for table in tables['name']:
    print(separator, table, separator)
    count_query = f"SELECT COUNT(*) AS count FROM {table}"
    count = run_query(count_query)['count'].values[0]
    print("Count of records:", count)
    head_query = f"SELECT * FROM {table} LIMIT 5"
    display(run_query(head_query))

print("🎉 Single CSV loaded into DuckDB successfully in Kaggle!")


In [None]:
# AFTER STEP-6

# 🎯 Query: Total global sales by platform
# Aggregate Global_Sales per Platform, largest total first.
# The joined text is identical to the original triple-quoted SQL,
# including its leading and trailing newline.
query = "\n".join([
    "",
    "SELECT Platform, SUM(Global_Sales) AS total_sales",
    "FROM video_game_sales",
    "GROUP BY Platform",
    "ORDER BY total_sales DESC",
    "",
])

# ✅ Run the query with the notebook's run_query helper and show heading + result
result = run_query(query)
print("\n🎮 Total Global Sales by Platform:\n", result, sep="\n")

# ✅ Full Example with Warnings Suppressed

In [None]:
import duckdb
import pandas as pd
import warnings
from IPython.display import display

# ✅ Suppress UserWarnings (e.g., from pandas with DuckDB connection)
warnings.filterwarnings("ignore", category=UserWarning)

# ✅ Connect to DuckDB
conn = duckdb.connect()

# ✅ Example CSV (change path as needed)
csv_file = "customers.csv"
table_name = "customers"

# ✅ Read the CSV, then materialise it as a DuckDB table
try:
    # Only the CSV read is guarded, so the handler cannot mask unrelated errors.
    df = pd.read_csv(csv_file)
except FileNotFoundError:
    print(f"⚠️ File '{csv_file}' not found.")
else:
    view_name = f"{table_name}_df"
    conn.register(view_name, df)  # expose the DataFrame to SQL under a helper name
    try:
        conn.execute(f"CREATE OR REPLACE TABLE {table_name} AS SELECT * FROM {view_name}")
    finally:
        # The helper view is only needed while the table is being created; drop it
        # afterwards so SHOW TABLES does not list it next to the real table.
        conn.unregister(view_name)
    print(f"✅ Loaded '{csv_file}' into DuckDB as table '{table_name}'")

# ✅ SQL query function
def run_query(query):
    """Run `query` through pandas on the shared connection `conn` and return the DataFrame.

    This cell deliberately goes through `pd.read_sql_query`; the UserWarning
    filter installed earlier in the cell is what keeps it quiet.
    """
    frame = pd.read_sql_query(sql=query, con=conn)  # ✅ Warning suppressed here
    return frame

# ✅ Show tables and preview
tables = run_query("SHOW TABLES")
# Banner, record count and five-row sample for each listed table.
rule = '-' * 50
for table in tables['name']:
    print(rule, table, rule)
    count_frame = run_query(f"SELECT COUNT(*) AS count FROM {table}")
    count = count_frame['count'].values[0]
    print("Count of records:", count)
    sample = run_query(f"SELECT * FROM {table} LIMIT 5")
    display(sample)

print("🎉 All done with warnings suppressed!")


In [None]:
import duckdb
import pandas as pd
import warnings
from IPython.display import display

# ✅ Suppress UserWarnings (e.g., from pandas with DuckDB connection)
warnings.filterwarnings("ignore", category=UserWarning)

# ✅ Connect to DuckDB
conn = duckdb.connect()

# ✅ Example CSV (change path as needed)
csv_file = "customers.csv"
table_name = "customers"

# ✅ Read the CSV, then materialise it as a DuckDB table
try:
    # Only the CSV read is guarded, so the handler cannot mask unrelated errors.
    df = pd.read_csv(csv_file)
except FileNotFoundError:
    print(f"⚠️ File '{csv_file}' not found.")
else:
    view_name = f"{table_name}_df"
    conn.register(view_name, df)  # expose the DataFrame to SQL under a helper name
    try:
        conn.execute(f"CREATE OR REPLACE TABLE {table_name} AS SELECT * FROM {view_name}")
    finally:
        # The helper view is only needed while the table is being created; drop it
        # afterwards so SHOW TABLES does not list it next to the real table.
        conn.unregister(view_name)
    print(f"✅ Loaded '{csv_file}' into DuckDB as table '{table_name}'")

# ✅ Define query runner using DuckDB-native fetchdf
def run_query(query):
    """Send `query` to the shared DuckDB connection `conn`; return the `fetchdf()` DataFrame."""
    result = conn.execute(query)
    return result.fetchdf()

# ✅ Show tables and preview
tables = run_query("SHOW TABLES")
# Report each table: banner line, number of records, then a five-row preview.
separator = '-' * 50
for table in tables['name']:
    print(separator, table, separator)
    count_query = f"SELECT COUNT(*) AS count FROM {table}"
    count = run_query(count_query)['count'].values[0]
    print("Count of records:", count)
    head_query = f"SELECT * FROM {table} LIMIT 5"
    display(run_query(head_query))

print("🎉 All done with warnings suppressed!")


## Execution

**Multi-CSV**

# VSCODE

In [None]:
import duckdb
import pandas as pd
from IPython.display import display
import warnings

# ✅ Suppress UserWarnings from pandas about DuckDB connection
warnings.filterwarnings("ignore", category=UserWarning)

# ✅ Shared in-memory DuckDB connection
conn = duckdb.connect()  # Optional: use 'inventory.duckdb' to persist

# ✅ Define CSV filename-to-table mapping
csv_files = {
    "customers.csv": "customers",
    "order_items.csv": "order_items",
    "orders.csv": "orders",
    "payments.csv": "payments",
    "products.csv": "products"
}

# ✅ Read and write each CSV as a DuckDB table using df.to_sql()
for file, table in csv_files.items():
    try:
        # Only the CSV read is guarded; a missing file is reported and skipped.
        df = pd.read_csv(file)
    except FileNotFoundError:
        print(f"⚠️ Skipped '{file}' — file not found")
        continue
    # index=False keeps the pandas row index out of the table; by default
    # to_sql() would store it as an extra "index" column that is not in the CSV.
    df.to_sql(table, conn, if_exists='replace', index=False)  # ✅ Using df.to_sql()
    print(f"✅ Loaded '{file}' into DuckDB as table '{table}'")

# ✅ Reusable SQL query runner using DuckDB-native method
def run_query(query):
    """Execute `query` on the shared connection `conn` via DuckDB itself and return `fetchdf()`'s DataFrame."""
    executed = conn.execute(query)
    frame = executed.fetchdf()  # ✅ No pandas SQL warnings
    return frame

# ✅ Show tables and display sample records
tables = run_query("SHOW TABLES")

# For every table DuckDB reports: a banner, the row count, and a five-row sample.
rule = '-' * 50
for table in tables['name']:
    print(rule, table, rule)
    count_frame = run_query(f"SELECT COUNT(*) AS count FROM {table}")
    count = count_frame['count'].values[0]
    print("Count of records:", count)
    sample = run_query(f"SELECT * FROM {table} LIMIT 5")
    display(sample)

print("🎉 All CSVs loaded into DuckDB using df.to_sql(), with warnings suppressed!")


# In Colab

In [None]:
# ✅ Install DuckDB (only needed once in Colab)
!pip install duckdb --quiet

# ✅ Import libraries
import duckdb
import pandas as pd
from IPython.display import display
import warnings

# ✅ Suppress UserWarnings from pandas about DuckDB connection
warnings.filterwarnings("ignore", category=UserWarning)

# ✅ Shared in-memory DuckDB connection
conn = duckdb.connect()

# ✅ Upload CSVs to Colab (run this cell first and select files)
from google.colab import files
uploaded = files.upload()  # 👈 Choose your 5 CSVs manually here

# ✅ Define CSV filename-to-table mapping (must match uploaded filenames)
csv_files = {
    "customers.csv": "customers",
    "order_items.csv": "order_items",
    "orders.csv": "orders",
    "payments.csv": "payments",
    "products.csv": "products"
}

# ✅ Read and write each CSV as a DuckDB table using df.to_sql()
for file, table in csv_files.items():
    try:
        # Only the CSV read is guarded; a missing file is reported and skipped.
        df = pd.read_csv(file)
    except FileNotFoundError:
        print(f"⚠️ Skipped '{file}' — file not found")
        continue
    # index=False keeps the pandas row index out of the table; by default
    # to_sql() would store it as an extra "index" column that is not in the CSV.
    df.to_sql(table, conn, if_exists='replace', index=False)  # ✅ Use df.to_sql()
    print(f"✅ Loaded '{file}' into DuckDB as table '{table}'")

# ✅ Define SQL query runner using DuckDB-native method
def run_query(query):
    """Run `query` on the notebook-level DuckDB connection `conn`.

    Returns the result of DuckDB's `fetchdf()`, i.e. the rows as a DataFrame.
    """
    result = conn.execute(query)
    return result.fetchdf()

# ✅ Display all tables and preview sample rows
tables = run_query("SHOW TABLES")

# Walk the table names and print a banner, the record count and the first five rows.
separator = '-' * 50
for table in tables['name']:
    print(separator, table, separator)
    count_query = f"SELECT COUNT(*) AS count FROM {table}"
    count = run_query(count_query)['count'].values[0]
    print("Count of records:", count)
    head_query = f"SELECT * FROM {table} LIMIT 5"
    display(run_query(head_query))

print("🎉 All CSVs loaded into DuckDB using df.to_sql(), with warnings suppressed!")


# Single CSV (VSCODE)

In [None]:
import duckdb
import pandas as pd
from IPython.display import display
import warnings

# ✅ Suppress UserWarnings from pandas about DuckDB connection
warnings.filterwarnings("ignore", category=UserWarning)

# ✅ Connect to DuckDB (in-memory or persistent if desired)
conn = duckdb.connect()  # Optional: use 'inventory.duckdb'

# ✅ Define the CSV file and DuckDB table name
csv_file = "customers.csv"
table_name = "customers"

# ✅ Read and load the CSV into DuckDB
try:
    # Only the CSV read is guarded, so the handler cannot mask unrelated errors.
    df = pd.read_csv(csv_file)
except FileNotFoundError:
    print(f"⚠️ File '{csv_file}' not found!")
else:
    # index=False keeps the pandas row index out of the table; by default
    # to_sql() would store it as an extra "index" column that is not in the CSV.
    df.to_sql(table_name, conn, if_exists='replace', index=False)  # ✅ Using df.to_sql()
    print(f"✅ Loaded '{csv_file}' into DuckDB as table '{table_name}'")

# ✅ Reusable SQL query runner using DuckDB-native method
def run_query(query):
    """Execute `query` on the shared connection `conn` and hand back `fetchdf()`'s DataFrame."""
    executed = conn.execute(query)
    frame = executed.fetchdf()
    return frame

# ✅ Show the table and display sample records
tables = run_query("SHOW TABLES")

# Banner, record count and five-row sample for each listed table.
rule = '-' * 50
for table in tables['name']:
    print(rule, table, rule)
    count_frame = run_query(f"SELECT COUNT(*) AS count FROM {table}")
    count = count_frame['count'].values[0]
    print("Count of records:", count)
    sample = run_query(f"SELECT * FROM {table} LIMIT 5")
    display(sample)

print("🎉 Single CSV loaded into DuckDB using df.to_sql(), with warnings suppressed!")


## In Colab

In [None]:
# ✅ Install DuckDB in Colab if not already
!pip install duckdb --quiet

# ✅ Import modules
import duckdb
import pandas as pd
import warnings
from IPython.display import display

# ✅ Suppress UserWarnings from pandas with DuckDB connection
warnings.filterwarnings("ignore", category=UserWarning)

# ✅ Connect to DuckDB (in-memory or persist with a filename)
conn = duckdb.connect()  # You can use 'mydb.duckdb' to persist

# ✅ Upload the file from local (optional step for Colab)
from google.colab import files
uploaded = files.upload()  # 📁 Upload your 'customers.csv' file

# ✅ Define single CSV file and DuckDB table name
csv_file = "customers.csv"
table_name = "customers"

# ✅ Read and load the CSV using df.to_sql()
try:
    # Only the CSV read is guarded, so the handler cannot mask unrelated errors.
    df = pd.read_csv(csv_file)
except FileNotFoundError:
    print(f"⚠️ File '{csv_file}' not found!")
else:
    # index=False keeps the pandas row index out of the table; by default
    # to_sql() would store it as an extra "index" column that is not in the CSV.
    df.to_sql(table_name, conn, if_exists='replace', index=False)  # ✅ df.to_sql() with DuckDB
    print(f"✅ Loaded '{csv_file}' into DuckDB as table '{table_name}'")

# ✅ SQL query runner
def run_query(query):
    """Send `query` to the shared DuckDB connection `conn`; return the `fetchdf()` DataFrame."""
    result = conn.execute(query)
    return result.fetchdf()

# ✅ Show tables and preview sample records
tables = run_query("SHOW TABLES")
# Report each table: banner line, number of records, then a five-row preview.
separator = '-' * 50
for table in tables['name']:
    print(separator, table, separator)
    count_query = f"SELECT COUNT(*) AS count FROM {table}"
    count = run_query(count_query)['count'].values[0]
    print("Count of records:", count)
    head_query = f"SELECT * FROM {table} LIMIT 5"
    display(run_query(head_query))

print("🎉 Done! Single CSV loaded into DuckDB using df.to_sql(), and preview displayed.")


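## You can combine any loading method with any querying method: `conn.execute` + `pd.read_sql_query`, `df.to_sql` + `pd.read_sql_query`, `conn.execute` + `conn.execute`, or `df.to_sql` + `conn.execute`

It's better to use `conn.execute` for both loading and querying (i.e., the `conn.register` approach) to avoid pandas warnings in the future.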