# Using run_query with a local DuckDB database file

In [None]:
import duckdb
import pandas as pd

# Step 1: Load CSV
df = pd.read_csv('/kaggle/input/netflix-shows/netflix_titles.csv')

# Step 2: Connect to DuckDB
conn = duckdb.connect('netflix_project.duckdb')

# ✅ Step 3: Register the DataFrame as a temporary view
conn.register('df_view', df)

# ✅ Step 4: Create or replace a permanent DuckDB table from the registered view
conn.execute("CREATE OR REPLACE TABLE netflix AS SELECT * FROM df_view")

# Step 5: Define reusable SQL query runner
def run_query(query, params=None):
    """Execute ``query`` on the notebook-level ``conn`` and return a DataFrame.

    Args:
        query: SQL text to execute.
        params: Optional values for prepared-statement placeholders in
            ``query``. Passing values here instead of formatting them into
            the SQL string avoids quoting mistakes. When omitted, the query
            is executed exactly as before.

    Returns:
        The result of ``fetchdf()`` on the executed statement.
    """
    if params is None:
        # Keep the original single-argument call for existing callers.
        executed = conn.execute(query)
    else:
        executed = conn.execute(query, params)
    return executed.fetchdf()

# Step 6: Example query
print("🎥 Let's compare how many Movies and TV Shows are in the Netflix dataset.")
query = """
SELECT type, COUNT(*) AS count
FROM netflix
GROUP BY type
"""
df_result = run_query(query)
print(df_result)

In [None]:
import duckdb
import pandas as pd

# Step 1: Load CSV
df = pd.read_csv('/kaggle/input/netflix-shows/netflix_titles.csv')

# Step 2: Connect to DuckDB
conn = duckdb.connect('netflix_project.duckdb')

# Step 3: Register the DataFrame as a DuckDB view
conn.register('df_view', df)

# Step 4: Create or replace a permanent DuckDB table
conn.execute("CREATE OR REPLACE TABLE netflix AS SELECT * FROM df_view")

# Step 5: Reusable helper that runs SQL on `conn`
def run_query(query):
    """Run ``query`` on the open ``conn`` and return the rows as a DataFrame."""
    executed = conn.execute(query)
    frame = executed.fetchdf()
    return frame

# Step 6: Example query
print("🎥 Let's compare how many Movies and TV Shows are in the Netflix dataset.")
query = """
SELECT type, COUNT(*) AS count
FROM netflix
GROUP BY type
"""
df_result = run_query(query)
print(df_result)

# Same thing using a lambda instead of def

In [None]:
import duckdb
import pandas as pd

# Step 1: Load CSV
df = pd.read_csv('/kaggle/input/netflix-shows/netflix_titles.csv')

# Step 2: Connect to DuckDB
conn = duckdb.connect('netflix_project.duckdb')

# ✅ Step 3: Register the DataFrame as a temporary view
conn.register('df_view', df)

# ✅ Step 4: Create or replace a permanent DuckDB table from the registered view
conn.execute("CREATE OR REPLACE TABLE netflix AS SELECT * FROM df_view")

# ✅ Step 5: Lambda form of the query runner — executes SQL on `conn`
# and returns the result as a DataFrame.
duckq = lambda q: (
    conn
    .execute(q)
    .fetchdf()
)

# ✅ Step 6: Example query
print("🎥 Let's compare how many Movies and TV Shows are in the Netflix dataset.")
query = """
SELECT type, COUNT(*) AS count
FROM netflix
GROUP BY type
"""
df_result = duckq(query)
print(df_result)

# Another approach: query the DataFrame loaded from the CSV directly, without creating a database file

First approach

In [None]:
import duckdb
import pandas as pd

# Load the Netflix CSV file
df = pd.read_csv("/kaggle/input/netflix-shows/netflix_titles.csv")

# ✅ Step 1: Reusable runner built on the module-level duckdb.query
# (no explicit connection object and no register call)
def run_query(query):
    """Run ``query`` through ``duckdb.query`` and return the result as a DataFrame."""
    relation = duckdb.query(query)
    return relation.to_df()

# ✅ Step 2: Use the DataFrame df directly inside the query
query = """
SELECT type, COUNT(*) AS count
FROM df
GROUP BY type
"""

# Run the query
result = run_query(query)
print(result)

Second approach: register the DataFrame explicitly

In [None]:
import duckdb
import pandas as pd

# Step 1: Load your Netflix CSV file
df = pd.read_csv("/kaggle/input/netflix-shows/netflix_titles.csv")

# Step 2: Register the DataFrame in DuckDB as a temporary view
duckdb.register('df', df)

# Step 3: Reusable SQL runner (module-level duckdb.query → DataFrame)
def run_query(query):
    """Return the result of ``query`` as a pandas DataFrame."""
    result_relation = duckdb.query(query)
    result_frame = result_relation.to_df()
    return result_frame

# Step 4: Write and run a query
query = """
SELECT type, COUNT(*) AS count
FROM df
GROUP BY type
"""

result = run_query(query)
print(result)

# Using Lambda

In [None]:
import duckdb
import pandas as pd

# Load the Netflix CSV file
df = pd.read_csv("/kaggle/input/netflix-shows/netflix_titles.csv")

# ✅ Lambda version of run_query: SQL text in, DataFrame out
run_query = lambda q: (
    duckdb
    .query(q)
    .to_df()
)

# Query using DataFrame df directly
query = """
SELECT type, COUNT(*) AS count
FROM df
GROUP BY type
"""

# Execute the query
result = run_query(query)
print(result)

This means DuckDB treats the DataFrame variable `df` as a table name.
So after `FROM`, use the name of your DataFrame variable (or the name you registered it under).

## Multiple CSV files without an explicit connection, using a lambda

In [None]:
import duckdb
import pandas as pd

# Dictionary of CSV files and their aliases
csv_files = {
    "video_game_sales.csv": "video_game_sales",
    "game_reviews.csv": "game_reviews",
    "console_specs.csv": "console_specs"
}

# Load CSVs into DataFrames and register in DuckDB
# Each key of csv_files is a CSV path and each value is the name the data
# will be queryable under in SQL (e.g. FROM video_game_sales).
# NOTE: `df` is rebound on every pass, so after the loop it refers only to
# the DataFrame read from the last file in csv_files.
for file, alias in csv_files.items():
    df = pd.read_csv(file)
    duckdb.register(alias, df)  # Register each DataFrame with an alias

# ✅ Lambda query runner: pass SQL text, get a DataFrame back
run_query = lambda q: (
    duckdb.query(q)
    .to_df()
)

# 🎯 Example: Query from video_game_sales
query = """
SELECT Platform, SUM(Global_Sales) AS total_sales
FROM video_game_sales
GROUP BY Platform
ORDER BY total_sales DESC
"""

# Run and show result
result = run_query(query)
print(result)

## Multiple CSV files without an explicit connection, using a function

In [None]:
import duckdb
import pandas as pd

# Dictionary of CSV files and their aliases
csv_files = {
    "video_game_sales.csv": "video_game_sales",
    "game_reviews.csv": "game_reviews",
    "console_specs.csv": "console_specs"
}

# Load CSVs into DataFrames and register in DuckDB
# csv_files maps a CSV path to the name used for that data in SQL.
# NOTE: `df` is reassigned each iteration; once the loop ends it holds only
# the DataFrame for the last entry in csv_files.
for file, alias in csv_files.items():
    df = pd.read_csv(file)
    duckdb.register(alias, df)  # Register each DataFrame with an alias

# ✅ Function version of the query runner
def run_query(query):
    """Execute ``query`` via ``duckdb.query`` and convert the result to a DataFrame."""
    rel = duckdb.query(query)
    return rel.to_df()

# 🎯 Example: Query from video_game_sales
query = """
SELECT Platform, SUM(Global_Sales) AS total_sales
FROM video_game_sales
GROUP BY Platform
ORDER BY total_sales DESC
"""

# Run and show result
result = run_query(query)
print(result)


## 👇👇👇 Multiple CSV files with an explicit connection, to build a local database file

In [None]:
import duckdb
import pandas as pd

# Dictionary of CSV file names and their corresponding table names
csv_files = {
    "video_game_sales.csv": "video_game_sales",
    "game_reviews.csv": "game_reviews",
    "console_specs.csv": "console_specs"
}

# Connect to DuckDB (creates a file if it doesn't exist)
conn = duckdb.connect("gaming_data.duckdb")

# Loop through files, read CSV, and save as table in DuckDB
for file, table_name in csv_files.items():
    df = pd.read_csv(file)

    # Expose the DataFrame to SQL under a scratch name for the CREATE TABLE below
    conn.register('temp_df', df)

    # Double-quote the identifier (doubling any embedded quotes) so a table
    # name containing spaces, punctuation or a keyword cannot break the statement
    quoted_name = '"' + table_name.replace('"', '""') + '"'

    # Create or replace a permanent DuckDB table
    conn.execute(f"CREATE OR REPLACE TABLE {quoted_name} AS SELECT * FROM temp_df")

    # The table now owns a copy of the data; drop the scratch view so a stale
    # `temp_df` is not left queryable on the connection after loading
    conn.unregister('temp_df')

print("✅ All CSV files successfully loaded into DuckDB!")

In [None]:
## Without a helper function or lambda: call conn.execute() directly

# NOTE(review): the SQL text below is an empty placeholder — fill it in
# before running this cell.
query = """

"""

# Execute the SQL directly on the open connection and fetch a DataFrame;
# the bare `result` on the last line makes the notebook display it.
result = conn.execute(query).fetchdf()
result


In [None]:
# ✅ Reusable query helper bound to the open `conn`
def run_query(query):
    """Execute ``query`` on ``conn`` and return the fetched DataFrame."""
    statement = conn.execute(query)
    return statement.fetchdf()

# 🎯 SQL Query: Total global sales by platform
# (reads the video_game_sales table loaded into the DuckDB file)
query = """
SELECT Platform, SUM(Global_Sales) AS total_sales
FROM video_game_sales
GROUP BY Platform
ORDER BY total_sales DESC
"""

# ✅ Run the query and store the result
result = run_query(query)

# 🖨️ Display the result
print(result)


In [None]:
# ✅ Lambda form of the reusable query helper (runs on the open `conn`)
run_query = lambda query: (
    conn
    .execute(query)
    .fetchdf()
)

# 🎯 SQL Query: Total global sales by platform
# (reads the video_game_sales table loaded into the DuckDB file)
query = """
SELECT Platform, SUM(Global_Sales) AS total_sales
FROM video_game_sales
GROUP BY Platform
ORDER BY total_sales DESC
"""

# ✅ Run the query and store the result
result = run_query(query)

# 🖨️ Display the result
result


## DuckDB multi-CSV query without .register() 👇👇

In [None]:
import duckdb
import pandas as pd

# Dictionary of CSV files and their DataFrame variable names
csv_files = {
    "video_game_sales.csv": "video_game_sales",
    "game_reviews.csv": "game_reviews",
    "console_specs.csv": "console_specs"
}

# Load CSVs into DataFrames with variable names as keys
# For every (path, name) pair in csv_files, read the CSV and bind the
# resulting DataFrame to a module-level variable called `name`.
# NOTE(review): writing into globals() silently overwrites any existing
# variable with the same name — keep the names in csv_files unique and
# distinct from other notebook variables.
for file, var_name in csv_files.items():
    globals()[var_name] = pd.read_csv(file)  # Dynamically create variables

# ✅ Lambda query runner: SQL string → DataFrame via duckdb.query
run_query = lambda q: (
    duckdb
    .query(q)
    .to_df()
)

# 🎯 Example query from video_game_sales (no register needed)
query = """
SELECT Platform, SUM(Global_Sales) AS total_sales
FROM video_game_sales
GROUP BY Platform
ORDER BY total_sales DESC
"""

# Execute and print result
result = run_query(query)
print(result)

In [None]:
# ✅ How It Works:

# globals()[var_name] = pd.read_csv(file)
# → Dynamically creates DataFrames like:
# 
# video_game_sales = pd.read_csv("video_game_sales.csv")
# game_reviews = pd.read_csv("game_reviews.csv")
# 
# DuckDB can automatically detect video_game_sales in memory during the query.

# ✅ A cleaner alternative that avoids globals():

# Load each CSV into an explicitly named DataFrame and make it available to DuckDB
# under the table name used in the SQL: 👇👇

In [None]:
import duckdb
import pandas as pd

# Load CSVs into DataFrames
df_sales = pd.read_csv("video_game_sales.csv")
df_reviews = pd.read_csv("game_reviews.csv")
df_specs = pd.read_csv("console_specs.csv")

# ✅ Lambda query runner with explicit table-name → DataFrame bindings.
# Relation.to_df() does not accept a bindings dict, so the DataFrames are
# registered under their SQL names on a private in-memory connection instead;
# this keeps the registrations off DuckDB's shared default connection.
bound_conn = duckdb.connect()
for _table_name, _frame in {
    'video_game_sales': df_sales,
    'game_reviews': df_reviews,
    'console_specs': df_specs,
}.items():
    bound_conn.register(_table_name, _frame)

run_query = lambda q: bound_conn.execute(q).fetchdf()

# Example Query
query = """
SELECT Platform, SUM(Global_Sales) AS total_sales
FROM video_game_sales
GROUP BY Platform
ORDER BY total_sales DESC
"""

# Run and print result
result = run_query(query)
print(result)

In [None]:
import duckdb
import pandas as pd

# Load CSVs into DataFrames
df_sales = pd.read_csv("video_game_sales.csv")
df_reviews = pd.read_csv("game_reviews.csv")
df_specs = pd.read_csv("console_specs.csv")

# ✅ Reusable function to run SQL queries with DataFrame bindings
def run_query(query):
    """Run ``query`` with the three game DataFrames exposed as SQL tables.

    ``Relation.to_df()`` does not take a mapping of table names to
    DataFrames, so the frames are registered under their SQL names on a
    short-lived in-memory connection before the query is executed.

    Args:
        query: SQL text that may reference ``video_game_sales``,
            ``game_reviews`` and ``console_specs``.

    Returns:
        The query result as a pandas DataFrame.
    """
    tables = {
        "video_game_sales": df_sales,
        "game_reviews": df_reviews,
        "console_specs": df_specs,
    }
    # A private connection keeps these registrations off the shared default
    # connection; the context manager closes it once the result is fetched.
    with duckdb.connect() as scoped_conn:
        for table_name, frame in tables.items():
            scoped_conn.register(table_name, frame)
        return scoped_conn.execute(query).fetchdf()

# 🎯 Example Query
query = """
SELECT Platform, SUM(Global_Sales) AS total_sales
FROM video_game_sales
GROUP BY Platform
ORDER BY total_sales DESC
"""

# Run and show result
result = run_query(query)
print(result)