In [4]:
import duckdb
import pandas as pd

# Location of the input CSV -- point this at your own file as needed
csv_file_path = 'data/9_7_24_A.csv'

# Load the entire CSV into an in-memory DataFrame
df = pd.read_csv(csv_file_path)

# Debug aid: show a short preview of the data, then the raw column labels
# (printed before any header clean-up so stray whitespace stays visible)
print(df.head(), df.columns, sep='\n')

# Normalize the headers: stray whitespace around a CSV header would otherwise
# make the column lookups below fail.
df.columns = df.columns.str.strip()

# Fail early, listing the headers that are present, if the column is missing
if 'Numbers' not in df.columns:
    raise KeyError(f"'Numbers' column not found. Available columns are: {df.columns}")

# Each 'Numbers' cell is expected to hold exactly five whitespace-separated
# integers (e.g. "08 11 16 22 26"); these are the target column names.
number_columns = ['num1', 'num2', 'num3', 'num4', 'num5']

# Splitting with no explicit separator treats any run of whitespace as one
# delimiter and ignores leading/trailing blanks, so a cell such as
# "08  11 16 22 26 " still parses instead of producing empty tokens.
number_tokens = df['Numbers'].str.split()

# Reject rows that are missing or do not hold exactly five values up front,
# naming the offending rows rather than failing later with an opaque
# length-mismatch or int() conversion error. A missing cell has a NaN length,
# which also compares unequal and is therefore reported here.
malformed_rows = number_tokens.str.len() != len(number_columns)
if malformed_rows.any():
    bad_index = malformed_rows[malformed_rows].index.tolist()
    raise ValueError(
        f"Expected {len(number_columns)} whitespace-separated values in "
        f"'Numbers'; malformed rows at index: {bad_index}"
    )

# Build the five integer columns; constructing the frame from the token lists
# (rather than expand=True) also keeps a header-only CSV from crashing.
df[number_columns] = pd.DataFrame(
    number_tokens.tolist(), index=df.index, columns=number_columns
).astype(int)

# The combined text column is redundant once it has been split out
df = df.drop(columns=['Numbers'])

# Open (or create) the on-disk DuckDB database file.
# NOTE(review): 'ducdb.duckdb' looks like it may be a typo for 'duckdb.duckdb';
# kept unchanged so existing database files continue to be used -- confirm.
con = duckdb.connect('ducdb.duckdb')

try:
    # Create the destination table on first run; later runs reuse it
    con.execute('''
CREATE TABLE IF NOT EXISTS numbers_data (
    Date TEXT,
    num1 INTEGER,
    num2 INTEGER,
    num3 INTEGER,
    num4 INTEGER,
    num5 INTEGER
)
''')

    # Append the DataFrame rows. DuckDB resolves `df` to the in-scope pandas
    # DataFrame. Columns are listed explicitly so the insert does not depend
    # on the DataFrame's column order and still works if the CSV carried
    # extra columns (SELECT * would fail or misalign in that case).
    # NOTE: re-running this cell appends the same rows again.
    con.execute('''
INSERT INTO numbers_data ("Date", num1, num2, num3, num4, num5)
SELECT "Date", num1, num2, num3, num4, num5 FROM df
''')
finally:
    # Always release the database file handle, even if a statement fails
    con.close()

print("DuckDB permanent table created successfully!")

         Date         Numbers
0  09/07/2024  08 11 16 22 26
1  09/06/2024  06 20 25 35 40
2  09/05/2024  02 08 09 13 42
3  09/04/2024  03 22 29 31 45
4  09/03/2024  12 30 32 33 42
Index(['Date', 'Numbers'], dtype='object')
DuckDB permanent table created successfully!
