## **1. pandas to duckdb**

In [None]:
import pandas as pd
import duckdb

# Sample user records, stored column-wise (one list per column)
data = dict(
    id=[1, 2, 3, 4, 5],
    name=['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    age=[25, 30, 35, 40, 45],
)

# Build the DataFrame used throughout this section and display it
df = pd.DataFrame(data=data)
df

Unnamed: 0,id,name,age
0,1,Alice,25
1,2,Bob,30
2,3,Charlie,35
3,4,David,40
4,5,Eve,45


In [None]:
# Open a writable DuckDB connection backed by an in-memory database
con = duckdb.connect(database=':memory:', read_only=False)

# Create the 'users' table from the DataFrame.
# The SQL names `df` directly; this refers to the pandas DataFrame built above.
create_users_sql = "CREATE TABLE users AS SELECT * FROM df;"
con.execute(create_users_sql)

# Read the table back into a pandas DataFrame to verify the round trip
select_users_sql = "SELECT * FROM users;"
result = con.execute(select_users_sql).fetchdf()

# Display the rows that came back from DuckDB
result


Unnamed: 0,id,name,age
0,1,Alice,25
1,2,Bob,30
2,3,Charlie,35
3,4,David,40
4,5,Eve,45


In [None]:
# prompt: how to check whether two DataFrames are identical
# Raises AssertionError if the DuckDB round trip altered the data; produces no output on success.
pd.testing.assert_frame_equal(left=df, right=result)

## **2. pandas to sqlite3**

In [None]:
import pandas as pd
import sqlite3

# Column-wise sample data: sequential ids, names, and ages in steps of five
data = {
    'id': list(range(1, 6)),
    'name': ['Alice', 'Bob', 'Charlie', 'David', 'Eve'],
    'age': list(range(25, 50, 5)),
}

# Build the DataFrame that the SQLite examples below write out, and display it
df = pd.DataFrame(data=data)
df

Unnamed: 0,id,name,age
0,1,Alice,25
1,2,Bob,30
2,3,Charlie,35
3,4,David,40
4,5,Eve,45


## **방법 1.**

In [None]:
# prompt: using the df DataFrame: save df to sqlite3

import sqlite3

# Explicit schema for the target table; `id` is the primary key, so duplicate ids are rejected.
create_users_sql = """CREATE TABLE IF NOT EXISTS users (
    id INTEGER PRIMARY KEY,
    name TEXT,
    age INTEGER
)"""

# Connect to the on-disk database file
conn = sqlite3.connect("my_data.db")

try:
    cursor = conn.cursor()

    # The database file outlives the kernel. Without this reset, re-running the
    # cell would append ids 1-5 a second time and fail the PRIMARY KEY constraint.
    cursor.execute("DROP TABLE IF EXISTS users")

    # Create the table with the explicit schema
    cursor.execute(create_users_sql)

    # Insert the DataFrame rows into the pre-created table
    # ('append' keeps the schema defined above instead of letting pandas infer one)
    df.to_sql('users', conn, if_exists='append', index=False)

    # Commit the changes
    conn.commit()

    # Verify by querying the data back
    query_result = pd.read_sql('SELECT * FROM users', conn)
finally:
    # Close the connection even if one of the statements above raised
    conn.close()

In [None]:
import numpy as np

from sklearn.compose import ColumnTransformer
from sklearn.datasets import fetch_openml
from sklearn.feature_selection import SelectPercentile, chi2
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RandomizedSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# Seed NumPy's global random state so anything drawing from it is repeatable between runs.
np.random.seed(0)

In [None]:
# Columns routed to each branch of the preprocessor
numeric_features = ["age", "fare"]
categorical_features = ["embarked", "sex", "pclass"]

# Numeric branch: fill missing values with the column median, then standardize
numeric_steps = [
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler()),
]
numeric_transformer = Pipeline(steps=numeric_steps)

# Categorical branch: one-hot encode (categories unseen during fit are ignored),
# then keep the top 50 percent of encoded columns ranked by the chi-squared score
categorical_steps = [
    ("encoder", OneHotEncoder(handle_unknown="ignore")),
    ("selector", SelectPercentile(chi2, percentile=50)),
]
categorical_transformer = Pipeline(steps=categorical_steps)

# Apply each branch to its own column subset
column_branches = [
    ("num", numeric_transformer, numeric_features),
    ("cat", categorical_transformer, categorical_features),
]
preprocessor = ColumnTransformer(transformers=column_branches)

In [None]:
# Fetch version 1 of the "titanic" dataset from OpenML, already split into
# features (X) and target (y) and returned as pandas objects.
X, y = fetch_openml(
    "titanic",
    version=1,
    as_frame=True,
    return_X_y=True,
)

# Alternative (kept for reference): split the full frame by hand.
# NOTE(review): `titanic` is not defined in this notebook; presumably it is the
# object returned by fetch_openml when return_X_y is left off. Confirm before use.
# X = titanic.frame.drop('survived', axis=1)
# y = titanic.frame['survived']

  warn(


In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Full model: preprocessing followed by a logistic-regression classifier
model_steps = [
    ("preprocessor", preprocessor),
    ("classifier", LogisticRegression()),
]
clf = Pipeline(steps=model_steps)

# Fit on the training rows, then report the score on the held-out rows
clf.fit(X_train, y_train)
test_score = clf.score(X_test, y_test)
print("model score: %.3f" % test_score)

model score: 0.798


In [None]:
# Display the fitted pipeline (preprocessor followed by the LogisticRegression classifier)
clf

In [None]:
# Display the rows read back from the SQLite 'users' table
query_result

Unnamed: 0,id,name,age
0,1,Alice,25
1,2,Bob,30
2,3,Charlie,35
3,4,David,40
4,5,Eve,45


## **방법 2.**

In [None]:
# Connect to the SQLite database (the file is created if it doesn't exist)
conn = sqlite3.connect('example.db')

try:
    # Save the DataFrame as the 'users' table and let pandas create the schema.
    # if_exists='replace' drops any existing table first, so re-running the cell is safe.
    df.to_sql('users', conn, if_exists='replace', index=False)

    # Verify by querying the data back
    query_result = pd.read_sql('SELECT * FROM users', conn)
finally:
    # Close the connection even if the write or the read above raised
    conn.close()

# Display the rows read back from SQLite
query_result

Unnamed: 0,id,name,age
0,1,Alice,25
1,2,Bob,30
2,3,Charlie,35
3,4,David,40
4,5,Eve,45
