In [None]:
## Import the necessary libraries
import sqlite3
import pandas as pd
import os

In [None]:
# Paths
# "..." (three dots) is not a relative-path token: Windows strips the trailing
# periods so it resolves to the working directory there, but on POSIX it names a
# literal "..." folder and every path below would be wrong. "." is the portable
# spelling of the same location.
# NOTE(review): assumes the kernel's working directory is the repository root;
# switch to ".." if this notebook is kept in a subfolder of the repo.
BASE_DIR = os.path.abspath(".")                        # github_repo/
DATA_DIR = os.path.join(BASE_DIR, "data", "raw")       # github_repo/data/raw
DB_PATH = os.path.join(BASE_DIR, "insurance.db")       # github_repo/insurance.db
SCHEMA_PATH = os.path.join(BASE_DIR, "data_schema.md") # github_repo/data_schema.md

In [25]:
# Open a connection to the SQLite database file at DB_PATH
# (kept open for the following cells; closed after the schema is written).
conn = sqlite3.connect(database=DB_PATH)

In [None]:
# Raw CSV filename -> name of the SQL table it is loaded into
files = {
    "address.csv": "address",
    "autoinsurance_churn.csv": "autoinsurance_churn",
    "customer.csv": "customer",
    "demographic.csv": "demographic",
    "termination.csv": "termination",
}

# Read each CSV from DATA_DIR and write it to its table, replacing any
# existing table of the same name so the cell can be re-run safely.
for csv_name in files:
    table_name = files[csv_name]
    csv_path = os.path.join(DATA_DIR, csv_name)
    print(f"Loading {csv_path} into table '{table_name}'...")
    pd.read_csv(csv_path).to_sql(table_name, conn, if_exists="replace", index=False)

Loading c:\Users\jackd\Documents\GitHub\SampleRepository\data\raw\address.csv into table 'address'...
Loading c:\Users\jackd\Documents\GitHub\SampleRepository\data\raw\autoinsurance_churn.csv into table 'autoinsurance_churn'...
Loading c:\Users\jackd\Documents\GitHub\SampleRepository\data\raw\customer.csv into table 'customer'...
Loading c:\Users\jackd\Documents\GitHub\SampleRepository\data\raw\demographic.csv into table 'demographic'...
Loading c:\Users\jackd\Documents\GitHub\SampleRepository\data\raw\termination.csv into table 'termination'...


In [27]:
# Ask SQLite's catalog which tables exist now that the load has finished
table_query = "SELECT name FROM sqlite_master WHERE type='table';"
tables = pd.read_sql(table_query, conn)
print("\n✅ Tables in database:", tables, sep="\n")


✅ Tables in database:
                  name
0              address
1  autoinsurance_churn
2             customer
3          demographic
4          termination


In [28]:
# Build schema markdown: one "## <table>" section per table, each followed by a
# two-column (Column | Type) markdown table taken from PRAGMA table_info.
schema_lines = ["# Database Schema\n"]
for table in tables["name"]:
    # Double-quote the identifier (doubling any embedded quotes) so table names
    # containing spaces, hyphens, or reserved words do not break the PRAGMA.
    quoted_table = '"' + str(table).replace('"', '""') + '"'
    cols = pd.read_sql(f"PRAGMA table_info({quoted_table});", conn)
    schema_lines.append(f"## {table}\n")
    schema_lines.append("| Column | Type |\n|--------|------|\n")
    # Only the name and declared type of each column are needed, so iterate the
    # two columns directly instead of materializing a Series per row.
    schema_lines.extend(
        f"| {col_name} | {col_type} |\n"
        for col_name, col_type in zip(cols["name"], cols["type"])
    )
    schema_lines.append("\n")

In [29]:
# Save schema to markdown file.
# Encoding is pinned to UTF-8: without it, open() falls back to the locale
# encoding (e.g. cp1252 on Windows), which can fail on or mangle non-ASCII
# column names.
with open(SCHEMA_PATH, "w", encoding="utf-8") as f:
    f.writelines(schema_lines)

print(f"\n📄 Schema written to {SCHEMA_PATH}")

# Last use of the database in this notebook; release the connection.
conn.close()


📄 Schema written to c:\Users\jackd\Documents\GitHub\SampleRepository\data_schema.md
