# Create Database

In [6]:
import sqlite3
import os

# Build the SQLite metadata store: three tables (data_sources, data_table,
# business_metadata) plus lookup indexes. Every statement uses IF NOT EXISTS,
# so re-running this cell against an existing database is harmless.

# Ensure the 'database' folder exists. exist_ok=True replaces the separate
# exists() check, which could race with another process creating the folder.
os.makedirs('database', exist_ok=True)

# Path to the SQLite database file
db_path = os.path.join('database', 'data_science_application.db')

# Connect to the SQLite database (it will be created if it doesn't exist)
conn = sqlite3.connect(db_path)
try:
    cursor = conn.cursor()

    # Enable foreign key support. SQLite applies this setting per connection,
    # so it is not stored in the file: every later connection that should
    # enforce the constraints below has to issue the same PRAGMA itself.
    cursor.execute("PRAGMA foreign_keys = ON;")

    # Create the data_sources table
    cursor.execute('''
CREATE TABLE IF NOT EXISTS data_sources (
    data_source_id INTEGER PRIMARY KEY,
    data_source_name TEXT NOT NULL UNIQUE,
    description TEXT
)
''')

    # Create the data_table table (one row per table/column of a data source)
    cursor.execute('''
CREATE TABLE IF NOT EXISTS data_table (
    data_table_id INTEGER PRIMARY KEY,
    table_name TEXT NOT NULL,
    column_name TEXT NOT NULL,
    data_source_id INTEGER,
    FOREIGN KEY (data_source_id) REFERENCES data_sources (data_source_id)
)
''')

    # Create the business_metadata table (glossary terms linked to data_table rows)
    cursor.execute('''
CREATE TABLE IF NOT EXISTS business_metadata (
    business_glossary_term_id INTEGER PRIMARY KEY,
    business_glossary_term TEXT NOT NULL,
    table_name TEXT NOT NULL,
    column_name TEXT NOT NULL,
    data_table_id INTEGER,
    FOREIGN KEY (data_table_id) REFERENCES data_table (data_table_id)
)
''')

    # Create indexes for faster lookups (optional but recommended)
    cursor.execute('CREATE INDEX IF NOT EXISTS idx_data_table_table_name ON data_table (table_name)')
    cursor.execute('CREATE INDEX IF NOT EXISTS idx_data_table_column_name ON data_table (column_name)')
    cursor.execute('CREATE INDEX IF NOT EXISTS idx_business_metadata_table_name ON business_metadata (table_name)')
    cursor.execute('CREATE INDEX IF NOT EXISTS idx_business_metadata_column_name ON business_metadata (column_name)')

    # Commit changes
    conn.commit()
finally:
    # Always release the file handle, even if a statement above failed;
    # otherwise the open connection lingers in the notebook kernel.
    conn.close()

# Only reached when every statement succeeded (an exception propagates past this line)
print("Database and tables created successfully.")


Database and tables created successfully.


# Confirm Database Architecture

In [1]:
# Import the DataArchitecture class
from libraries.DataArchitecture import DataArchitecture
import os

# Sanity-check the database by listing its schema through the project's
# DataArchitecture helper.

# Path to the SQLite database file. This is the same relative path the
# "Create Database" cell writes to, so the notebook must be run from the
# directory that contains the 'database' folder.
db_path = os.path.join('database', 'data_science_application.db')

# Create an instance of DataArchitecture, handing it the database path.
# NOTE(review): whether the constructor opens a connection immediately is not
# visible from this notebook — check libraries/DataArchitecture.
data_arch = DataArchitecture(db_path)

# Retrieve and display the schema. The rendered output has one row per
# table column with the fields table_name, column_name, data_type,
# primary_key and foreign_key (presumably a pandas DataFrame — confirm
# against show_schema()).
schema_df = data_arch.show_schema()
# Cap the display at 50 rows; as the cell's last expression it is rendered
# as the cell output.
schema_df.head(50)


Unnamed: 0,table_name,column_name,data_type,primary_key,foreign_key
0,data_sources,data_source_id,INTEGER,yes,no
1,data_sources,data_source_name,TEXT,no,no
2,data_sources,description,TEXT,no,no
3,data_table,data_table_id,INTEGER,yes,no
4,data_table,table_name,TEXT,no,no
5,data_table,column_name,TEXT,no,no
6,data_table,data_source_id,INTEGER,no,data_sources.data_source_id
7,business_metadata,business_glossary_term_id,INTEGER,yes,no
8,business_metadata,business_glossary_term,TEXT,no,no
9,business_metadata,table_name,TEXT,no,no
