In [2]:
!pip install tinydb

Collecting tinydb
  Downloading tinydb-4.8.2-py3-none-any.whl.metadata (6.7 kB)
Downloading tinydb-4.8.2-py3-none-any.whl (24 kB)
Installing collected packages: tinydb
Successfully installed tinydb-4.8.2


# TinyDB setup 
Set up TinyDB so that it is ready to store schema information.

This empty database provides a clean slate for us to start populating with schema data. 
It's a great first step in our implementation of a flexible schema management system. 



In [6]:
import os
from tinydb import TinyDB, Query

# Resolve where the JSON store lives: beside this file when run as a script,
# otherwise (notebook / REPL, where __file__ is not defined) in the current
# working directory.
DB_PATH = os.path.join(
    os.path.dirname(__file__) if '__file__' in globals() else os.getcwd(),
    'TinyDB_schema_store.json',
)

# Open the TinyDB store backed by that JSON file.
schema_db = TinyDB(DB_PATH)

def store_schema(schema_name, schema_data):
    """Insert a new record with 'name' and 'data' fields into ``schema_db``."""
    record = {'name': schema_name, 'data': schema_data}
    schema_db.insert(record)

def get_schema(schema_name):
    """Return the result of searching ``schema_db`` for records whose name equals ``schema_name``."""
    name_matches = Query().name == schema_name
    matches = schema_db.search(name_matches)
    return matches

def list_schemas():
    """Return the 'name' field of each record in ``schema_db``, one entry per record."""
    names = []
    for record in schema_db.all():
        names.append(record['name'])
    return names

def update_schema(schema_name, new_data):
    """Set the 'data' field to ``new_data`` on records whose name equals ``schema_name``."""
    replacement = {'data': new_data}
    schema_db.update(replacement, Query().name == schema_name)

def delete_schema(schema_name):
    """Remove records whose name equals ``schema_name`` from ``schema_db``."""
    name_matches = Query().name == schema_name
    schema_db.remove(name_matches)


## Storing and retrieving schema information. 

Start building the core functionality of our schema management system.

We focus on implementing robust functions for storing schemas and retrieving them efficiently:

- one to insert a new schema into our TinyDB database
- one to fetch schemas based on specific criteria.

This will form the foundation for more complex operations like schema versioning, comparison, and impact analysis.

We begin with the schema storage function, then move on to retrieval.





In [9]:
from datetime import datetime


def store_schema(schema_name, schema_data, version='1.0'):
    """
    Persist one version of a schema as a new record in ``schema_db``.

    The record carries the name, version and definition, plus a 'created_at'
    ISO-8601 timestamp taken from ``datetime.now()`` at call time.

    Args:
    schema_name (str): Name of the schema
    schema_data (dict): The schema definition
    version (str): Version label for this definition, defaults to '1.0'

    Returns:
    int: The ID of the inserted document (whatever ``schema_db.insert`` returns)
    """
    timestamp = datetime.now().isoformat()
    record = {'name': schema_name, 'version': version, 'data': schema_data, 'created_at': timestamp}
    return schema_db.insert(record)

# Example usage: a schema with a single "users" table, stored under the
# default version ('1.0').
# NOTE: store_schema inserts unconditionally, so every re-run of this cell adds
# another "user_schema" record to the store.
json_schema = {
    "tables": [
        {
            "name": "users",
            "columns": [
                {"name": "id", "type": "Integer", "primary_key": True},
                {"name": "username", "type": "String(50)", "unique": True},
                {"name": "email", "type": "String(120)", "unique": True}
            ]
        }
    ]
}
# store_schema returns the ID of the inserted document.
schema_id = store_schema("user_schema", json_schema)
print(f"Stored schema with ID: {schema_id}")


Stored schema with ID: 1


In [10]:
def _version_sort_key(version):
    """Map a dotted version string to a tuple that orders numerically ('10.0' > '9.1')."""
    key = []
    for piece in str(version).split('.'):
        if piece.isdecimal():
            key.append((1, int(piece), ''))
        else:
            # Non-numeric pieces ('beta', '') rank below any number and compare
            # as text among themselves, so mixed inputs never raise TypeError.
            key.append((0, 0, piece))
    return tuple(key)


def get_schema(schema_name, version=None):
    """
    Retrieve a schema from the database.

    Args:
    schema_name (str): Name of the schema to retrieve
    version (str, optional): Specific version to retrieve. If None (or empty),
        the record with the highest version is returned.

    Returns:
    dict: The retrieved schema document, or None if not found. When several
        records match equally, the first one returned by the search wins.
    """
    Schema = Query()
    if version:
        result = schema_db.search((Schema.name == schema_name) & (Schema.version == version))
        return result[0] if result else None

    result = schema_db.search(Schema.name == schema_name)
    if not result:
        return None
    # Compare versions component-wise as numbers: a plain string sort would rank
    # '9.1' above '10.0'. Records stored without a 'version' field rank lowest
    # instead of raising KeyError.
    return max(result, key=lambda doc: _version_sort_key(doc.get('version', '')))

# Example usage: no version is given, so get_schema picks the latest stored
# record for "user_schema"; it returns None when nothing matches.
retrieved_schema = get_schema("user_schema")
if retrieved_schema:
    print(f"Retrieved schema: {retrieved_schema['name']}, version: {retrieved_schema['version']}")
    print(retrieved_schema['data'])
else:
    print("Schema not found")


Retrieved schema: user_schema, version: 1.0
{'tables': [{'name': 'users', 'columns': [{'name': 'id', 'type': 'Integer', 'primary_key': True}, {'name': 'username', 'type': 'String(50)', 'unique': True}, {'name': 'email', 'type': 'String(120)', 'unique': True}]}]}


## Schema comparison and impact analysis

In [11]:
# First version of the "user_profile" schema: a single "users" table.
schema_v1 = {
    "name": "user_profile",
    "tables": [
        {
            "name": "users",
            "columns": [
                {"name": "id", "type": "Integer", "primary_key": True},
                {"name": "username", "type": "String(50)", "unique": True},
                {"name": "email", "type": "String(120)", "unique": True}
            ]
        }
    ]
}

# Store the first version.
# NOTE: store_schema inserts unconditionally, so re-running this cell adds
# duplicate "user_profile" records for each version.
store_schema("user_profile", schema_v1, version="1.0")

# Second version: "users" gains a column and a "user_preferences" table is added.
schema_v2 = {
    "name": "user_profile",
    "tables": [
        {
            "name": "users",
            "columns": [
                {"name": "id", "type": "Integer", "primary_key": True},
                {"name": "username", "type": "String(50)", "unique": True},
                {"name": "email", "type": "String(120)", "unique": True},
                {"name": "full_name", "type": "String(100)", "nullable": True}  # New column
            ]
        },
        {
            "name": "user_preferences",  # New table
            "columns": [
                {"name": "user_id", "type": "Integer", "primary_key": True},
                {"name": "theme", "type": "String(20)", "default": "light"},
                {"name": "notifications", "type": "Boolean", "default": True}
            ]
        }
    ]
}

# Store the second version under the same schema name.
store_schema("user_profile", schema_v2, version="2.0")

# Retrieve both versions by explicit version label to confirm storage;
# each lookup yields the full stored record (or None if it is missing).
v1 = get_schema("user_profile", version="1.0")
v2 = get_schema("user_profile", version="2.0")

print("Schema v1:", v1)
print("Schema v2:", v2)


Schema v1: {'name': 'user_profile', 'version': '1.0', 'data': {'name': 'user_profile', 'tables': [{'name': 'users', 'columns': [{'name': 'id', 'type': 'Integer', 'primary_key': True}, {'name': 'username', 'type': 'String(50)', 'unique': True}, {'name': 'email', 'type': 'String(120)', 'unique': True}]}]}, 'created_at': '2024-10-16T19:28:42.860761'}
Schema v2: {'name': 'user_profile', 'version': '2.0', 'data': {'name': 'user_profile', 'tables': [{'name': 'users', 'columns': [{'name': 'id', 'type': 'Integer', 'primary_key': True}, {'name': 'username', 'type': 'String(50)', 'unique': True}, {'name': 'email', 'type': 'String(120)', 'unique': True}, {'name': 'full_name', 'type': 'String(100)', 'nullable': True}]}, {'name': 'user_preferences', 'columns': [{'name': 'user_id', 'type': 'Integer', 'primary_key': True}, {'name': 'theme', 'type': 'String(20)', 'default': 'light'}, {'name': 'notifications', 'type': 'Boolean', 'default': True}]}]}, 'created_at': '2024-10-16T19:28:42.863981'}


## Compare Schema
This section adds the logic to compare these schema versions and highlight the differences.

This will be the core functionality of our schema comparison feature, allowing us to pinpoint exactly how the schema has evolved between versions.

We'll focus on identifying added, removed, and modified tables and columns, as well as changes in constraints and data types. 

This comparison logic will provide valuable insights for database administrators and developers, making it easier to track and understand schema changes over time. Let's dive into coding this crucial component of our schema management system!

Here are the steps we'll follow to develop our schema comparison logic:

- [x] Create a main comparison function that takes two schema versions as input.
- [x] Compare the list of tables between the two versions to identify added or removed tables.
- [x] For each table present in both versions, compare the columns to detect additions, removals, or modifications.
- [x] Analyze changes in column properties such as data types, constraints, and default values.
- [x] Implement helper functions for detailed comparisons of specific elements (e.g., column comparison).
- [x] Generate a structured comparison result that clearly highlights all differences.


In [12]:
def compare_schemas(schema1, schema2):
    """
    Diff two schema definitions at the table level.

    Args:
    schema1 (dict): The older schema; needs a 'tables' list whose items each have a 'name'
    schema2 (dict): The newer schema, same shape

    Returns:
    dict: 'added_tables' and 'removed_tables' (sorted lists of table names) and
        'modified_tables' (table name -> the truthy result of compare_tables),
        with shared tables visited in sorted name order
    """
    # Index each side by name once instead of rescanning the list for every
    # shared table. Walking the list in reverse keeps the FIRST table when a
    # name is duplicated, which is what a linear first-match lookup returns.
    tables1 = {table['name']: table for table in reversed(schema1['tables'])}
    tables2 = {table['name']: table for table in reversed(schema2['tables'])}

    differences = {
        # Sorted so the report is reproducible: raw set order for strings can
        # change from one interpreter run to the next.
        'added_tables': sorted(tables2.keys() - tables1.keys()),
        'removed_tables': sorted(tables1.keys() - tables2.keys()),
        'modified_tables': {}
    }

    for table_name in sorted(tables1.keys() & tables2.keys()):
        table_diff = compare_tables(tables1[table_name], tables2[table_name])
        # An empty/None diff means the table is unchanged and is left out.
        if table_diff:
            differences['modified_tables'][table_name] = table_diff

    return differences

def compare_tables(table1, table2):
    """Placeholder for the table-level comparison, which is not implemented here yet."""
    # Always yields None, so a caller testing the result for truthiness sees
    # "no differences" for every pair of tables.
    return None


The `compare_tables` function is a critical component of our schema comparison tool, so it deserves meticulous attention to detail: it must capture every relevant difference between two table structures. It compares columns, data types, constraints, and other attributes, providing the granular insight needed for a comprehensive and reliable schema comparison.

Complete the placeholder below:
```python
def compare_tables(table1, table2):
    # This function will be implemented next to compare individual tables
    pass
```

In [14]:
def compare_tables(table1, table2):
    """
    Diff the columns of two versions of the same table.

    Args:
    table1 (dict): The older table; needs a 'columns' list whose items each have a 'name'
    table2 (dict): The newer table, same shape

    Returns:
    dict: 'added_columns' and 'removed_columns' (sorted lists of column names)
        and 'modified_columns' (column name -> the truthy result of
        compare_columns), or an empty dict when nothing differs
    """
    columns1 = {col['name']: col for col in table1['columns']}
    columns2 = {col['name']: col for col in table2['columns']}

    differences = {
        # Sorted so the report is reproducible: raw set order for strings can
        # change from one interpreter run to the next.
        'added_columns': sorted(columns2.keys() - columns1.keys()),
        'removed_columns': sorted(columns1.keys() - columns2.keys()),
        'modified_columns': {}
    }

    for col_name in sorted(columns1.keys() & columns2.keys()):
        col_diff = compare_columns(columns1[col_name], columns2[col_name])
        if col_diff:
            differences['modified_columns'][col_name] = col_diff

    # Collapse to an empty dict so callers can test the result for truthiness.
    return differences if any(differences.values()) else {}

def compare_columns(col1, col2):
    """
    Diff the attributes of two versions of the same column.

    Args:
    col1 (dict): The older column definition (attribute name -> value)
    col2 (dict): The newer column definition

    Returns:
    dict: attribute name -> {'old': ..., 'new': ...} for every attribute whose
        value differs, in sorted attribute order; empty when the columns match.
        An attribute missing on one side is reported with None for that side.
    """
    diff = {}
    # Sorted so repeated runs list the changed attributes in the same order;
    # iterating the raw key union would follow arbitrary set order.
    for key in sorted(col1.keys() | col2.keys()):
        old_value, new_value = col1.get(key), col2.get(key)
        if old_value != new_value:
            diff[key] = {'old': old_value, 'new': new_value}
    return diff


In [16]:
# Retrieve the stored schema versions
import json


# get_schema returns None for a missing record, in which case the ['data']
# lookups below would fail; both versions must have been stored beforehand.
v1 = get_schema("user_profile", version="1.0")
v2 = get_schema("user_profile", version="2.0")

# Compare the schema definitions themselves (the 'data' field), not the
# stored records that wrap them.
comparison_result = compare_schemas(v1['data'], v2['data'])

# Display the raw structured result as indented JSON
print("Schema Comparison Results:")
print(json.dumps(comparison_result, indent=2))

# Then print each category separately, skipping the empty ones
if comparison_result['added_tables']:
    print("\nAdded Tables:", comparison_result['added_tables'])
if comparison_result['removed_tables']:
    print("\nRemoved Tables:", comparison_result['removed_tables'])
if comparison_result['modified_tables']:
    print("\nModified Tables:")
    # One JSON block of column-level changes per modified table
    for table, changes in comparison_result['modified_tables'].items():
        print(f"  {table}:")
        print(json.dumps(changes, indent=4))


Schema Comparison Results:
{
  "added_tables": [
    "user_preferences"
  ],
  "removed_tables": [],
  "modified_tables": {
    "users": {
      "added_columns": [
        "full_name"
      ],
      "removed_columns": [],
      "modified_columns": {}
    }
  }
}

Added Tables: ['user_preferences']

Modified Tables:
  users:
{
    "added_columns": [
        "full_name"
    ],
    "removed_columns": [],
    "modified_columns": {}
}


## [] Create a function to format and display the comparison results in a user-friendly manner.

Let's create a dedicated function that will transform our comparison results into a more visually appealing and easily digestible format. 

This will significantly improve the user experience and make our schema comparison tool even more valuable.

Here's how we can approach this:

- [x] Create a new function called `format_comparison_results` that takes our comparison results as input.
- [x] Use color coding to highlight different types of changes (additions in green, removals in red, modifications in yellow).
- [x] Provide more detailed explanations for each change, making it easier for users to understand the implications.
- [x] Structure the output in a hierarchical manner, making it easy to navigate through complex schema changes.


In [18]:
!pip install colorama

Collecting colorama
  Using cached colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)
Using cached colorama-0.4.6-py2.py3-none-any.whl (25 kB)
Installing collected packages: colorama
Successfully installed colorama-0.4.6


In [19]:
from colorama import Fore, Style, init

init(autoreset=True)  # Initialize colorama; autoreset asks it to reset colour/style after each print

def format_comparison_results(comparison_result):
    """
    Render a schema comparison as colour-coded, indented text.

    Args:
    comparison_result (dict): Needs 'added_tables', 'removed_tables' and
        'modified_tables'; each modified table entry needs 'added_columns',
        'removed_columns' and 'modified_columns' (column -> attribute ->
        {'old', 'new'})

    Returns:
    str: A newline-joined report with additions under a green heading,
        removals under red and modifications under yellow; empty categories
        are omitted
    """
    lines = [f"{Fore.CYAN}{Style.BRIGHT}Schema Comparison Results:{Style.RESET_ALL}\n"]

    added_tables = comparison_result['added_tables']
    if added_tables:
        lines.append(f"{Fore.GREEN}Added Tables:{Style.RESET_ALL}")
        lines.extend(f"  + {table}" for table in added_tables)
        lines.append("")

    removed_tables = comparison_result['removed_tables']
    if removed_tables:
        lines.append(f"{Fore.RED}Removed Tables:{Style.RESET_ALL}")
        lines.extend(f"  - {table}" for table in removed_tables)
        lines.append("")

    modified_tables = comparison_result['modified_tables']
    if modified_tables:
        lines.append(f"{Fore.YELLOW}Modified Tables:{Style.RESET_ALL}")
        for table, changes in modified_tables.items():
            lines.append(f"  {table}:")

            added_columns = changes['added_columns']
            if added_columns:
                lines.append(f"    {Fore.GREEN}Added Columns:{Style.RESET_ALL}")
                lines.extend(f"      + {column}" for column in added_columns)

            removed_columns = changes['removed_columns']
            if removed_columns:
                lines.append(f"    {Fore.RED}Removed Columns:{Style.RESET_ALL}")
                lines.extend(f"      - {column}" for column in removed_columns)

            modified_columns = changes['modified_columns']
            if modified_columns:
                lines.append(f"    {Fore.YELLOW}Modified Columns:{Style.RESET_ALL}")
                for column, mods in modified_columns.items():
                    lines.append(f"      ~ {column}:")
                    lines.extend(
                        f"        {attr}: {values['old']} -> {values['new']}"
                        for attr, values in mods.items()
                    )

            # Blank line between one table's section and the next.
            lines.append("")

    return "\n".join(lines)

# Usage: render the `comparison_result` produced by the earlier comparison cell
# as colour-coded text and print it.
formatted_result = format_comparison_results(comparison_result)
print(formatted_result)


Schema Comparison Results:

Added Tables:
  + user_preferences

Modified Tables:
  users:
    Added Columns:
      + full_name

