In [0]:
!pip install pydantic[email]

In [0]:
from pyspark.sql import functions as F
from pyspark.sql.types import StringType

def compare_schemas(df_schema, delta_schema):
    """Compare two schemas column by column.

    Args:
        df_schema: Source schema. Must expose ``.fields``, an iterable of
            objects with ``.name`` and ``.dataType.simpleString()``
            (for example a ``pyspark.sql.types.StructType``).
        delta_schema: Destination schema with the same shape as ``df_schema``.

    Returns:
        A list of ``(column, source_type, destination_type, status)`` tuples.
        ``status`` is one of ``"Match"``, ``"Mismatch"``, ``"Missing in Source"``
        or ``"Missing in Destination"``; an absent type is reported as the
        string ``"Missing"``. Rows follow the source schema's column order,
        followed by the columns that only exist in the destination, so the
        result is stable from run to run.
    """
    df_fields = {field.name: field.dataType.simpleString() for field in df_schema.fields}
    delta_fields = {field.name: field.dataType.simpleString() for field in delta_schema.fields}

    # dict.fromkeys de-duplicates while keeping first-seen order; a plain set
    # would make the order of the result rows vary between interpreter runs.
    all_columns = list(dict.fromkeys([*df_fields, *delta_fields]))
    result = []

    for col in all_columns:
        src_type = df_fields.get(col, "Missing")
        dest_type = delta_fields.get(col, "Missing")

        if src_type == dest_type:
            status = "Match"
        elif src_type == "Missing":
            status = "Missing in Source"
        elif dest_type == "Missing":
            status = "Missing in Destination"
        else:
            status = "Mismatch"

        result.append((col, src_type, dest_type, status))

    return result

# Register the UDF
# NOTE(review): compare_schemas reads `.fields` from both arguments and returns a
# list of tuples, while this UDF is declared with StringType(). Presumably it
# would not work when applied to DataFrame columns, and it is not used in the
# visible cells. TODO confirm whether this registration is still needed.
compare_schemas_udf = F.udf(lambda src, dest: compare_schemas(src, dest), StringType())

In [0]:
# Example Delta table schema and incoming DataFrame schema
from pyspark.sql.types import StructType, StructField, StringType, IntegerType

# Destination side of the comparison: the schema of the existing Delta table.
delta_table_schema = StructType([
    StructField("id", IntegerType(), True),
    StructField("name", StringType(), True),
    StructField("age", IntegerType(), True)
])

# Source side of the comparison: the schema of the incoming DataFrame.
# It shares `id` and `name` with the table, lacks `age`, and adds `salary`.
incoming_df_schema = StructType([
    StructField("id", IntegerType(), True),
    StructField("name", StringType(), True),
    StructField("salary", IntegerType(), True)
])

# Compare schemas (source first, destination second).
# Expected statuses: id/name -> "Match", salary -> "Missing in Destination",
# age -> "Missing in Source".
comparison_result = compare_schemas(incoming_df_schema, delta_table_schema)

# Create a DataFrame from the result
# NOTE(review): `spark` is not defined in the visible cells; presumably the
# notebook runtime provides the session — confirm before running elsewhere.
comparison_df = spark.createDataFrame(comparison_result, ["Column Name", "Source Data Type", "Destination Data Type", "Status"])

# Show the comparison result
comparison_df.show()

In [0]:
from pydantic import BaseModel, EmailStr, ValidationError

class User(BaseModel):
    """Small model used to demonstrate pydantic coercion and validation.

    Attributes:
        name: Plain string.
        age: Integer; a numeric string such as "29" is coerced to an int.
        email: Validated through pydantic's ``EmailStr`` type.
    """
    name: str
    age: int
    email: EmailStr

# Valid input: age is passed as the string "29" and still validates as an int.
user1 = User(name="Alice", age="29", email="alice@example.com")
print("User 1: ", user1)

# Invalid input: age is not numeric and the e-mail address is malformed.
# The failure is the point of the demo, so catch it and display the error
# report instead of letting the exception abort a "Run All".
try:
    user2 = User(name="Bob", age="not_a_number", email="bob[at]mail")
    print(user2)
except ValidationError as exc:
    print(exc)

In [0]:
from pydantic import BaseModel, Field
from typing import Literal
# Product record with a fixed set of accepted currency codes.
class Product(BaseModel):
    # The description is metadata only; nothing in this model enforces uniqueness.
    product_id: str = Field(
        description = "This is the id for each product. Must be unique"
    )
    product_name: str
    price: float
    # Only these three exact values are accepted.
    currency: Literal["USD", "EUR", "GBP"]
    in_stock: bool
    class Config:
        # NOTE(review): `anystr_lower` is a pydantic v1 setting documented as
        # lower-casing all str/bytes values, not only `currency`; pydantic v2
        # renames it to `str_to_lower`. Whether it touches the Literal field at
        # all should be confirmed against the installed version.
        anystr_lower = True

product = Product(product_id="ABC123", product_name="Apple", price=12.00, currency="USD", in_stock=True)
print("Product: ", product)

In [0]:
from pydantic import BaseModel, Field, ValidationError
from typing import Union, Optional

class MySecondModel(BaseModel):
    """Demonstrates "may be omitted" versus "must be sent but may be None".

    ``Union[str, None]`` and ``Optional[str]`` are the same type, so the
    annotation alone cannot express that difference. It is the default value
    that decides whether a field is required:

    Attributes:
        first_name: Required string.
        middle_name: Nullable and not required; defaults to ``None``.
        title: Nullable but required; ``Field(...)`` marks it as required.
        last_name: Required string.
    """
    first_name: str
    middle_name: Union[str, None] = None  # Does not have to be sent
    title: Optional[str] = Field(...)  # Has to be sent, but the value can be None
    last_name: str

# middle_name omitted: allowed, it falls back to None.
model1 = MySecondModel(first_name="Wong", title="Ms", last_name="Juliane")
print(model1)
model2 = MySecondModel(first_name="Wong", middle_name="", title="Ms", last_name="Juliane")
print(model2)
# title sent explicitly as None: allowed.
model3 = MySecondModel(first_name="Wong", middle_name="", title=None, last_name="Juliane")
print(model3)
# title omitted entirely: rejected, so catch the error and display it.
try:
    model4 = MySecondModel(first_name="Wong", middle_name="", last_name="Juliane")
    print(model4)
except ValidationError as exc:
    print(exc)

In [0]:
from pydantic import BaseModel

# Model whose fields all have defaults, including a mutable list default.
class DefaultsModel(BaseModel):
    first_name: str = "Jane"
    # NOTE(review): the original comment said this list is shared across all
    # instances for pydantic < 1.10. pydantic's documentation states that
    # mutable defaults are copied for each instance, so sharing is not expected
    # on current versions — confirm on the installed version.
    middle_names: list = []
    # Explicit alternative that builds a new list for each instance:
    # middle_names: list = Field(default_factory=list)
    last_name: str = "Doe"

# Create multiple instances of the model
model1 = DefaultsModel()
model2 = DefaultsModel()

# Modify `middle_names` in `model1`
model1.middle_names.append("Marie")

# Check the contents of `middle_names` in both instances
print(model1.middle_names)  # Output: ['Marie']
# NOTE(review): the original note claimed this also prints ['Marie'] (shared
# list). If defaults are copied per instance, as documented, it prints [].
print(model2.middle_names)

In [0]:
!pip install --upgrade pydantic

In [0]:
from pydantic import BaseModel, model_validator, ValidationError
from typing import Union, Any

class AllOptionalAfterModel(BaseModel):
    """All fields are optional, but at least one must carry a non-empty value.

    The rule is enforced after field validation, on the constructed instance.
    """
    param1: Union[str, None] = None
    param2: Union[str, None] = None
    param3: Union[str, None] = None

    @model_validator(mode="after")
    def there_must_be_one(self):
        """Reject instances where every parameter is None or an empty string.

        Raises:
            ValueError: when no parameter is set. pydantic collects it and
                reports it to the caller as a ValidationError.
        """
        if not (self.param1 or self.param2 or self.param3):
            # Validators signal failure with ValueError; ValidationError is
            # built by pydantic itself and is not meant to be raised directly
            # with a message string.
            raise ValueError("One parameter must be specified")
        return self

class AllOptionalBeforeModel(BaseModel):
    """Same rule as AllOptionalAfterModel, checked on the raw input instead."""
    param1: Union[str, None] = None
    param2: Union[str, None] = None
    param3: Union[str, None] = None

    @model_validator(mode="before")
    @classmethod
    def there_must_be_one(cls, data: Any):
        """Reject raw dict input where every parameter is missing or empty.

        Raises:
            ValueError: when no parameter is set. pydantic collects it and
                reports it to the caller as a ValidationError.
        """
        # In "before" mode the input is unvalidated: omitted keys are simply
        # absent, so use .get() rather than indexing. Non-dict input is passed
        # through untouched for pydantic to handle.
        if isinstance(data, dict) and not (
            data.get("param1") or data.get("param2") or data.get("param3")
        ):
            raise ValueError("One parameter must be specified")
        return data

model1 = AllOptionalAfterModel(param1="value1")
print(model1)
# No parameter given: the validator rejects it. Catch and display the error so
# the demonstration does not abort a "Run All".
try:
    model2 = AllOptionalAfterModel()
    print(model2)
except ValidationError as exc:
    print(exc)

In [0]:
from pydantic import AliasGenerator, BaseModel, ConfigDict, ValidationError

class Tree(BaseModel):
    """Model whose input keys and output keys are derived from the field names.

    Validation expects upper-case keys (``AGE``); serialization with
    ``by_alias=True`` emits title-case keys (``Age``).
    """
    model_config = ConfigDict(
        alias_generator=AliasGenerator(
            validation_alias=lambda field_name: field_name.upper(),
            serialization_alias=lambda field_name: field_name.title(),
        )
    )

    age: int
    height: float
    kind: str


t = Tree.model_validate({'AGE': 12, 'HEIGHT': 1.2, 'KIND': 'oak'})
print(t.model_dump(by_alias=True))
#> {'Age': 12, 'Height': 1.2, 'Kind': 'oak'}

# Lower-case 'age' does not match the generated validation alias 'AGE', so this
# input is rejected. Catch the error and display it instead of aborting the run.
try:
    t2 = Tree.model_validate({'age': 12, 'HEIGHT': 1.2, 'KIND': 'oak'})
    print(t2.model_dump(by_alias=True))
except ValidationError as exc:
    print(exc)

In [0]:
from pydantic import BaseModel, ConfigDict, AliasGenerator, AliasPath

# Field name -> location of its value inside the nested "name" object of the input.
aliases = dict(
    first_name=AliasPath("name", "first_name"),
    last_name=AliasPath("name", "last_name"),
)


# Fields listed in `aliases` are read through their AliasPath. For any other
# field (here `title`) the lookup returns None; presumably the field then keeps
# its own name as the input key, which is how `title` is passed below.
class FirstNameChoices(BaseModel):
    title: str
    first_name: str
    last_name: str

    model_config = ConfigDict(
        alias_generator=AliasGenerator(
            validation_alias=lambda field_name: aliases.get(field_name),
        ),
    )

obj = FirstNameChoices(
    name={"first_name": "marc", "last_name": "Nealer"},
    title="Master Of All",
)
print(obj)