### Task 1: Validate Data with a Custom Expectation in Great Expectations
**Description**: Create a custom expectation and validate data with Great Expectations.

**Load a sample DataFrame**

data = {
'age': [25, 30, 35, 40, 45],
'income': [50000, 60000, 75000, None, 100000]
}

AttributeError: module 'great_expectations' has no attribute 'from_pandas'

### Task 2: Implement a Basic Alert System for Data Quality Drops
**Description**: Set up a basic alert system that triggers when data quality drops.

In [None]:
# Write your code from here

### Task 3: Real-time Data Quality Monitoring with Python and Great Expectations
**Description**: Implement a system that monitors data quality in real-time.

In [1]:
# Write your code from here# Write your code from here
import pandas as pd
import smtplib
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
import great_expectations as ge
import time

# Task 1: Custom Expectation using Great Expectations
# Sample data
data = {'age': [25, 30, 35, 40, 45], 'income': [50000, 60000, 75000, None, 100000]}
df = pd.DataFrame(data)

# Create a Great Expectations dataframe
ge_df = ge.from_pandas(df)

# Add a built-in expectation (checking the age range)
ge_df.expect_column_values_to_be_between("age", 18, 120)

# Custom expectation: Expect that income values are greater than 0
def expect_income_greater_than_zero(df, column):
    """
    Custom Expectation: Check if income values are greater than 0
    """
    result = df[column] > 0
    if result.all():
        return {"success": True, "result": {"observed_value": df[column].values}}
    else:
        return {"success": False, "result": {"observed_value": df[column].values}}

# Add custom expectation to the dataframe
ge_df.add_expectation(
    expectation_type="expect_column_values_to_be_in_set",
    kwargs={"column": "income", "value_set": [value for value in df['income'] if value > 0]}
)

# Validate the custom expectation
validation_result = ge_df.validate()
print(validation_result)


# Task 2: Email Alert System with Enhanced Error Handling
# Function to send email alerts
def send_email_alert(subject, body, recipient_email):
    try:
        # Email configuration
        sender_email = "your_email@example.com"
        password = "your_password"
        smtp_server = "smtp.example.com"
        smtp_port = 587

        # Create email message
        msg = MIMEMultipart()
        msg['From'] = sender_email
        msg['To'] = recipient_email
        msg['Subject'] = subject
        msg.attach(MIMEText(body, 'plain'))

        # Connect to the email server and send the email
        with smtplib.SMTP(smtp_server, smtp_port) as server:
            server.starttls()  # Secure the connection
            server.login(sender_email, password)
            text = msg.as_string()
            server.sendmail(sender_email, recipient_email, text)
            print(f"Email sent successfully to {recipient_email}")
            
    except smtplib.SMTPException as e:
        print(f"SMTP error occurred: {e}")
    except Exception as e:
        print(f"An error occurred: {e}")

# Sample test for sending an alert
send_email_alert("Data Quality Alert", "Your data quality is below the expected threshold.", "recipient@example.com")


# Task 3: Real-time Data Quality Monitoring
# Simulate fetching new data
def fetch_new_data():
    # Placeholder function to simulate fetching new data
    new_data = {'age': [50], 'income': [120000]}
    return pd.DataFrame(new_data)

# Data quality check (for demonstration purposes)
def check_data_quality(data):
    # Check if 'income' contains any null values (simplified check for this example)
    if data['income'].isnull().any():
        return False
    return True

# Function to simulate real-time monitoring of data
def monitor_data_quality_stream():
    while True:
        # Simulate data ingestion (Replace this with actual data fetching)
        data = fetch_new_data()

        # Perform data quality checks
        if not check_data_quality(data):
            send_email_alert(
                "Data Quality Alert",
                "Data quality is below acceptable thresholds.",
                "recipient@example.com"
            )

        # Wait for new data or events
        time.sleep(60)  # Wait for 1 minute, adjust as needed

# Start the real-time data quality monitoring
monitor_data_quality_stream()

AttributeError: module 'great_expectations' has no attribute 'from_pandas'