In [1]:
!pip install sentence-transformers boto3 numpy

Collecting sentence-transformers
  Using cached sentence_transformers-4.1.0-py3-none-any.whl.metadata (13 kB)
Using cached sentence_transformers-4.1.0-py3-none-any.whl (345 kB)
Installing collected packages: sentence-transformers
Successfully installed sentence-transformers-4.1.0


In [2]:
# Complete Testing Notebook - All Required Functions
import boto3
import json
import sagemaker
from sagemaker.huggingface import HuggingFaceModel
from sagemaker import get_execution_role
import time
import logging
from sentence_transformers import SentenceTransformer
from scipy.spatial.distance import cosine
from datetime import datetime, timedelta

# Configure logging: INFO level on the root logger, plus a module-level logger.
# NOTE(review): `logger` is not referenced in the visible cells, which report progress via print().
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

print("üì¶ Loading dependencies and setting up clients...")

# Initialize AWS clients
# `sagemaker_runtime` is the client call_endpoint() uses to invoke the deployed endpoint.
# NOTE(review): `s3`, `dynamodb` and `sagemaker_session` are not used in the visible cells;
# presumably later cells rely on them - confirm before removing.
s3 = boto3.client('s3')
dynamodb = boto3.resource('dynamodb')
sagemaker_runtime = boto3.client('sagemaker-runtime')
sagemaker_session = sagemaker.Session()
# Hard-coded IAM role ARN that deploy_model() hands to HuggingFaceModel;
# get_execution_role() is imported above but bypassed here.
role = "arn:aws:iam::467383999568:role/LabRole"  # alternative: get_execution_role()

# Configuration - UPDATE THESE WITH YOUR VALUES
# Both names are only echoed by the prints below within the visible cells.
BUCKET_NAME = "tech-translator-s3-knowledge-base"  # Your S3 bucket
TABLE_NAME = "tech-translator-dynamodb-vector-storage"  # Your DynamoDB table

print(f"üìù Configuration:")
print(f"  S3 Bucket: {BUCKET_NAME}")
print(f"  DynamoDB Table: {TABLE_NAME}")

# Initialize sentence transformer model
# Loads the 'all-MiniLM-L6-v2' checkpoint into the notebook-wide `model` variable.
# NOTE(review): `model` is not used in the visible cells - presumably needed for embeddings later.
print("ü§ñ Loading sentence transformer model...")
model = SentenceTransformer('all-MiniLM-L6-v2')
print("‚úÖ Model loaded successfully!")

# ===== DEPLOYMENT FUNCTIONS =====
def deploy_model(model_id='google/flan-t5-large', instance_types=None):
    """Deploy a Hugging Face text2text-generation model to a SageMaker endpoint.

    Each instance type is tried in order until one deployment succeeds. Every
    attempt uses its own endpoint name so that a retry never reuses the name
    of a failed attempt.

    Args:
        model_id: Hugging Face Hub model id passed to the container as
            ``HF_MODEL_ID``. Defaults to ``'google/flan-t5-large'``; smaller
            variants such as ``'google/flan-t5-base'`` can be passed instead.
        instance_types: Iterable of SageMaker instance types to try, in
            order. Defaults to ``["ml.m5.xlarge", "ml.c5.xlarge"]``.

    Returns:
        ``(predictor, endpoint_name)`` for the first successful deployment,
        or ``(None, None)`` when model creation or every attempt fails.
        Exceptions are reported via ``print`` and never propagate.
    """
    if instance_types is None:
        instance_types = ["ml.m5.xlarge", "ml.c5.xlarge"]

    try:
        print("üì¶ Creating HuggingFace model configuration...")

        # Container environment: which Hub model to load and for which task.
        hub = {
            'HF_MODEL_ID': model_id,
            'HF_TASK': 'text2text-generation'
        }

        # `role` is the notebook-level IAM role ARN.
        huggingface_model = HuggingFaceModel(
            transformers_version="4.37.0",
            pytorch_version="2.1.0",
            py_version="py310",
            env=hub,
            role=role,
        )

        print("üöÄ Deploying model...")
        print("‚è±Ô∏è  This may take 5-10 minutes...")

        for attempt, instance_type in enumerate(instance_types, start=1):
            # The timestamp alone is not unique when an attempt fails within
            # the same second, so retries get an attempt-number suffix. The
            # first attempt keeps the original "tech-translator-model-<ts>" form.
            endpoint_name = f"tech-translator-model-{int(time.time())}"
            if attempt > 1:
                endpoint_name = f"{endpoint_name}-{attempt}"

            try:
                print(f"\nüîÑ Trying deployment on {instance_type}...")

                predictor = huggingface_model.deploy(
                    initial_instance_count=1,
                    instance_type=instance_type,
                    endpoint_name=endpoint_name,
                    container_startup_health_check_timeout=600,
                    model_data_download_timeout=600,
                    wait=True
                )
            except Exception as e:
                print(f"‚ùå Failed to deploy on {instance_type}: {str(e)}")
                # Capacity/quota/validation problems are expected; anything
                # else is flagged as unexpected. Either way, try the next type.
                if any(err in str(e) for err in ["ResourceLimitExceeded", "InsufficientCapacity", "ValidationException"]):
                    print("   Trying next instance type...")
                else:
                    print(f"   Unexpected error, continuing...")
                continue

            print(f"‚úÖ Successfully deployed on {instance_type}!")
            print(f"‚úÖ Model deployed successfully!")
            print(f"üìç Endpoint name: {endpoint_name}")
            return predictor, endpoint_name

        print("‚ùå All deployment attempts failed!")
        return None, None

    except Exception as e:
        print(f"‚ùå Model creation failed: {str(e)}")
        return None, None




def cleanup_endpoint(endpoint_name):
    """Request deletion of a SageMaker endpoint so it stops incurring charges.

    Args:
        endpoint_name: Name of the endpoint to delete.

    Returns:
        True when the delete request was accepted, False when it raised
        (the error is printed rather than propagated).
    """
    print(f"\nüßπ Cleaning up endpoint: {endpoint_name}")

    try:
        # A fresh control-plane client is created per call.
        boto3.client('sagemaker').delete_endpoint(EndpointName=endpoint_name)
    except Exception as err:
        print(f"‚ùå Error deleting endpoint: {str(err)}")
        return False

    print("‚úÖ Endpoint deletion initiated!")
    print("üí∞ This will stop incurring charges.")
    return True



sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/sagemaker-user/.config/sagemaker/config.yaml


2025-05-24 19:19:37.663015: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cpu
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: all-MiniLM-L6-v2


üì¶ Loading dependencies and setting up clients...
üìù Configuration:
  S3 Bucket: tech-translator-s3-knowledge-base
  DynamoDB Table: tech-translator-dynamodb-vector-storage
ü§ñ Loading sentence transformer model...
‚úÖ Model loaded successfully!


In [3]:
# ===== TESTING FUNCTIONS =====
def call_endpoint(endpoint_name, prompt, max_new_tokens=100):
    """Send a prompt to the deployed FLAN-T5 endpoint and return the generated text.

    Args:
        endpoint_name: Name of the SageMaker endpoint to invoke.
        prompt: Instruction-style input text for the text-to-text model.
        max_new_tokens: Upper bound on the number of generated tokens.

    Returns:
        The generated text with surrounding whitespace removed, or None when
        the invocation or response handling raises (the error and traceback
        are printed).
    """
    # Sampling settings are fixed; only the token budget is caller-controlled.
    # return_full_text and pad_token_id are intentionally not sent.
    generation_settings = {
        "max_new_tokens": max_new_tokens,
        "temperature": 0.7,
        "do_sample": True,
        "top_p": 0.9,
        "repetition_penalty": 1.1,
    }
    request = {"inputs": prompt, "parameters": generation_settings}

    try:
        raw_response = sagemaker_runtime.invoke_endpoint(
            EndpointName=endpoint_name,
            ContentType='application/json',
            Body=json.dumps(request)
        )
        decoded = json.loads(raw_response['Body'].read().decode())

        # The response may be a list (of dicts or plain values), a dict, or a scalar.
        if isinstance(decoded, list) and len(decoded) > 0:
            head = decoded[0]
            text = head.get('generated_text', '') if isinstance(head, dict) else str(head)
        elif isinstance(decoded, dict):
            text = decoded.get('generated_text', '')
        else:
            text = str(decoded)

        return text.strip()

    except Exception as e:
        print(f"‚ùå Endpoint call failed: {str(e)}")
        import traceback
        traceback.print_exc()
        return None


def quick_test(endpoint_name, test_query="What is R-squared?"):
    """Smoke-test the endpoint with a single instruction-style prompt.

    Args:
        endpoint_name: Name of the SageMaker endpoint to call.
        test_query: Concept to ask the model to explain.

    Returns:
        True when call_endpoint returns a non-empty response, False otherwise.
    """
    print(f"‚ö° Quick Test: '{test_query}'")

    # The query is wrapped as an instruction and the answer capped at 50 new tokens.
    answer = call_endpoint(
        endpoint_name,
        f"Explain the following concept: {test_query}",
        max_new_tokens=50,
    )

    if not answer:
        print("‚ùå Test failed")
        return False

    print(f"Response: {answer}")
    print("‚úÖ Endpoint is working!")
    return True



# STEP 1: Deploy the model

In [4]:
# STEP 1: Deploy the model
# Runs deploy_model() and publishes the result as notebook globals
# (`predictor`, `endpoint_name`, and on success `DEPLOYED_ENDPOINT_NAME`).
print("üöÄ STEP 1: Deploying model...")
print("=" * 50)

# deploy_model() returns (None, None) when every attempt fails.
predictor, endpoint_name = deploy_model()

if predictor and endpoint_name:
    print(f"\n‚úÖ Deployment successful!")
    print(f"üìç Endpoint: {endpoint_name}")
    
    # Store for later use
    # NOTE: this name is only defined when deployment succeeded.
    DEPLOYED_ENDPOINT_NAME = endpoint_name
    print(f"üìù Stored in variable: DEPLOYED_ENDPOINT_NAME")
    
    # Wait for endpoint to be ready
    # NOTE(review): deploy_model() already deploys with wait=True, so this fixed
    # pause is presumably only a safety margin - confirm whether it is still needed.
    print("\n‚è≥ Waiting for endpoint to be fully ready...")
    time.sleep(60)  # Wait 1 minute for endpoint to stabilize
    
else:
    print("‚ùå Deployment failed!")
    print("Check error messages above and try troubleshooting")

üöÄ STEP 1: Deploying model...
üì¶ Creating HuggingFace model configuration...
üöÄ Deploying model...
‚è±Ô∏è  This may take 5-10 minutes...

üîÑ Trying deployment on ml.m5.xlarge...


INFO:sagemaker:Creating model with name: huggingface-pytorch-inference-2025-05-24-19-19-50-284
INFO:sagemaker:Creating endpoint-config with name tech-translator-model-1748114390
INFO:sagemaker:Creating endpoint with name tech-translator-model-1748114390


-------!‚úÖ Successfully deployed on ml.m5.xlarge!
‚úÖ Model deployed successfully!
üìç Endpoint name: tech-translator-model-1748114390

‚úÖ Deployment successful!
üìç Endpoint: tech-translator-model-1748114390
üìù Stored in variable: DEPLOYED_ENDPOINT_NAME

‚è≥ Waiting for endpoint to be fully ready...


# STEP 2: Update the Lambda function with the new endpoint

In [5]:
def update_lambda_with_endpoint(endpoint_name, lambda_stack_name="tech-translator-lambda", region="us-east-1"):
    """Point the Lambda CloudFormation stack at a new SageMaker endpoint.

    The stack is updated in place with its existing template. Only the
    ``SageMakerEndpointName`` parameter gets a new value; every other
    parameter the stack currently reports is carried over with
    ``UsePreviousValue``. A stack that already uses ``endpoint_name``
    (CloudFormation answers "No updates are to be performed") counts as success.

    Args:
        endpoint_name: Name of the SageMaker endpoint the Lambda should call.
        lambda_stack_name: Name of the CloudFormation stack holding the Lambda.
        region: AWS region of the stack.

    Returns:
        True when the stack was updated or was already up to date, False
        when the stack could not be described or the update failed. Errors
        are printed rather than raised.
    """
    print(f"üîÑ Updating Lambda with endpoint: {endpoint_name}")

    endpoint_parameter = 'SageMakerEndpointName'
    # Used only if describe_stacks reports no parameters for the stack.
    fallback_parameters = [
        'ProjectName',
        'S3StackName',
        'DynamoDBStackName',
        'CognitoStackName',
        'LambdaCodeBucket',
    ]

    try:
        cf_client = boto3.client('cloudformation', region_name=region)

        # Check that the stack exists and capture its current parameter keys.
        try:
            description = cf_client.describe_stacks(StackName=lambda_stack_name)
            print(f"‚úÖ Found Lambda stack: {lambda_stack_name}")
        except cf_client.exceptions.ClientError:
            print(f"‚ùå Lambda stack '{lambda_stack_name}' not found!")
            print("Make sure you've deployed your Lambda functions first with ./deploy.sh")
            return False

        stacks = (description or {}).get('Stacks') or [{}]
        reported_keys = [p['ParameterKey'] for p in stacks[0].get('Parameters') or []]
        carried_keys = [
            key for key in (reported_keys or fallback_parameters)
            if key != endpoint_parameter
        ]

        # New endpoint value first, then every other parameter unchanged.
        parameters = [{'ParameterKey': endpoint_parameter, 'ParameterValue': endpoint_name}]
        parameters.extend(
            {'ParameterKey': key, 'UsePreviousValue': True} for key in carried_keys
        )

        print("üîÑ Updating CloudFormation stack...")

        try:
            response = cf_client.update_stack(
                StackName=lambda_stack_name,
                UsePreviousTemplate=True,  # Keep the same template
                Parameters=parameters,
                Capabilities=['CAPABILITY_IAM', 'CAPABILITY_NAMED_IAM']
            )
        except cf_client.exceptions.ClientError as e:
            # CloudFormation rejects a no-op update with this message; the
            # stack is already in the desired state, so this is not a failure.
            if "No updates are to be performed" not in str(e):
                raise
            print(f"‚úÖ Stack already uses endpoint: {endpoint_name} (no update needed)")
            print("üîç Verifying Lambda environment variables...")
            verify_lambda_update(endpoint_name, lambda_stack_name, region)
            return True

        print(f"‚úÖ Stack update initiated: {response['StackId']}")

        # Block until CloudFormation reports the update as complete.
        print("‚è≥ Waiting for stack update to complete...")
        waiter = cf_client.get_waiter('stack_update_complete')
        waiter.wait(StackName=lambda_stack_name)

        print("‚úÖ Lambda updated successfully!")
        print(f"üìç Lambda now uses endpoint: {endpoint_name}")

        # Best-effort check; verify_lambda_update only prints its findings.
        print("üîç Verifying Lambda environment variables...")
        verify_lambda_update(endpoint_name, lambda_stack_name, region)

        return True

    except Exception as e:
        print(f"‚ùå Lambda update failed: {str(e)}")
        return False

def verify_lambda_update(endpoint_name, lambda_stack_name, region="us-east-1"):
    """Report whether the main Lambda's SAGEMAKER_ENDPOINT matches ``endpoint_name``.

    The function name is read from the stack output ``MainLambdaFunctionName``
    and the Lambda's environment variables are then inspected. Findings are
    only printed; nothing is returned and no exception propagates.

    Args:
        endpoint_name: Endpoint name the Lambda is expected to use.
        lambda_stack_name: CloudFormation stack that exposes the function name.
        region: AWS region of the stack and function.
    """
    try:
        cloudformation = boto3.client('cloudformation', region_name=region)
        lambda_api = boto3.client('lambda', region_name=region)

        # Resolve the function name from the first matching stack output.
        described = cloudformation.describe_stacks(StackName=lambda_stack_name)
        stack_outputs = described['Stacks'][0].get('Outputs', [])
        function_name = next(
            (
                entry['OutputValue']
                for entry in stack_outputs
                if entry['OutputKey'] == 'MainLambdaFunctionName'
            ),
            None,
        )

        if not function_name:
            print("‚ö†Ô∏è  Could not find Lambda function name in stack outputs")
            return

        # Read the endpoint currently configured on the function.
        configuration = lambda_api.get_function_configuration(
            FunctionName=function_name
        )
        env_vars = configuration.get('Environment', {}).get('Variables', {})
        configured_endpoint = env_vars.get('SAGEMAKER_ENDPOINT')

        print(f"Current Lambda endpoint setting: {configured_endpoint}")

        if configured_endpoint == endpoint_name:
            print("‚úÖ Lambda environment variable updated correctly!")
        else:
            print("‚ö†Ô∏è  Environment variable may not be updated yet")

    except Exception as e:
        print(f"‚ö†Ô∏è  Could not verify update: {str(e)}")

# Wire the freshly deployed endpoint into the Lambda stack.
# Requires `endpoint_name` to have been set by the deployment cell.
if not ('endpoint_name' in locals() and endpoint_name):
    # Nothing to wire up: the deployment cell did not run or did not succeed.
    print("‚ùå No endpoint_name variable found!")
    print("Make sure you run this cell after successfully deploying your SageMaker endpoint")
    print("The endpoint_name should be available from your deployment cell")
else:
    print("\n" + "="*60)
    print("üîÑ UPDATING LAMBDA WITH NEW ENDPOINT")
    print("="*60)

    success = update_lambda_with_endpoint(endpoint_name)

    if not success:
        # Tell the user which CloudFormation parameter to change by hand.
        print("\n‚ö†Ô∏è  Manual update may be needed")
        print(f"Update CloudFormation parameter SageMakerEndpointName to: {endpoint_name}")
    else:
        print("\nüéâ Integration complete!")
        print("‚úÖ SageMaker endpoint deployed")
        print("‚úÖ Lambda function updated")


üîÑ UPDATING LAMBDA WITH NEW ENDPOINT
üîÑ Updating Lambda with endpoint: tech-translator-model-1748114390
‚úÖ Found Lambda stack: tech-translator-lambda
üîÑ Updating CloudFormation stack...
‚úÖ Stack update initiated: arn:aws:cloudformation:us-east-1:467383999568:stack/tech-translator-lambda/15700260-35aa-11f0-b87c-0eb6554517e1
‚è≥ Waiting for stack update to complete...
‚úÖ Lambda updated successfully!
üìç Lambda now uses endpoint: tech-translator-model-1748114390
üîç Verifying Lambda environment variables...
Current Lambda endpoint setting: tech-translator-model-1748114390
‚úÖ Lambda environment variable updated correctly!

üéâ Integration complete!
‚úÖ SageMaker endpoint deployed
‚úÖ Lambda function updated
