In [None]:
import sys
from awsglue.transforms import *
from awsglue.utils import getResolvedOptions
from pyspark.context import SparkContext
from awsglue.context import GlueContext
from awsglue.job import Job
from awsglue.dynamicframe import DynamicFrame
import boto3


In [None]:
def create_glue_crawler(database_name, table_prefix, s3_target_path,
                        role='AWSGlueServiceRole'):
    """Ensure a Glue database and an S3 crawler exist, then start the crawler.

    The crawler is named ``f"{database_name}_crawler"``. A database or crawler
    that already exists is reused rather than treated as an error, and a
    crawler that is already running counts as success. This function only
    requests a crawler run; it does not wait for the crawl to finish.

    Args:
        database_name: Name of the Glue database to create (if missing) and
            to use as the crawler's target database.
        table_prefix: Value passed as the crawler's ``TablePrefix``.
        s3_target_path: S3 path registered as the crawler's single S3 target.
        role: IAM role passed as the crawler's ``Role``. The role must already
            exist with the permissions the crawler needs.

    Returns:
        bool: True if the crawler was started (or was already running),
        False if any other error occurred; the error is printed.
    """
    glue_client = boto3.client('glue')

    # Derive the name before any API call so it is always bound, whichever
    # call below fails.
    crawler_name = f"{database_name}_crawler"

    try:
        # Create the Glue database if it doesn't exist. An existing database
        # is fine and must not prevent the crawler from being created.
        try:
            glue_client.create_database(
                DatabaseInput={'Name': database_name}
            )
        except glue_client.exceptions.AlreadyExistsException:
            pass

        # Create the crawler, or reuse the one that is already defined.
        try:
            glue_client.create_crawler(
                Name=crawler_name,
                Role=role,
                DatabaseName=database_name,
                TablePrefix=table_prefix,
                Targets={
                    'S3Targets': [
                        {'Path': s3_target_path}
                    ]
                },
                SchemaChangePolicy={
                    'UpdateBehavior': 'UPDATE_IN_DATABASE',
                    'DeleteBehavior': 'LOG'
                }
            )
        except glue_client.exceptions.AlreadyExistsException:
            print(f"Crawler {crawler_name} already exists")

        # Start the crawler. A run that is already in progress is not a
        # failure: the crawl the caller asked for is under way.
        try:
            glue_client.start_crawler(Name=crawler_name)
        except glue_client.exceptions.CrawlerRunningException:
            print(f"Crawler {crawler_name} is already running")

        return True

    except Exception as e:
        # Best-effort contract: report the problem and signal failure through
        # the return value instead of raising.
        print(f"Error creating/starting crawler: {str(e)}")
        return False



In [None]:
def process_data(database_name="your_database_name",
                 table_prefix="your_table_prefix",
                 s3_target_path="s3://your-bucket/your-directory/"):
    """Run the Glue job: initialise the job, then create and start the crawler.

    Reads ``JOB_NAME`` from ``sys.argv``, builds the Spark/Glue contexts,
    initialises the Glue ``Job``, calls ``create_glue_crawler`` with the given
    settings, prints the outcome, and commits the job.

    Args:
        database_name: Glue database name handed to ``create_glue_crawler``.
        table_prefix: Table prefix handed to ``create_glue_crawler``.
        s3_target_path: S3 path handed to ``create_glue_crawler``.

    The defaults are placeholders; pass real values or edit them before use.

    Returns:
        None. The outcome is reported via ``print`` only; the job is committed
        whether or not the crawler step succeeded.
    """
    args = getResolvedOptions(sys.argv, ['JOB_NAME'])
    sc = SparkContext()
    glueContext = GlueContext(sc)
    # Not used further in this function; kept so the session is still obtained
    # exactly as before.
    spark = glueContext.spark_session
    job = Job(glueContext)
    job.init(args['JOB_NAME'], args)

    # Create and start the crawler
    crawler_success = create_glue_crawler(
        database_name=database_name,
        table_prefix=table_prefix,
        s3_target_path=s3_target_path
    )

    # NOTE(review): success here means the crawler run was requested; nothing
    # in this function waits for the crawl to complete.
    if crawler_success:
        print("Data is now available in Athena")
    else:
        print("Failed to make data available in Athena")

    job.commit()

# Entry point: run the job only when this code executes as the main program,
# not when it is imported as a module.
if __name__ == "__main__":
    process_data()