In [None]:
import os

# Look up the image-related environment variables once, substituting
# 'Not set' when a variable is absent, then report both values.
image_version = os.environ.get('IMAGE_VERSION', 'Not set')
internal_image_uri = os.environ.get('SAGEMAKER_INTERNAL_IMAGE_URI', 'Not set')

print(f"IMAGE_VERSION: {image_version}")
print(f"SAGEMAKER_INTERNAL_IMAGE_URI: {internal_image_uri}")

In [None]:
from datetime import datetime
from zoneinfo import ZoneInfo

# Capture the current wall-clock time in the America/New_York zone, then
# print it. %Z renders the zone abbreviation in effect at that moment, so the
# printed suffix may differ from the fixed "EST" in the label.
new_york_now = datetime.now(ZoneInfo('America/New_York'))
formatted_start = new_york_now.strftime('%Y-%m-%d %H:%M:%S %Z')
print(f"Execution Start Time (EST): {formatted_start}")


In [None]:
# Package upgrade skipped - using environment default.
# This cell installs or upgrades nothing; it only logs that the
# sagemaker_studio package already present in the environment is used.
print('Using default sagemaker_studio package from environment')


# Test 11: Exploring Movie Ticket Sales with DynamoDB

This notebook performs data analytics on movie ticket sales to predict future sales. It writes the ticket sales dataset to a DynamoDB table and then queries that table with SQL.

In [0]:
from sagemaker_studio import sqlutils

In [0]:
import boto3
from datetime import datetime

# Print execution info
print(f"Execution started at: {datetime.now()}")

# Get current IAM role: ask STS which identity the notebook's credentials
# resolve to, then report its ARN and account.
sts = boto3.client('sts')
identity = sts.get_caller_identity()
print(f"Current Role ARN: {identity['Arn']}")
print(f"Account ID: {identity['Account']}")

Movie Ticket Sales Dataset Created!
Shape: (1000, 26)

Columns: ['movie_id', 'genre', 'rating', 'studio', 'theater_chain', 'release_date', 'budget_millions', 'star_power_score', 'director_reputation', 'sequel_flag', 'franchise_flag', 'marketing_spend_millions', 'num_theaters', 'num_screens', 'is_summer_release', 'is_holiday_release', 'is_weekend_release', 'num_competing_movies', 'avg_competitor_rating', 'social_media_mentions_k', 'critic_score', 'audience_score', 'avg_ticket_price', 'premium_format_pct', 'urban_theater_pct', 'ticket_sales_millions']

Target variable statistics:
count    1000.000000
mean       89.138230
std        25.186165
min        26.050000
25%        72.065000
50%        87.465000
75%       103.602500
max       304.280000
Name: ticket_sales_millions, dtype: float64

Sample records:


Unnamed: 0,movie_id,genre,rating,studio,theater_chain,release_date,budget_millions,star_power_score,director_reputation,sequel_flag,franchise_flag,marketing_spend_millions,num_theaters,num_screens,is_summer_release,is_holiday_release,is_weekend_release,num_competing_movies,avg_competitor_rating,social_media_mentions_k,critic_score,audience_score,avg_ticket_price,premium_format_pct,urban_theater_pct,ticket_sales_millions
0,MOVIE_0001,Comedy,G,Paramount,Landmark,2025-03-18,18.13,8,5,0,0,6.85,966,1381,0,0,0,2,7.3,17.5,15.1,74.1,15.54,0.15,0.797,60.86
1,MOVIE_0002,Sci-Fi,G,Amazon,Showcase,2025-09-30,41.72,9,1,0,0,23.46,3058,4650,0,0,0,2,7.7,21.5,63.3,23.5,13.06,0.201,0.426,88.19
2,MOVIE_0003,Horror,PG,Netflix,AMC,2024-04-06,7.54,5,2,0,1,4.84,1890,5238,0,0,1,3,5.6,51.8,78.0,51.9,10.06,0.32,0.413,71.32
3,MOVIE_0004,Horror,R,Amazon,Alamo Drafthouse,2025-10-26,31.84,10,4,0,1,19.07,1315,2565,0,0,1,8,6.0,8.6,11.4,51.8,11.46,0.238,0.406,76.8
4,MOVIE_0005,Thriller,PG-13,Universal,Landmark,2024-11-22,20.27,3,1,0,0,12.22,4061,8221,0,1,0,3,5.2,8.1,38.0,44.4,13.97,0.341,0.755,81.04


In [0]:
import boto3
import pandas as pd
from botocore.exceptions import ClientError
import json
from decimal import Decimal

# Initialize DynamoDB resource.
# No region or credentials are passed here, so boto3 resolves them from its
# default configuration.
dynamodb = boto3.resource('dynamodb')
# Name of the table this cell creates (when missing) and writes to.
table_name = 'movies'

# Check if table exists, if not create it
def create_table_if_not_exists(name=None, resource=None):
    """Return the DynamoDB table, creating it first when it does not exist.

    Args:
        name: Name of the table to look up or create. When omitted, the
            module-level ``table_name`` is used.
        resource: boto3 DynamoDB service resource to operate on. When omitted,
            the module-level ``dynamodb`` is used.

    Returns:
        The ``Table`` resource for the existing or newly created table.

    Raises:
        ClientError: Re-raised unchanged when ``load()`` fails with any error
            code other than ``ResourceNotFoundException``. Errors raised while
            creating the table also propagate to the caller.
    """
    if name is None:
        name = table_name
    if resource is None:
        resource = dynamodb

    try:
        table = resource.Table(name)
        table.load()  # This will raise an exception if table doesn't exist
        print(f"Table '{name}' already exists")
        return table
    except ClientError as e:
        if e.response['Error']['Code'] != 'ResourceNotFoundException':
            # Not a "missing table" error: propagate it with its original
            # traceback instead of masking it by attempting a create.
            raise

    # Creation happens outside the except block so a failure here is reported
    # on its own rather than chained onto the handled "not found" error.
    print(f"Table '{name}' not found. Creating table...")

    # Create table with movie_id as primary key
    table = resource.create_table(
        TableName=name,
        KeySchema=[
            {
                'AttributeName': 'movie_id',
                'KeyType': 'HASH'  # Partition key
            }
        ],
        AttributeDefinitions=[
            {
                'AttributeName': 'movie_id',
                'AttributeType': 'S'  # String
            }
        ],
        BillingMode='PAY_PER_REQUEST'  # On-demand billing
    )

    # Wait for table to be created
    print("Waiting for table to be created...")
    table.wait_until_exists()
    print(f"Table '{name}' created successfully!")
    return table

# Helper function to convert pandas dtypes to DynamoDB compatible types
def convert_to_dynamodb_format(value):
    """Convert one scalar cell value into a form suitable for a DynamoDB item.

    Args:
        value: A scalar taken from a DataFrame cell.

    Returns:
        ``None`` for missing values (as judged by ``pd.isna``), a plain
        ``bool`` for Python or NumPy booleans, a ``Decimal`` built from the
        value's string form for integers and floats (Python or NumPy), and
        ``str(value)`` for everything else.
    """
    if pd.isna(value):
        return None
    # Booleans must be tested before numbers: bool is a subclass of int, so a
    # numeric check placed first would capture True/False and then fail on
    # Decimal('True').
    if pd.api.types.is_bool(value):
        return bool(value)
    # is_integer / is_float also recognise NumPy scalar types, which a plain
    # isinstance(value, (int, float)) check misses for NumPy integers.
    if pd.api.types.is_integer(value) or pd.api.types.is_float(value):
        # Going through str() keeps the short decimal text of a float rather
        # than its full binary expansion.
        return Decimal(str(value))
    return str(value)

# Make sure the destination table exists before anything is written
table = create_table_if_not_exists()

# Build one DynamoDB item per DataFrame row, leaving out every attribute whose
# converted value is None (i.e. nulls are not stored).
# NOTE(review): `df` is not defined in any cell visible here - presumably it is
# created by the dataset-generation cell whose output appears above; confirm it
# exists when running on a fresh kernel.
items_to_write = []
for _, record in df.iterrows():
    converted_pairs = (
        (column_name, convert_to_dynamodb_format(record[column_name]))
        for column_name in df.columns
    )
    items_to_write.append(
        {column_name: converted for column_name, converted in converted_pairs if converted is not None}
    )

# Batch write items to DynamoDB (DynamoDB batch_writer handles batching automatically)
print(f"Writing {len(items_to_write)} items to DynamoDB table '{table_name}'...")

with table.batch_writer() as writer:
    for entry in items_to_write:
        writer.put_item(Item=entry)

print(f"Successfully wrote {len(items_to_write)} items to DynamoDB table '{table_name}'!")

# Spot-check the write by reading back a handful of items
print("\nFirst 5 items in the table:")
response = table.scan(Limit=5)
scanned_items = response['Items']
for i, item in enumerate(scanned_items, 1):
    print(f"Item {i}: {dict(item)}")

Table 'movies' not found. Creating table...
Waiting for table to be created...


Table 'movies' created successfully!
Writing 1000 items to DynamoDB table 'movies'...


Successfully wrote 1000 items to DynamoDB table 'movies'!

First 5 items in the table:
Item 1: {'movie_id': 'MOVIE_0251', 'marketing_spend_millions': Decimal('3.75'), 'avg_ticket_price': Decimal('14.01'), 'avg_competitor_rating': Decimal('7.4'), 'num_competing_movies': Decimal('3'), 'premium_format_pct': Decimal('0.333'), 'budget_millions': Decimal('11.22'), 'social_media_mentions_k': Decimal('5.8'), 'num_screens': Decimal('2297'), 'director_reputation': Decimal('2'), 'num_theaters': Decimal('1826'), 'rating': 'NR', 'theater_chain': 'Century', 'franchise_flag': Decimal('0'), 'studio': 'Paramount', 'is_weekend_release': Decimal('0'), 'genre': 'Horror', 'is_summer_release': Decimal('0'), 'star_power_score': Decimal('7'), 'release_date': '2025-04-11', 'urban_theater_pct': Decimal('0.595'), 'is_holiday_release': Decimal('0'), 'audience_score': Decimal('79.7'), 'critic_score': Decimal('61.1'), 'ticket_sales_millions': Decimal('52.9'), 'sequel_flag': Decimal('0')}
Item 2: {'movie_id': 'MOVIE

In [0]:
# Run a SQL query through the "default.sql" connection and keep the result;
# the bare name on the last line makes the notebook display it.
# NOTE(review): presumably this connection exposes the DynamoDB 'movies' table
# written by the earlier cell - confirm how the connection is configured.
sql_output_d0we = sqlutils.sql("select * from movies limit 10", connection_name="default.sql")
sql_output_d0we

Unnamed: 0,movie_id,marketing_spend_millions,avg_ticket_price,avg_competitor_rating,num_competing_movies,premium_format_pct,budget_millions,social_media_mentions_k,num_screens,director_reputation,num_theaters,rating,theater_chain,franchise_flag,studio,is_weekend_release,genre,is_summer_release,star_power_score,release_date,urban_theater_pct,is_holiday_release,audience_score,critic_score,ticket_sales_millions,sequel_flag
0,MOVIE_0251,3.75,14.01,7.4,3,0.333,11.22,5.8,2297,2,1826,NR,Century,0,Paramount,0,Horror,0,7,2025-04-11,0.595,0,79.7,61.1,52.9,0
1,MOVIE_0686,20.51,14.89,8.2,4,0.424,35.68,11.0,7440,6,3373,PG-13,Showcase,0,Sony,0,Comedy,0,5,2024-10-07,0.739,0,92.4,14.8,88.95,0
2,MOVIE_0634,12.46,12.73,5.7,5,0.184,24.93,17.9,2839,9,1553,R,AMC,0,Disney,1,Documentary,0,9,2024-01-07,0.456,0,26.7,23.5,89.88,1
3,MOVIE_0940,11.59,9.95,6.8,7,0.292,27.96,11.5,8517,7,3623,NR,Marcus,0,Disney,0,Documentary,0,6,2024-10-22,0.673,0,65.9,82.7,89.92,0
4,MOVIE_0189,2.63,9.08,8.2,2,0.359,5.33,29.9,2142,6,1115,R,Alamo Drafthouse,1,MGM,0,Adventure,0,10,2025-03-26,0.553,0,34.1,55.6,66.08,0
5,MOVIE_0418,21.53,10.79,6.6,2,0.205,38.69,41.3,7874,4,2294,G,AMC,0,Warner Bros,0,Animation,0,5,2025-03-28,0.434,0,60.8,53.7,101.76,1
6,MOVIE_0330,2.46,15.87,7.6,2,0.265,8.09,10.7,2789,3,1051,PG-13,Cinemark,0,MGM,0,Drama,0,8,2024-04-30,0.471,0,40.2,89.0,64.62,1
7,MOVIE_0406,9.38,15.53,6.8,3,0.424,18.05,28.0,5740,7,2758,PG-13,Cinemark,1,Amazon,0,Sci-Fi,0,5,2024-12-05,0.778,1,73.6,60.9,106.75,1
8,MOVIE_0264,5.75,10.45,8.3,1,0.15,9.21,68.0,4808,2,3172,G,Alamo Drafthouse,1,Lionsgate,1,Adventure,0,5,2024-01-14,0.551,0,83.4,38.0,99.09,1
9,MOVIE_0711,10.52,11.46,5.7,5,0.204,16.01,14.5,1763,7,828,G,Cinemark,1,Universal,0,Horror,0,8,2025-01-09,0.514,0,88.8,72.4,64.7,0


In [0]:
# Select the movies whose budget_millions exceeds 25, via the "default.sql"
# connection. The query has no LIMIT, so every matching row is returned; the
# bare name on the last line makes the notebook display the result.
sql_output_6fiu = sqlutils.sql("select * from movies where budget_millions > 25", connection_name="default.sql")
sql_output_6fiu

Unnamed: 0,movie_id,marketing_spend_millions,avg_ticket_price,avg_competitor_rating,num_competing_movies,premium_format_pct,budget_millions,social_media_mentions_k,num_screens,director_reputation,num_theaters,rating,theater_chain,franchise_flag,studio,is_weekend_release,genre,is_summer_release,star_power_score,release_date,urban_theater_pct,is_holiday_release,audience_score,critic_score,ticket_sales_millions,sequel_flag
0,MOVIE_0686,20.51,14.89,8.2,4,0.424,35.68,11.0,7440,6,3373,PG-13,Showcase,0,Sony,0,Comedy,0,5,2024-10-07,0.739,0,92.4,14.8,88.95,0
1,MOVIE_0940,11.59,9.95,6.8,7,0.292,27.96,11.5,8517,7,3623,NR,Marcus,0,Disney,0,Documentary,0,6,2024-10-22,0.673,0,65.9,82.7,89.92,0
2,MOVIE_0418,21.53,10.79,6.6,2,0.205,38.69,41.3,7874,4,2294,G,AMC,0,Warner Bros,0,Animation,0,5,2025-03-28,0.434,0,60.8,53.7,101.76,1
3,MOVIE_0864,59.75,13.18,8.0,0,0.354,101.96,17.9,5989,9,1908,NR,Marcus,0,Lionsgate,0,Action,1,4,2025-07-09,0.595,0,34.0,46.1,173.14,1
4,MOVIE_0004,19.07,11.46,6.0,8,0.238,31.84,8.6,2565,4,1315,R,Alamo Drafthouse,1,Amazon,1,Horror,0,10,2025-10-26,0.406,0,51.8,11.4,76.80,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
187,MOVIE_0147,41.33,9.69,8.2,2,0.415,62.91,45.5,2866,2,1511,PG-13,Showcase,0,Sony,0,Documentary,1,2,2024-06-25,0.730,0,50.1,74.9,104.52,0
188,MOVIE_0435,13.55,8.91,7.4,1,0.272,32.36,23.0,8551,7,3369,R,Century,1,Warner Bros,0,Comedy,0,5,2024-02-14,0.471,0,68.0,22.8,102.54,0
189,MOVIE_0402,12.39,12.79,7.7,4,0.164,36.95,47.5,8182,1,3556,PG-13,AMC,1,Netflix,0,Sci-Fi,1,10,2024-08-07,0.650,0,55.4,69.1,114.61,0
190,MOVIE_0061,52.43,13.87,6.6,2,0.327,76.64,14.0,2802,9,1420,R,Landmark,0,Disney,0,Action,0,8,2024-10-25,0.511,0,84.0,36.7,151.83,1
