### 2026 Predictions

In [1]:
# =============================================================================
# DATASET CONFIGURATION - Easy switching between dataset subsets
# =============================================================================

import sys
from pathlib import Path
sys.path.insert(0, str(Path('../').resolve()))

from dataset_config import (
    DEFAULT_CONFIG, get_dataset_config, get_dataset_path, get_config_summary,
    use_full_dataset, use_english_only, use_major_studios, use_english_major
)

# =============================================================================
# CHOOSE YOUR DATASET SCOPE - Uncomment one line to switch
# =============================================================================
# Keep exactly one CURRENT_CONFIG assignment active. The dataset helpers defined
# later in this notebook (load_dataset, resolve_dataset_path) read this
# module-level name and pass it to get_dataset_path, so re-run the downstream
# cells after changing it.

# CURRENT_CONFIG = use_full_dataset()                    # All studios, all languages (2010-2026)
# CURRENT_CONFIG = use_english_only(2010)                # English only (2010-2026)
CURRENT_CONFIG = use_english_only(2015)                # English only (2015-2026)
# CURRENT_CONFIG = use_major_studios(2010)               # Major studios only (2010-2026)
# CURRENT_CONFIG = use_major_studios(2015)               # Major studios only (2015-2026)
# CURRENT_CONFIG = use_english_major(2010)               # English + Major studios (2010-2026)
# CURRENT_CONFIG = use_english_major(2015)               # English + Major studios (2015-2026)

# CURRENT_CONFIG = DEFAULT_CONFIG  # Use default (full dataset)

# Echo the active configuration so the selected scope is recorded in the cell output.
print("🎯 DATASET CONFIGURATION:")
print("=" * 50)
print(get_config_summary(CURRENT_CONFIG))
print("\n💡 To change scope, uncomment one of the CURRENT_CONFIG lines above and re-run this cell")

🎯 DATASET CONFIGURATION:
📊 Dataset Configuration: English Only
   Description: English movies only
   Year range: 2015-2026
   Training file: dataset_domestic_processed_english_2015_2026.csv
   Full file: dataset_domestic_processed_english_2015_2026.csv


💡 To change scope, uncomment one of the CURRENT_CONFIG lines above and re-run this cell


In [2]:
# Setup imports and MLflow tracking
from pathlib import Path
import json

import numpy as np
import pandas as pd
import mlflow
from mlflow.tracking import MlflowClient

from movie_lists import normalize_domestic_titles
from model_utils import (
    prepare_features,
    compute_ranking_metrics,
    get_top10_predictions,
)

# Display settings: thousands separators with two decimals for pandas floats,
# and no scientific notation when printing numpy arrays.
pd.options.display.float_format = '{:,.2f}'.format
np.set_printoptions(suppress=True)

# Directories that may hold the MLflow file store, in priority order. Both are
# relative to the notebook's working directory.
TRACKING_DIR_CANDIDATES = [
    Path('../mlruns'),
    Path('mlruns'),
]

# Use the first candidate that exists; fail fast when none does.
tracking_path = next(
    (found.resolve() for found in TRACKING_DIR_CANDIDATES if found.exists()),
    None,
)
if tracking_path is None:
    raise FileNotFoundError('Unable to locate mlruns tracking directory')

# MLflow expects a URI, so hand it the absolute path in file:// form.
mlflow.set_tracking_uri(tracking_path.as_uri())


In [3]:
# Dataset loading with configuration system
def load_dataset(training=False):
    """Read the CSV selected by ``CURRENT_CONFIG`` and return it as a DataFrame.

    Args:
        training: Forwarded to ``get_dataset_path`` to choose between the
            training file and the full file of the active configuration.

    Returns:
        The loaded DataFrame after ``normalize_domestic_titles`` has been
        applied. When the configured scope is ``'english_major'`` and the
        ``is_major_studio`` column is present, only rows with
        ``is_major_studio == 1`` are kept.

    Side effects:
        Prints the path, the row count and, if ``release_year`` exists, the
        year range plus a per-period row breakdown.
    """
    import pandas as pd
    from movie_lists import normalize_domestic_titles

    csv_path = get_dataset_path(training=training, config=CURRENT_CONFIG)
    dataset_config = get_dataset_config(CURRENT_CONFIG)

    print(f"📁 Loading dataset: {csv_path}")
    df = pd.read_csv(csv_path)

    # The english_major scope has no dedicated file; narrow the loaded rows instead.
    if dataset_config['scope'] == 'english_major' and 'is_major_studio' in df.columns:
        rows_before = len(df)
        df = df.loc[df['is_major_studio'] == 1].copy()
        print(f"   Filtered to major studios: {len(df):,} movies (removed {rows_before - len(df):,})")

    df = normalize_domestic_titles(df)
    print(f"   ✅ Loaded {len(df):,} movies")

    # Without a release_year column there is nothing more to report.
    if 'release_year' not in df.columns:
        return df

    years = df['release_year']
    print(f"   Year range: {years.min()}-{years.max()}")

    # The training line is always shown; later periods only when they have rows.
    print(f"   Training (≤2023): {int((years <= 2023).sum()):,} movies")
    for label, year in (('Testing', 2024), ('Evaluation', 2025), ('Prediction', 2026)):
        n_rows = int((years == year).sum())
        if n_rows > 0:
            print(f"   {label} ({year}): {n_rows:,} movies")

    return df

# Legacy helper utilities for dataset resolution (keep for compatibility)
# DATA_DIR is relative to the notebook's working directory.
DATA_DIR = Path('../data')
# Fallback files in priority order; the first one that exists on disk becomes
# DEFAULT_DATASET_PATH.
DEFAULT_DATASET_CANDIDATES = [
    DATA_DIR / 'dataset_domestic_processed.csv',          # FULL dataset with all years
    DATA_DIR / 'dataset_domestic_processed_modeling.csv', # Training-only dataset (fallback)
]

def _first_existing(paths):
    for candidate in paths:
        if candidate.exists():
            return candidate
    return None

# Resolved once when this cell runs; None when neither candidate file exists.
DEFAULT_DATASET_PATH = _first_existing(DEFAULT_DATASET_CANDIDATES)

# Pre-built subset files keyed by (language scope, studio scope, first year).
# resolve_dataset_path builds keys of this shape for its legacy lookup. There is
# no combined english + major-studio entry in this table.
SUBSET_PATHS = {
    ('english_only', 'all_studios', 2010): DATA_DIR / 'dataset_domestic_processed_english_2010_2026.csv',
    ('english_only', 'all_studios', 2015): DATA_DIR / 'dataset_domestic_processed_english_2015_2026.csv',
    ('all_languages', 'major_only', 2010): DATA_DIR / 'dataset_domestic_processed_major_2010_2026.csv',
    ('all_languages', 'major_only', 2015): DATA_DIR / 'dataset_domestic_processed_major_2015_2026.csv',
}

def infer_year_floor(params):
    """Infer the first training year from a run's parameter mapping.

    The keys ``train_year_min``, ``train_start_year`` and
    ``training_year_start`` are checked in that order; the first value that can
    be read as a number wins.

    Args:
        params: Mapping-like object exposing ``.get`` (for example the params
            dict of an MLflow run). Falsy values and objects without ``.get``
            yield the default.

    Returns:
        The year as an ``int``, or ``2015`` when no key holds a usable value.
    """
    if not params:
        return 2015
    getter = params.get if hasattr(params, 'get') else (lambda key: None)
    for key in ('train_year_min', 'train_start_year', 'training_year_start'):
        value = getter(key)
        if value is None:
            continue
        try:
            return int(value)
        except (TypeError, ValueError):
            # TypeError: non-scalar value (list, dict, ...).
            # ValueError: not a plain integer literal; it may still be a
            # float-formatted year such as '2015.0', handled below.
            pass
        try:
            return int(float(value))
        except (TypeError, ValueError, OverflowError):
            # Not numeric at all, or NaN / infinity: try the next key.
            continue
    return 2015

def resolve_dataset_path(studio_scope, language_scope, params=None):
    """Resolve the dataset file to use for a run.

    The configuration system is tried first: when
    ``get_dataset_path(training=False, config=CURRENT_CONFIG)`` succeeds, its
    result is returned and the arguments are ignored. Only if that call raises
    does the legacy lookup over ``SUBSET_PATHS`` run.

    Args:
        studio_scope: Studio scope recorded on the run (e.g. ``'major_only'``).
            Missing or non-string values fall back to ``'all_studios'``.
        language_scope: Language scope recorded on the run (e.g.
            ``'english_only'``). Missing or non-string values fall back to
            ``'all_languages'``.
        params: Optional run-parameter mapping passed to ``infer_year_floor``
            to decide which start year to prefer.

    Returns:
        Path of the dataset file to load.

    Raises:
        FileNotFoundError: If the legacy lookup finds no existing subset file
            and ``DEFAULT_DATASET_PATH`` is ``None``.
    """
    try:
        return get_dataset_path(training=False, config=CURRENT_CONFIG)
    except Exception as exc:
        # Exception (not a bare except) so Ctrl-C / SystemExit still propagate;
        # the reason is reported instead of being silently dropped.
        print(f"Configuration lookup failed ({exc}); falling back to legacy dataset resolution")

    def _normalize(scope, default):
        # Scopes read from a DataFrame row can be NaN when the param column is
        # missing for that run; treat anything that is not a non-empty string
        # as "not specified".
        return scope.lower() if isinstance(scope, str) and scope else default

    lang_scope = _normalize(language_scope, 'all_languages')
    studio_scope = _normalize(studio_scope, 'all_studios')

    # Prefer the subset whose start year matches the run's training floor.
    year_floor = infer_year_floor(params or {})
    year_pref = [2015, 2010] if year_floor and year_floor > 2010 else [2010, 2015]

    candidates = []
    if lang_scope.startswith('english'):
        # Follows year_pref like the other branches, so an English run trained
        # from 2015 no longer prefers the 2010 file.
        candidates += [('english_only', 'all_studios', yr) for yr in year_pref]
    if studio_scope == 'major_only':
        candidates += [('all_languages', 'major_only', yr) for yr in year_pref]

    # Generic key built from the requested scopes; unrecognised values fall
    # back to the unrestricted scope.
    generic_lang = lang_scope if lang_scope in ('english_only', 'all_languages') else 'all_languages'
    generic_studio = studio_scope if studio_scope in ('all_studios', 'major_only') else 'all_studios'
    candidates += [(generic_lang, generic_studio, yr) for yr in year_pref]

    # dict.fromkeys removes duplicates while keeping priority order.
    for key in dict.fromkeys(candidates):
        path = SUBSET_PATHS.get(key)
        if path is not None and path.exists():
            return path

    if DEFAULT_DATASET_PATH is not None:
        return DEFAULT_DATASET_PATH
    raise FileNotFoundError('No suitable dataset found for the requested scope.')

def load_prepared_dataset(dataset_path):
    """Load the configured dataset and run feature preparation on it.

    The data always comes from ``load_dataset(training=False)``, i.e. from the
    file selected by ``CURRENT_CONFIG``. ``dataset_path`` is kept for backward
    compatibility and is only compared against the configured file so that a
    mismatch is reported rather than silently mislabelled in the log.

    Args:
        dataset_path: Path the caller believes is being loaded (may be
            ``None``).

    Returns:
        Tuple ``(df_prepared, feature_cols_prepared, target_name)`` exactly as
        returned by ``prepare_features`` with ``target='revenue_domestic'``.
    """
    from model_utils import prepare_features

    # The file load_dataset will actually read.
    configured_path = get_dataset_path(training=False, config=CURRENT_CONFIG)
    if dataset_path is not None and str(dataset_path) != str(configured_path):
        print(
            f'⚠️ Requested dataset {dataset_path} differs from the configured dataset '
            f'{configured_path}; loading the configured dataset.'
        )

    df_prepared = load_dataset(training=False)
    df_prepared, feature_cols_prepared, target_name = prepare_features(
        df_prepared,
        target='revenue_domestic',
        verbose=False,
    )
    # Report the file that was really loaded, not the caller-supplied label.
    print(f'Loaded dataset: {configured_path} ({len(df_prepared)} rows)')
    print(f'   Feature columns: {len(feature_cols_prepared)}')
    print(f'   Target column: {target_name}')
    return df_prepared, feature_cols_prepared, target_name

In [4]:
# Locate the best MLflow run (Recall@10 priority)
#
# Outputs of this cell, read by the evaluation cell below:
#   best_run              - row (pandas Series) of the selected run, or None
#   best_run_params       - params of that run as fetched from the tracking store
#   best_run_dataset_path - dataset file resolved for that run, or None
#   mlflow_results        - ranked table of all finished runs in the experiment
experiment_name = 'box_office_modeling'
client = MlflowClient()
best_run = None
best_run_params = {}
best_run_dataset_path = None
mlflow_results = pd.DataFrame()

experiment = mlflow.get_experiment_by_name(experiment_name)
if experiment is None:
    # Named experiment not found: fall back to any experiment that has at least
    # one FINISHED run, choosing the one with the greatest last_update_time.
    experiments = client.search_experiments()
    finished = []
    for exp in experiments:
        # max_results=1 because only existence of a finished run matters here.
        runs = mlflow.search_runs(
            [exp.experiment_id],
            filter_string="attributes.status = 'FINISHED'",
            max_results=1,
        )
        if not runs.empty:
            finished.append(exp)
    if finished:
        # `or 0` guards against a last_update_time attribute that is None.
        experiment = max(finished, key=lambda exp: getattr(exp, 'last_update_time', 0) or 0)
        print(f"Default experiment missing; using '{experiment.name}' instead")

if experiment is not None:
    runs_df = mlflow.search_runs(
        [experiment.experiment_id],
        filter_string="attributes.status = 'FINISHED'",
        order_by=['metrics.rmse ASC'],
    )
    if not runs_df.empty:
        # Map MLflow's prefixed column names to short names used below.
        rename_map = {
            'tags.mlflow.runName': 'run_name',
            'params.model': 'model',
            'params.strategy': 'strategy',
            'params.data_scope_studios': 'data_scope_studios',
            'params.data_scope_language': 'data_scope_language',
            'metrics.rmse': 'rmse',
            'metrics.recall_at_10': 'recall_at_10',
            'metrics.precision_at_10': 'precision_at_10',
            'metrics.ndcg_at_10': 'ndcg_at_10',
            'metrics.r2': 'r2',
            'metrics.mae': 'mae',
            'metrics.mape': 'mape',
            'metrics.spearman_corr': 'spearman_corr',
            'metrics.kendall_corr': 'kendall_corr',
        }
        # Only keep columns that were actually logged; run_id is always retained.
        keep_cols = [c for c in rename_map if c in runs_df.columns] + ['run_id']
        mlflow_results = runs_df[keep_cols].rename(columns=rename_map)
        # NOTE(review): spearman_corr and kendall_corr are kept above but are
        # not coerced to numeric here - confirm whether that is intentional.
        for col in ('rmse', 'recall_at_10', 'precision_at_10', 'ndcg_at_10', 'r2', 'mae', 'mape'):
            if col in mlflow_results.columns:
                mlflow_results[col] = pd.to_numeric(mlflow_results[col], errors='coerce')
        # Rank by Recall@10 (highest first) with RMSE (lowest first) as the
        # tie-breaker; fall back to RMSE alone when no run logged Recall@10.
        if 'recall_at_10' in mlflow_results.columns and mlflow_results['recall_at_10'].notna().any():
            mlflow_results = mlflow_results.sort_values(['recall_at_10', 'rmse'], ascending=[False, True])
        elif 'rmse' in mlflow_results.columns:
            mlflow_results = mlflow_results.sort_values('rmse')
        mlflow_results = mlflow_results.reset_index(drop=True)
        if not mlflow_results.empty:
            # After sorting, the first row is the best run.
            best_run = mlflow_results.iloc[0]
            run_id = best_run['run_id']
            try:
                run_info = client.get_run(run_id)
                best_run_params = run_info.data.params
            except Exception as exc:
                # Parameters are optional for the rest of the cell; continue without them.
                print(f"Unable to fetch parameters for run {run_id}: {exc}")
                best_run_params = {}
            try:
                best_run_dataset_path = resolve_dataset_path(
                    best_run.get('data_scope_studios'),
                    best_run.get('data_scope_language'),
                    best_run_params,
                )
                print(f"Dataset selected: {best_run_dataset_path}")
            except Exception as exc:
                # DEFAULT_DATASET_PATH may itself be None, in which case the
                # evaluation cell reports that no run could be evaluated.
                print(f"Unable to resolve dataset path: {exc}")
                best_run_dataset_path = DEFAULT_DATASET_PATH
            # Metrics missing from the run row are shown as nan in the summary line.
            recall_text = f"Recall@10 {best_run.get('recall_at_10', float('nan')):.2%}"
            rmse_text = f"RMSE ${best_run.get('rmse', float('nan')):,.0f}"
            print(f"Best run: {best_run.get('run_name')} ({recall_text}, {rmse_text})")
    else:
        print('No finished MLflow runs found in the experiment.')
else:
    print('No MLflow experiment with finished runs is available.')


Dataset selected: ../data/dataset_domestic_processed_english_2015_2026.csv
Best run: LightGBM | No Pandemic Era (Recall@10 80.00%, RMSE $78,937,117)


In [5]:
# Evaluate the best model on 2026 data
#
# Loads the model logged under the best MLflow run, predicts revenue for every
# 2026 row, prints error and ranking metrics when actual revenue is available,
# and displays the predicted top 10.
if best_run is None or best_run_dataset_path is None:
    print('Cannot evaluate because no best MLflow run was identified.')
else:
    df_prepared, feature_cols, target = load_prepared_dataset(best_run_dataset_path)
    test_2026 = df_prepared[df_prepared['release_year'] == 2026].copy()
    if test_2026.empty:
        print('No 2026 rows available in the selected dataset.')
    else:
        run_id = best_run['run_id']
        model_uri = f'runs:/{run_id}/model'

        # Try the sklearn flavor first and fall back to the generic pyfunc
        # flavor. load_model raises on failure (it never returns None), so both
        # failures are caught here to make the "unable to load" branch reachable.
        best_model = None
        try:
            best_model = mlflow.sklearn.load_model(model_uri)
        except Exception as sklearn_exc:
            try:
                best_model = mlflow.pyfunc.load_model(model_uri)
            except Exception as pyfunc_exc:
                print(f"   sklearn flavor failed: {sklearn_exc}")
                print(f"   pyfunc flavor failed: {pyfunc_exc}")

        if best_model is None:
            print(f"Unable to load model artifacts for run {run_id}")
        else:
            X_test = test_2026[feature_cols]
            preds_log = best_model.predict(X_test)
            # Flatten to a plain 1-D array: a pyfunc model may return a pandas
            # object with a fresh 0..n-1 index, which would misalign against
            # test_2026 in the arithmetic below.
            # NOTE(review): expm1 assumes the model was trained on
            # log1p(revenue) - confirm against the training notebook.
            preds = np.expm1(np.asarray(preds_log, dtype=float).ravel())
            test_2026['predicted_revenue'] = preds

            # Error metrics only make sense when at least one actual is known.
            actual_available = target in test_2026.columns and test_2026[target].notna().any()
            if actual_available:
                rmse = float(np.sqrt(np.mean((test_2026[target] - preds) ** 2)))
                mae = float(np.mean(np.abs(test_2026[target] - preds)))
                with np.errstate(divide='ignore', invalid='ignore'):
                    mape = float(np.nanmean(np.abs((test_2026[target] - preds) / test_2026[target])) * 100)
                # R^2 is undefined when every actual value is identical.
                if test_2026[target].nunique() > 1:
                    ss_res = np.sum((test_2026[target] - preds) ** 2)
                    ss_tot = np.sum((test_2026[target] - test_2026[target].mean()) ** 2)
                    r2 = float(1 - ss_res / ss_tot) if ss_tot > 0 else float('nan')
                else:
                    r2 = float('nan')
            else:
                rmse = mae = mape = r2 = float('nan')

            ranking_metrics = compute_ranking_metrics(
                test_2026,
                preds,
                target_col=target,
                title_col='title',
                k=10,
            )

            print('2026 evaluation snapshot:')
            if not np.isnan(rmse):
                print(f'   RMSE: ${rmse:,.0f}')
                print(f'   MAE:  ${mae:,.0f}')
                print(f'   MAPE: {mape:.1f}%')
                print(f'   R^2:  {r2:.3f}')
            else:
                print('   Actual revenue not available for full error metrics.')
            if ranking_metrics:
                if 'recall_at_10' in ranking_metrics:
                    print(f"   Recall@10: {ranking_metrics['recall_at_10']:.2%}")
                if 'precision_at_10' in ranking_metrics:
                    print(f"   Precision@10: {ranking_metrics['precision_at_10']:.2%}")
                if 'ndcg_at_10' in ranking_metrics:
                    print(f"   NDCG@10: {ranking_metrics['ndcg_at_10']:.3f}")

            top10_2026 = get_top10_predictions(best_model, df_prepared, 2026, feature_cols)
            if top10_2026 is not None:
                display(top10_2026)
                predicted_top10 = top10_2026['title'].tolist()
                if actual_available:
                    actual_top10 = test_2026.dropna(subset=[target]).nlargest(10, target)['title'].tolist()
                    overlap = len(set(actual_top10) & set(predicted_top10))
                    recall_at_10 = overlap / max(len(actual_top10), 1)
                    false_negatives = sorted(set(actual_top10) - set(predicted_top10))
                    false_positives = sorted(set(predicted_top10) - set(actual_top10))
                    print(f'Recall@10: {recall_at_10:.2%}')
                    if false_negatives:
                        print('Missed (actual top 10 not predicted):', false_negatives)
                    if false_positives:
                        print('False positives (predicted but not actual):', false_positives)
                    # Fewer than 10 titles may have actuals early in the year,
                    # so report the real denominator rather than a fixed 10.
                    print(f'Overlap with actual 2026 top 10: {overlap}/{len(actual_top10)}')
                else:
                    print('Actual 2026 revenues unavailable, skipping overlap metrics.')
            else:
                print('Unable to compute top 10 predictions for 2026.')

📁 Loading dataset: ../data/dataset_domestic_processed_english_2015_2026.csv
No title corrections needed
   ✅ Loaded 1,307 movies
   Year range: 2015-2026
   Training (≤2023): 1,003 movies
   Testing (2024): 145 movies
   Evaluation (2025): 114 movies
   Prediction (2026): 45 movies
Loaded dataset: ../data/dataset_domestic_processed_english_2015_2026.csv (1307 rows)
   Feature columns: 68
   Target column: revenue_domestic


Downloading artifacts:   0%|          | 0/1 [00:00<?, ?it/s]

Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

2026 evaluation snapshot:
   Actual revenue not available for full error metrics.


Unnamed: 0,title,predicted_revenue,actual_revenue,prediction_error_pct,is_marvel,is_dc,is_star_wars,is_superhero,is_sequel,is_live_action_remake,is_major_studio,is_disney,is_english,is_origin_usa,is_origin_uk_ie,is_origin_canada,is_origin_us_uk_ca
0,The Mandalorian and Grogu,323852947.67,,,0,0,1,0,1,0,1,1,1,1,0,0,1
1,Spider-Man: Brand New Day,286012255.61,,,1,0,0,1,1,0,1,0,1,1,0,0,1
2,Avengers: Doomsday,266315754.42,,,1,0,0,1,1,0,1,1,1,1,0,0,1
3,Moana,222650094.59,,,0,0,0,0,0,1,1,1,1,1,0,0,1
4,Minions 3,199050054.88,,,0,0,0,0,1,0,1,0,1,1,0,0,1
5,The Hunger Games: Sunrise on the Reaping,176208743.35,,,0,0,0,0,1,0,1,0,1,1,0,0,1
6,The Super Mario Galaxy Movie,170227460.1,,,0,0,0,0,1,0,1,0,1,1,0,0,1
7,The Angry Birds Movie 3,149144251.51,,,0,0,0,0,1,0,1,0,1,1,1,0,1
8,Scary Movie 6,137838998.02,,,0,0,0,0,1,0,1,0,1,1,0,0,1
9,Toy Story 5,137004062.95,,,0,0,0,0,1,0,1,1,1,1,0,0,1


Actual 2026 revenues unavailable, skipping overlap metrics.


## LLM Predictions

Prompt used: "What is your best guess as to the top 10 US domestic box office movies of 2026? Use your knowledge about the top films of the last 10 years to guide your predictions. Think carefully. It really matters that your prediction are correct. I will be comparing them with other AI models to see who gets them the most correct."

## GPT-5's predictions as of Thursday, September 25th, 2025:

Alright—here’s my best shot at the 2026 domestic (U.S./Canada) top-10, ranked by **full domestic grosses for movies released in 2026**. I’m leaning on what’s dominated the last decade (Marvel/Spider-Man, Nintendo/Illumination, Pixar, big holiday tentpoles) and on the current 2026 slate.

### My top-10 prediction (ranked)

1. **Avengers: Doomsday** — *Dec 18, 2026* (Disney/Marvel). December “event” MCU movies tend to explode out of the gate and hold through January; Avengers has owned the domestic charts in prior cycles. If the date holds, this is the favorite. ([Deadline][1])
2. **The Super Mario Galaxy Movie** — *Apr 3, 2026* (Universal/Illumination). The first Mario did a monster \$575M domestic; a spring date with long runway screams legs. ([Disney Movies][2])
3. **Toy Story 5** — *Jun 19, 2026* (Disney/Pixar). Four-quadrant, generational IP in a prime June slot; TS4 did \$434M domestic pre-pandemic. ([IMDb][3])
4. **Spider-Man: Brand New Day (Spider-Man 4)** — *Jul 31, 2026* (Sony/Marvel). The most reliable non-Avatar brand domestically, landing late-July for IMAX runway. ([Hollywood Reporter][4])
5. **Minions 3** — *Jul 1, 2026* (Universal/Illumination). Despicable Me/Minions entries live in the \$300–\$400M domestic band and own July 4 playtime. ([Universal Pictures][5])
6. **The Mandalorian & Grogu** — *May 22, 2026* (Disney/Lucasfilm). Memorial Day + Star Wars + Grogu = huge family turnout and merch-powered awareness. ([Business Wire][6])
7. **Shrek 5** — *Dec 23, 2026* (Universal/DreamWorks). Christmas launch with a nostalgia juggernaut should leg into January/February; late release but massive total. ([Animation Magazine][7])
8. **Moana (Live-Action)** — *Jul 10, 2026* (Disney). Disney’s stronger live-action remakes (Lion King/Aladdin) show the ceiling; Moana’s music/cultural footprint is big. ([Wikipedia][8])
9. **The Odyssey** — *Jul 17, 2026* (Universal/Christopher Nolan). Nolan + IMAX in midsummer has become an event; Oppenheimer hit \$329M domestic with adult appeal. ([Universal Pictures][9])
10. **Supergirl** — *Jun 26, 2026* (WB/DC Studios). If *Superman* (2026) plays well, this gets a halo; summer placement helps even with competition. ([Hollywood Reporter][10])

---

### Why this ordering?

* **Pattern fit:** The last decade’s U.S. #1s are dominated by MCU/Spider-Man, Star Wars, and top-tier animation (Pixar/Illumination/Nintendo). These brands still convert at scale when dated well.
* **Calendar strength:** April (Mario) offers clear playtime; late-June/July (Toy Story/Minions/Moana/Nolan/Spidey) is crowded, so I nudged family titles a hair below the absolute peaks to account for cannibalization; **December event** (Avengers, Shrek) tends to rocket to big totals counted to their release year.
* **Risk notes:** If **Avengers: Doomsday** slips to 2027, I’d elevate **Mario** to #1, bump everything below up a slot, and slide **Street Fighter (Oct 16, 2026, Paramount/Legendary)** into the #10–#12 conversation depending on word of mouth. ([Reuters][11])

### Confidence tiers (so you can judge my bet)

* **High:** Avengers, Mario, Toy Story, Spider-Man.
* **Medium:** Minions, Mandalorian & Grogu, Moana, Nolan’s Odyssey.
* **Lower (but still in):** Shrek (late-Dec launch volatility), Supergirl (new DCU still proving legs).


[1]: https://deadline.com/2026/05/next-avengers-movies-push-release-dates-dog-stars-devil-wears-prada-1236409088/?utm_source=chatgpt.com "Next 'Avengers' Movies Push Release Dates"
[2]: https://movies.disney.com/star-wars-the-mandalorian-and-grogu?utm_source=chatgpt.com "Star Wars: The Mandalorian and Grogu - Disney Movies"
[3]: https://www.imdb.com/title/tt22084616/?utm_source=chatgpt.com "Spider-Man: Brand New Day (2026)"
[4]: https://www.hollywoodreporter.com/movies/movie-news/spider-man-4-release-date-delayed-the-odyssey-1236143449/?utm_source=chatgpt.com "'Spider-Man 4' Release Date Pushed to Late July 2026"
[5]: https://www.universalpictures.com/movies/minions-3/?utm_source=chatgpt.com "Minions 3"
[6]: https://www.businesswire.com/news/home/20260912544469/en/Illumination-and-Nintendo-Announce-the-New-Animated-Film-Based-on-the-World-of-Super-Mario-Bros.-Will-Be-Titled-The-Super-Mario-Galaxy-Movie?utm_source=chatgpt.com "Illumination and Nintendo Announce the New Animated ..."
[7]: https://www.animationmagazine.net/2026/01/universal-sets-new-dates-for-shrek-5-minions-3-conrad-vernon-boards-the-shrekquel/?utm_source=chatgpt.com "Universal Sets New Dates for 'Shrek 5' & 'Minions 3,' ..."
[8]: https://en.wikipedia.org/wiki/The_Devil_Wears_Prada_2?utm_source=chatgpt.com "The Devil Wears Prada 2"
[9]: https://www.universalpictures.com/movies/?utm_source=chatgpt.com "Upcoming Movies"
[10]: https://www.hollywoodreporter.com/movies/movie-news/supergirl-movie-lands-2026-release-1235899430/?utm_source=chatgpt.com "'Supergirl' Movie Lands June 2026 Release in Theaters"
[11]: https://www.reuters.com/business/media-telecom/paramount-legendary-entertainment-strike-deal-street-fighter-film-2026-09-04/?utm_source=chatgpt.com "Paramount, Legendary Entertainment strike deal for 'Street Fighter' film"


## Claude Sonnet 4's predictions as of Thursday, September 25th, 2025:

I need to search for information about upcoming 2026 movie releases to make informed predictions, since this is about future events that I wouldn't have comprehensive knowledge of. Let me search for more specific information about major franchise releases and blockbusters planned for 2026. Now let me search for other major anticipated 2026 releases like Disney live-action films, sequels, and other blockbusters. Let me search for information about other major franchises and sequels scheduled for 2026. Now let me search for information about other major 2026 releases like Shrek 5, Dune Messiah, and other highly anticipated films. I notice there's some conflicting information about Shrek 5's release date. Let me check one more time to clarify, then search for other major 2026 releases. Now let me search for box office data from recent years to inform my predictions about what types of films typically perform best. Let me fetch specific box office data to better understand the patterns. Based on all my research into upcoming 2026 releases and analysis of recent box office performance patterns, here are my predictions for the **top 10 US domestic box office movies of 2026**:

## **Top 10 Predicted US Domestic Box Office Movies of 2026**

**1. Avengers: Doomsday (December 18, 2026)** - $450-550 million domestic  
Marvel's Avengers: Doomsday will be released on December 18, 2026, featuring Robert Downey Jr. returning as Doctor Doom and directed by the Russo Brothers. Given the massive success of previous Avengers films and the excitement around RDJ's return to the MCU, this should be the year's biggest hit.

**2. The Super Mario Galaxy Movie (April 3, 2026)** - $400-450 million domestic  
The sequel to The Super Mario Bros. Movie is scheduled to be theatrically released in the United States on April 3, 2026, with Chris Pratt returning to voice Mario. The original made over $1.3 billion worldwide, so the sequel should perform exceptionally well.

**3. Moana (Live-Action) (July 10, 2026)** - $350-400 million domestic  
Moana is scheduled to be released by Walt Disney Studios Motion Pictures in the United States on July 10, 2026, starring Catherine Laga'aia and Dwayne Johnson reprising his role as Maui. Disney's live-action remakes have been hugely successful, and this one has the advantage of The Rock's star power.

**4. Dune: Part Three (December 18, 2026)** - $300-350 million domestic  
Dune: Part Three is scheduled to be released on December 18, 2026, with Timothée Chalamet, Zendaya, Florence Pugh, Jason Momoa, and Josh Brolin returning. Given that Dune: Part Two earned $282,144,358 domestically, the conclusion should perform even better.

**5. Minions 3 (July 1, 2026)** - $280-320 million domestic  
Illumination's "Minions 3" moves into that summer 2026 window from its planned June 30, 2027 date. The Minions franchise is incredibly reliable at the box office, and Despicable Me 4 earned $361,004,205 in 2024.

**6. The Mandalorian and Grogu (May 22, 2026)** - $250-300 million domestic  
The Mandalorian and Grogu will be among the major May 2026 movie releases. This will be the first theatrical Star Wars film since Rise of Skywalker, and Baby Yoda's popularity should drive strong box office performance.

**7. Jumanji 4 (December 2026)** - $200-250 million domestic  
Dwayne Johnson movie (Jumanji 4) is among the huge releases for the holidays. The previous Jumanji films have been surprise box office successes, and Johnson's star power continues to draw audiences.

**8. The Hunger Games: Sunrise on the Reaping (November 2026)** - $180-220 million domestic  
The Hunger Games: Sunrise on the Reaping, starring Joseph Zada as a young Haymitch will revive the popular YA franchise. While YA adaptations have cooled, the Hunger Games brand remains strong.

**9. Scream 7 (February 2026)** - $150-180 million domestic  
director Kevin Williamson's Scream 7 will be among February 2026's releases. Horror sequels have been performing well, and the Scream franchise has loyal fans.

**10. Ice Age 6 (December 18, 2026)** - $140-170 million domestic  
Disney has Ice Age 6 dated December 18, five days before Shrek 5 opens in theaters. Family animated films perform well during the holidays, though this franchise has seen diminishing returns.

---

**Key factors influencing these predictions:**

- **Franchise strength**: Most successful 2024 films were sequels/franchises (Inside Out 2, Deadpool & Wolverine, Despicable Me 4 topped the charts)
- **Star power**: Dwayne Johnson appears in multiple predicted hits, similar to his consistent box office appeal
- **Holiday releases**: December has proven successful for blockbusters like Avatar, Star Wars: The Force Awakens, and Aquaman
- **Animation dominance**: Five of the most prominent animated film franchises will have new entries in 2026

Note: Shrek 5 has been delayed from December 2026 to June 30, 2027, removing it from 2026 contention.