# Movie Success Prediction
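This notebook predicts the expected revenue and popularity of a movie from a given budget and list of genres. Inputs are read from `input.json`, and the predictions are written to `results/predicted_success_output.json`.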

In [14]:
import json
import os
import warnings

import joblib
import nbformat
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA

In [15]:
# Load relevant inputs
# Shape of input.json, as implied by the keys read below:
#   {"budget": <number>, "genres": [<genre name>, ...]}
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform's default encoding (which differs between operating systems).
with open('input.json', 'r', encoding='utf-8') as f:
    input_data = json.load(f)

budget = input_data['budget']
# NOTE: `genre` holds the whole list of requested genres, despite the singular name.
genre = input_data['genres']

In [16]:
# Load the persisted regressor and preprocessing objects from models/.
# NOTE: joblib.load unpickles its input, so only load artifacts from a trusted source.
(linear_regressor, pca_scaler, budget_scaler, pca, mlb) = map(
    joblib.load,
    (
        'models/linear_regressor.joblib',
        'models/pca_scaler.joblib',
        'models/budget_scaler.joblib',
        'models/pca.joblib',
        'models/multilabel_binarizer.joblib',
    ),
)

In [None]:
def predict_success(budget, desired_genres):
    """Predict movie success metrics for a given budget and genres.

    Builds a single-row feature frame (the scaled budget followed by one 0/1
    indicator per entry of ``mlb.classes_``), predicts the first PCA component
    with ``linear_regressor``, zero-fills the remaining components, and maps
    the result back through ``pca`` and then ``pca_scaler``.

    Relies on the notebook-level objects ``mlb``, ``budget_scaler``,
    ``linear_regressor``, ``pca`` and ``pca_scaler`` being loaded already.

    Args:
        budget: Production budget; passed through ``budget_scaler.transform``.
        desired_genres: A genre name, or an iterable of genre names. A bare
            string is treated as ONE genre. Names that are not in
            ``mlb.classes_`` do not contribute to the prediction and trigger a
            ``UserWarning``.

    Returns:
        tuple: ``(revenue, popularity)``, read from columns 1 and 0 of the
        inverse-transformed row.
        NOTE(review): this assumes ``pca_scaler`` was fitted on columns ordered
        ``[popularity, revenue]`` -- confirm against the training notebook.
    """
    # A bare string would otherwise be matched by substring (e.g. the class
    # "Fiction" is "in" the string "Science Fiction"), so wrap it in a list.
    if isinstance(desired_genres, str):
        desired_genres = [desired_genres]
    # Materialise once so one-shot iterables can be scanned repeatedly.
    requested = list(desired_genres)
    known_genres = list(mlb.classes_)

    # Typos / wrong casing used to be dropped silently; surface them instead.
    unknown = [name for name in requested if name not in known_genres]
    if unknown:
        warnings.warn(
            f"Ignoring genres unknown to the model: {unknown}",
            stacklevel=2,
        )

    # Feature row: budget first, then one indicator per known genre, in the
    # binarizer's class order.
    features = {'budget': budget}
    for name in known_genres:
        features[name] = 1 if name in requested else 0

    # Create DataFrame and scale budget
    input_df = pd.DataFrame([features])
    input_df['budget'] = budget_scaler.transform(input_df[['budget']])

    # Predict PCA value (only the first principal component is modelled)
    predicted_pca = linear_regressor.predict(input_df)

    # Rebuild a full PCA-space row: the prediction goes in component 0 and all
    # other components stay 0. The width comes from the fitted PCA when
    # available, falling back to the 2 components this notebook was written for.
    n_components = getattr(pca, 'n_components_', 2)
    full_pca = np.zeros((1, n_components))
    full_pca[:, 0] = np.ravel(predicted_pca)

    # Transform back to original scale
    original_scaled = pca.inverse_transform(full_pca)
    original_features = pca_scaler.inverse_transform(original_scaled)

    revenue = original_features[0][1]
    popularity = original_features[0][0]
    return revenue, popularity

In [23]:
# Run the prediction for the loaded budget/genres and report inputs and results together.
expected_revenue, expected_popularity = predict_success(budget, genre)
print(
    f"Input Budget: ${budget:,.2f}",
    f"Desired Genres: {genre}",
    f"\nExpected Revenue: ${expected_revenue:,.2f}",
    f"Expected Popularity Score: {expected_popularity:.2f}",
    sep="\n",
)

Input Budget: $100,000,000.00
Desired Genres: ['Action', 'Adventure']

Expected Revenue: $297,526,464.36
Expected Popularity Score: 40.13


In [None]:
# Prepare the output data.
# float() converts NumPy scalars to plain Python floats so json.dump does not
# fail on NumPy types it cannot serialize (e.g. float32).
output_data = {
    'expected_revenue': float(expected_revenue),
    'expected_popularity': float(expected_popularity)
}

# Write the output data to a JSON file. Create results/ first so a fresh
# checkout without that directory does not fail with FileNotFoundError.
os.makedirs('results', exist_ok=True)
with open('results/predicted_success_output.json', 'w') as f:
    json.dump(output_data, f, indent=4)

print("Output successfully written to predicted_success_output.json")