# Ideal Budget Prediction and Genre Suggestions

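This notebook predicts the ideal budget for a desired revenue and popularity (read from `input.json`), suggests genres that match that budget, and writes both results to `results/predicted_parameters_output.json`.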

In [1]:
import pandas as pd
import numpy as np
import joblib
from sklearn.neighbors import NearestNeighbors
import matplotlib.pyplot as plt
import json

In [2]:
# Load the target metrics supplied by the caller.
# The encoding is pinned to UTF-8 (the encoding JSON text is specified to use)
# so the read does not depend on the platform's locale default.
with open('input.json', 'r', encoding='utf-8') as input_file:
    input_data = json.load(input_file)

# Both keys are required; a missing key raises KeyError here.
desired_revenue = input_data['desired_revenue']
desired_popularity = input_data['desired_popularity']

In [3]:
# Restore the fitted estimators and transformers from disk, in a fixed order.
# NOTE: joblib.load unpickles its input, so only load artifacts you trust.
linear_regressor, pca_scaler, budget_scaler, pca, mlb = (
    joblib.load(artifact_path)
    for artifact_path in (
        'models/linear_regressor.joblib',
        'models/pca_scaler.joblib',
        'models/budget_scaler.joblib',
        'models/pca.joblib',
        'models/multilabel_binarizer.joblib',
    )
)

In [4]:
def predict_ideal_budget(desired_revenue, desired_popularity):
    """Estimate the budget that corresponds to the desired revenue and popularity.

    The two targets are scaled with ``pca_scaler``, projected with ``pca``,
    and the first projected component is fed backwards through the fitted
    linear model (``(component - intercept) / coefficient``). The result is
    mapped back to the original budget scale with ``budget_scaler``.

    Parameters
    ----------
    desired_revenue : float
        Target revenue.
    desired_popularity : float
        Target popularity.

    Returns
    -------
    float
        Budget on the original (unscaled) scale.

    Notes
    -----
    Assumes ``linear_regressor`` exposes 2-D ``coef_`` and 1-D ``intercept_``
    (i.e. it was fit with a 2-D target) and that its single input feature is
    the scaled budget — TODO confirm against the training notebook.
    """
    # Features are passed in (popularity, revenue) order.
    target_features = [[desired_popularity, desired_revenue]]
    target_in_pca_space = pca.transform(pca_scaler.transform(target_features))

    slope = linear_regressor.coef_[0][0]
    offset = linear_regressor.intercept_[0]

    # Invert component = slope * scaled_budget + offset for scaled_budget.
    scaled_budget = (target_in_pca_space[0][0] - offset) / slope

    # inverse_transform needs a 2-D array; unwrap the single value afterwards.
    return budget_scaler.inverse_transform(scaled_budget.reshape(-1, 1))[0][0]

In [5]:
def find_nearest_genres(ideal_budget, n_neighbors=3, n_genres=3):
    """Suggest genres taken from the training rows closest to the ideal budget.

    A single query row is built with ``ideal_budget`` in the 'budget' column
    and 0 in every other column, the ``n_neighbors`` nearest training rows
    (Euclidean distance) are looked up, and genres are ranked by how many of
    those rows carry them.

    Parameters
    ----------
    ideal_budget : float
        Budget used for the query row.
        NOTE(review): must be on the same scale as the 'budget' column of
        processed_features.csv — confirm against the preprocessing step.
    n_neighbors : int, default 3
        Number of nearest training rows to inspect.
    n_genres : int, default 3
        Maximum number of genres to return.

    Returns
    -------
    list of str
        Up to ``n_genres`` genre names, most frequent first. Genres that none
        of the neighbours carry are never returned, so the list may be shorter
        than ``n_genres``.

    Raises
    ------
    ValueError
        If the training data has no 'budget' column.
    """
    # Load training data
    X_train = pd.read_csv('../data/processed_features.csv')

    if 'budget' not in X_train.columns:
        raise ValueError("processed_features.csv must contain a 'budget' column")

    # Start from the budget plus every known genre set to 0, then align the
    # row to the training columns (same names, same order). NearestNeighbors
    # compares features positionally, so a differently ordered query row would
    # be rejected by scikit-learn's feature-name check or, on versions without
    # that check, silently compared against the wrong columns. Training
    # columns not present in the query are filled with 0.
    query_row = {'budget': ideal_budget}
    for genre in mlb.classes_:
        query_row.setdefault(genre, 0)
    query_df = pd.DataFrame([query_row]).reindex(
        columns=X_train.columns, fill_value=0
    )

    # Initialize and fit NearestNeighbors
    nn = NearestNeighbors(n_neighbors=n_neighbors, metric='euclidean')
    nn.fit(X_train)

    # Only the neighbour positions are needed; distances are discarded.
    _, indices = nn.kneighbors(query_df)

    # Count genre occurrences among the neighbours.
    known_genres = set(mlb.classes_)
    genre_columns = [col for col in X_train.columns if col in known_genres]
    genre_counts = X_train.iloc[indices[0]][genre_columns].sum()

    # Drop genres no neighbour has, so unrelated genres are not suggested just
    # to fill the list; a stable sort keeps ties in training-column order.
    genre_counts = genre_counts[genre_counts > 0].sort_values(
        ascending=False, kind='stable'
    )
    return genre_counts.index[:n_genres].tolist()

In [6]:
# Derive the budget from the requested revenue / popularity targets
ideal_budget = predict_ideal_budget(
    desired_revenue=desired_revenue,
    desired_popularity=desired_popularity,
)
print(f"Ideal Budget: ${ideal_budget:,.2f}")

# Look up the genres associated with that budget
suggested_genres = find_nearest_genres(ideal_budget=ideal_budget)
print(f"\nSuggested Genres: {suggested_genres}")



Ideal Budget: $11,652,576.60

Suggested Genres: ['Action', 'Science Fiction', 'Adventure']


In [7]:
# Collect the results under the keys used in the output file
output_data = dict(
    ideal_budget=ideal_budget,
    suggested_genres=suggested_genres,
)

# Serialise with 4-space indentation and save to disk
with open('results/predicted_parameters_output.json', 'w') as output_file:
    json.dump(output_data, output_file, indent=4)

print("Output successfully written to predicted_parameters_output.json")

Output successfully written to predicted_parameters_output.json
