In [13]:
from google.cloud import bigquery

# Target project, mart dataset name, and the location the dataset is created in.
project_id = "dylanericsp25"
dataset = "movies_entertainment_mrt"
region = "us-central1"

# Bind the client to project_id explicitly. Without it the client falls back to
# whatever default project the environment's credentials carry, which is not
# guaranteed to be the project the dataset reference below points at.
bq_client = bigquery.Client(project=project_id)

dataset_id = bigquery.Dataset(f"{project_id}.{dataset}")
dataset_id.location = region
# exists_ok=True keeps this cell re-runnable: an already-existing dataset is
# not treated as an error.
resp = bq_client.create_dataset(dataset_id, exists_ok=True)
# Report the project from the dataset that came back, not from the client, so
# the message always names the project the dataset actually lives in.
print("Created dataset {}.{}".format(resp.project, resp.dataset_id))

Created dataset dylanericsp25.movies_entertainment_mrt


In [2]:
# Load the google.cloud.bigquery IPython extension; the cells below rely on
# its %%bigquery cell magic to run their SQL.
%load_ext google.cloud.bigquery



**1.) Which companies produce the highest-grossing movies, and how do their box office earnings compare?**

Company Revenue Mart

In [15]:
%%bigquery
-- Company Revenue mart: one row per brand with the summed lifetime_gross of
-- the Box_Office_Gross rows that share that brand.
-- NOTE(review): assumes Companies has one row per brand; duplicate brand rows
-- would multiply the summed gross. Confirm against the int layer.
CREATE OR REPLACE TABLE movies_entertainment_mrt.company_revenue AS
SELECT
    companies.brand,
    SUM(gross.lifetime_gross) AS total_lifetime_gross
FROM movies_entertainment_int.Companies AS companies
INNER JOIN movies_entertainment_int.Box_Office_Gross AS gross
    ON gross.brand = companies.brand
GROUP BY companies.brand
ORDER BY total_lifetime_gross DESC

Query is running:   0%|          |

Top Performing Companies by Revenue & Releases Mart

In [None]:
%%bigquery
-- Top Performing Companies mart: per brand, the summed lifetime_gross and the
-- number of Box_Office_Gross rows (counted by non-NULL id) for that brand.
-- NOTE(review): assumes Companies has one row per brand; duplicates would
-- inflate both the sum and the release count. Confirm against the int layer.
CREATE OR REPLACE TABLE movies_entertainment_mrt.top_performing_companies_by_revenue_and_releases AS
SELECT
    companies.brand,
    SUM(gross.lifetime_gross) AS total_lifetime_gross,
    COUNT(gross.id) AS total_releases
FROM movies_entertainment_int.Companies AS companies
INNER JOIN movies_entertainment_int.Box_Office_Gross AS gross
    ON gross.brand = companies.brand
GROUP BY companies.brand
ORDER BY
    total_lifetime_gross DESC,
    total_releases DESC

Query is running:   0%|          |

Downloading:   0%|          |

Unnamed: 0,brand,total_lifetime_gross,total_releases
0,Tim Burton-Johnny Depp,2673528880,8
1,DC Comics,1066690716,2
2,Legendary Pictures,652270625,1
3,DreamWorks Animation,441226247,1
4,Hasbro,402111870,1
5,MonsterVerse,401352138,2
6,Nickelodeon,382409508,2
7,Illumination Entertainment,368384330,1
8,Blumhouse Productions,352081330,2
9,Stephen King,327481748,1


**2.) Which combinations of type, genre, original release year, and rating have the most title releases? What does the budget look like for these movies (grouped by language for currency purposes)?**

Netflix Performance Mart

In [17]:
%%bigquery
-- Netflix Performance mart: number of rated Netflix titles for each
-- (type, genre, original release year, rating) combination.
CREATE OR REPLACE TABLE movies_entertainment_mrt.netflix_performance AS
SELECT
    netflix.original_type,
    meta.genre,
    netflix.original_release_year,
    netflix.original_rating,
    COUNT(netflix.id) AS total_titles
FROM movies_entertainment_int.Netflix_Movies_And_Tvshows AS netflix
INNER JOIN movies_entertainment_int.Movies_Metadata AS meta
    -- The metadata id is cast to STRING before being compared with the
    -- Netflix id; SAFE_CAST yields NULL (no match) instead of raising on a
    -- value that cannot be converted.
    ON SAFE_CAST(meta.id AS STRING) = netflix.id
WHERE netflix.original_rating IS NOT NULL
GROUP BY
    netflix.original_type,
    meta.genre,
    netflix.original_release_year,
    netflix.original_rating
ORDER BY total_titles DESC

Query is running:   0%|          |

IMDB Movie Metadata Mart

In [18]:
%%bigquery
-- IMDB Movie Metadata mart: number of movies per
-- (original language, Netflix rating, budget bucket).
CREATE OR REPLACE TABLE movies_entertainment_mrt.imdb_movies_metadata AS
SELECT
    m.original_language,
    COUNT(m.id) AS total_movies,
    n.original_rating,
    -- Budget buckets: below 1M, 1M through 50M inclusive, and everything above.
    -- NOTE(review): a budget of 0 lands in the low-budget bucket; confirm that
    -- 0 is a real figure and not a placeholder for "unknown".
    CASE
        WHEN m.budget < 1000000 THEN 'Low Budget (<1M)'
        WHEN m.budget BETWEEN 1000000 AND 50000000 THEN 'Medium Budget (1M-50M)'
        ELSE 'High Budget (>50M)'
    END AS budget_category
FROM movies_entertainment_int.Movies_Metadata m
-- INNER JOIN rather than LEFT JOIN: the WHERE clause below requires a non-NULL
-- n.original_rating, which discards every unmatched row a LEFT JOIN would
-- keep. Spelling it as an inner join states what the query really does and
-- returns the same rows.
INNER JOIN movies_entertainment_int.Netflix_Movies_And_Tvshows n
    ON SAFE_CAST(m.id AS STRING) = n.id
WHERE m.budget IS NOT NULL
    AND n.original_rating IS NOT NULL
GROUP BY m.original_language, n.original_rating, budget_category
ORDER BY total_movies DESC

Query is running:   0%|          |

**3.) How does the budget of a movie correlate with its box office gross performance?**

Box Office vs. Budget Mart

In [19]:
%%bigquery
-- Box Office vs. Budget mart: one row per joined movie with its budget,
-- lifetime gross, and return on budget.
CREATE OR REPLACE TABLE movies_entertainment_mrt.box_office_vs_budget AS
SELECT
    m.id AS movie_id,
    m.original_title,
    m.genre,
    m.budget,
    b.lifetime_gross,
    -- (gross - budget) / budget, rounded to 2 decimals. Despite the column
    -- name this is a ratio, not a percentage (1.5 means +150%); the name is
    -- kept so that anything already reading this table keeps working.
    -- SAFE_DIVIDE is the single zero-divisor guard: the WHERE clause already
    -- restricts rows to budget > 0, so the former CASE branch for budget = 0
    -- could never fire and the extra NULLIF was redundant.
    ROUND(SAFE_DIVIDE(b.lifetime_gross - m.budget, m.budget), 2) AS roi_percentage
FROM movies_entertainment_int.Movies_Metadata m
JOIN movies_entertainment_int.Box_Office_Gross b
    ON m.id = b.id
-- budget > 0 also rejects NULL budgets, so no separate IS NOT NULL is needed.
WHERE m.budget > 0
    AND b.lifetime_gross IS NOT NULL
ORDER BY roi_percentage DESC

Query is running:   0%|          |

**4.) Which genres are most frequently produced, and how do they perform across different companies?**

Genre Performance Mart

In [20]:
%%bigquery
-- Genre Performance mart: per genre, the number of joined movies, their
-- summed lifetime gross, and the average gross per movie (2 decimals).
CREATE OR REPLACE TABLE movies_entertainment_mrt.genre_performance AS
SELECT
    meta.genre,
    COUNT(meta.id) AS total_movies,
    SUM(gross.lifetime_gross) AS total_revenue,
    ROUND(AVG(gross.lifetime_gross), 2) AS avg_revenue_per_movie
FROM movies_entertainment_int.Movies_Metadata AS meta
INNER JOIN movies_entertainment_int.Box_Office_Gross AS gross
    ON gross.id = meta.id
WHERE gross.lifetime_gross IS NOT NULL
    AND meta.genre IS NOT NULL
GROUP BY meta.genre
ORDER BY
    total_movies DESC,
    total_revenue DESC


Query is running:   0%|          |

Company Genre Trends Mart

In [21]:
%%bigquery
-- Company Genre Trends mart: per (brand, genre), the number of joined movies,
-- their summed lifetime gross, and the average gross per movie (2 decimals).
CREATE OR REPLACE TABLE movies_entertainment_mrt.company_genre_trends AS
SELECT
    companies.brand,
    meta.genre,
    COUNT(meta.id) AS total_movies,
    SUM(gross.lifetime_gross) AS total_revenue,
    ROUND(AVG(gross.lifetime_gross), 2) AS avg_revenue_per_movie
FROM movies_entertainment_int.Movies_Metadata AS meta
INNER JOIN movies_entertainment_int.Box_Office_Gross AS gross
    ON gross.id = meta.id
INNER JOIN movies_entertainment_int.Companies AS companies
    ON companies.brand = gross.brand
WHERE gross.lifetime_gross IS NOT NULL
    AND meta.genre IS NOT NULL
GROUP BY
    companies.brand,
    meta.genre
ORDER BY total_revenue DESC

Query is running:   0%|          |

**5.) How do audience ratings (PG, PG-13, R, etc.) correlate with movie budgets, revenue, and genre trends?**

Audience Rating vs. Budget Mart

In [22]:
%%bigquery
-- Audience Rating vs. Budget mart: per Netflix rating, the number of matched
-- movies and the average (2 decimals), maximum, and minimum budget.
CREATE OR REPLACE TABLE movies_entertainment_mrt.audience_rating_vs_budget AS
SELECT
    netflix.original_rating,
    COUNT(meta.id) AS total_movies,
    ROUND(AVG(meta.budget), 2) AS avg_budget,
    MAX(meta.budget) AS max_budget,
    MIN(meta.budget) AS min_budget
FROM movies_entertainment_int.Movies_Metadata AS meta
INNER JOIN movies_entertainment_int.Netflix_Movies_And_Tvshows AS netflix
    -- SAFE_CAST gives NULL (no match) instead of an error for an id that
    -- cannot be converted to STRING.
    ON netflix.id = SAFE_CAST(meta.id AS STRING)
WHERE meta.budget IS NOT NULL
    AND netflix.original_rating IS NOT NULL
GROUP BY netflix.original_rating
ORDER BY avg_budget DESC



Query is running:   0%|          |

Audience Rating vs. Revenue Mart

In [23]:
%%bigquery
-- Audience Rating vs. Revenue mart: per Netflix rating, the number of matched
-- movies, their summed lifetime gross, and the average gross per movie
-- (2 decimals).
CREATE OR REPLACE TABLE movies_entertainment_mrt.audience_rating_vs_revenue AS
SELECT
    netflix.original_rating,
    COUNT(meta.id) AS total_movies,
    SUM(gross.lifetime_gross) AS total_revenue,
    ROUND(AVG(gross.lifetime_gross), 2) AS avg_revenue_per_movie
FROM movies_entertainment_int.Movies_Metadata AS meta
INNER JOIN movies_entertainment_int.Box_Office_Gross AS gross
    ON gross.id = meta.id
INNER JOIN movies_entertainment_int.Netflix_Movies_And_Tvshows AS netflix
    -- SAFE_CAST gives NULL (no match) instead of an error for an id that
    -- cannot be converted to STRING.
    ON netflix.id = SAFE_CAST(meta.id AS STRING)
WHERE gross.lifetime_gross IS NOT NULL
    AND netflix.original_rating IS NOT NULL
GROUP BY netflix.original_rating
ORDER BY total_revenue DESC



Query is running:   0%|          |

Audience Rating vs. Genre Trends Mart

In [24]:
%%bigquery
-- Audience Rating vs. Genre Trends mart: number of matched movies for each
-- (genre, Netflix rating) pair.
CREATE OR REPLACE TABLE movies_entertainment_mrt.audience_rating_vs_genre AS
SELECT
    meta.genre,
    netflix.original_rating,
    COUNT(meta.id) AS total_movies
FROM movies_entertainment_int.Movies_Metadata AS meta
INNER JOIN movies_entertainment_int.Netflix_Movies_And_Tvshows AS netflix
    -- SAFE_CAST gives NULL (no match) instead of an error for an id that
    -- cannot be converted to STRING.
    ON netflix.id = SAFE_CAST(meta.id AS STRING)
WHERE meta.genre IS NOT NULL
    AND netflix.original_rating IS NOT NULL
GROUP BY
    meta.genre,
    netflix.original_rating
ORDER BY
    meta.genre,
    total_movies DESC


Query is running:   0%|          |