In [1]:
import altair as alt
import pandas as pd

# Load data
movies_credits = pd.read_csv('movies_and_credits.csv')

# Parse release_date, expected in MM/DD/YYYY form; values that do not match
# the format become NaT because of errors='coerce'.
release_dates = pd.to_datetime(
    movies_credits['release_date'],
    format='%m/%d/%Y',
    errors='coerce'
)

# Build the cleaned frame in one chain: swap in the parsed dates, drop rows
# whose date could not be parsed, then add the first day of each release month.
movies_credits = (
    movies_credits
    .assign(release_date=release_dates)
    .dropna(subset=['release_date'])
    .assign(
        year_month=lambda frame: frame['release_date'].dt.to_period('M').dt.to_timestamp()
    )
)

# errors='coerce' hides parse failures, so a wrong date format would silently
# leave nothing to plot. Fail here with a clear message instead of letting
# int(NaN) raise an uninformative ValueError in the year-range step below.
if movies_credits.empty:
    raise ValueError(
        "No rows in 'movies_and_credits.csv' have a release_date matching '%m/%d/%Y'"
    )

# Group by year-month and count movies
monthly_data = movies_credits.groupby('year_month').size().reset_index(name='count')

# Get year range for sliders
release_years = movies_credits['release_date'].dt.year
min_year = int(release_years.min())
max_year = int(release_years.max())

# Slider widgets: the start slider stops one year before the last year and the
# end slider begins one year after the first year.
start_year_slider = alt.binding_range(
    name='Start Year:', min=min_year, max=max_year - 1, step=1
)
end_year_slider = alt.binding_range(
    name='End Year:', min=min_year + 1, max=max_year, step=1
)

# Parameters driven by the sliders; the initial window spans every year
start_year_var = alt.param(bind=start_year_slider, value=min_year)
end_year_var = alt.param(bind=end_year_slider, value=max_year)

# Keep months whose year lies inside [start, end]. If the start slider is
# dragged past the end slider no row satisfies both tests and the chart is empty.
within_selected_years = (
    (alt.datum.year >= start_year_var) & (alt.datum.year <= end_year_var)
)

# Encodings
month_axis = alt.X(
    'year_month:T',
    title='Date (Year-Month)',
    axis=alt.Axis(format='%Y-%m', labelAngle=-45),
)
count_axis = alt.Y(
    'count:Q',
    title='Number of Movies Released',
    scale=alt.Scale(zero=True),
)
hover_fields = [
    alt.Tooltip('year_month:T', title='Month', format='%B %Y'),
    alt.Tooltip('count:Q', title='Movies Released'),
]

# Line chart of monthly release counts, filtered by the two year sliders
release_line = (
    alt.Chart(monthly_data)
    .transform_calculate(year='year(datum.year_month)')
    .transform_filter(within_selected_years)
    .mark_line(point=True, color='steelblue', size=3)
    .encode(x=month_axis, y=count_axis, tooltip=hover_fields)
    .properties(
        width=900,
        height=400,
        title='Movie Release Patterns by Year and Month - Interactive Time Scroller',
    )
    .add_params(start_year_var, end_year_var)
)

# Chart-wide font sizes and title placement
chart = (
    release_line
    .configure_axis(labelFontSize=12, titleFontSize=14)
    .configure_title(fontSize=16, anchor='middle')
)


chart

In [2]:
#5. Grouped Bar chart (D3): the average budget by major studios across different countries

import altair as alt
import pandas as pd
import json

# Load data
# Re-read the CSV so this cell starts from the full file: the first cell
# rebinds `movies_credits` to a frame with unparseable release dates dropped.
movies_credits = pd.read_csv('movies_and_credits.csv')

# Use extraction functions to parse the two data columns
def _first_entry_name(raw):
    """Return the ``name`` of the first object in a JSON-encoded list.

    Args:
        raw: A cell value. Only ``str`` values are parsed; anything else
            (for example ``None`` or a float NaN) yields ``None``.

    Returns:
        The ``name`` value of the first list element, or ``None`` when the
        value is not a string, is not valid JSON, does not decode to a
        non-empty list, or its first element is not an object with a
        ``name`` key.
    """
    if not isinstance(raw, str):
        return None
    try:
        entries = json.loads(raw)
    except json.JSONDecodeError:
        # One malformed cell should become a missing value, not abort .apply()
        return None
    if not isinstance(entries, list) or not entries:
        return None
    first = entries[0]
    return first.get('name') if isinstance(first, dict) else None

def extract_studio(row):
    """Return the name of the first production company listed in ``row``, or None."""
    return _first_entry_name(row)

def extract_country(row):
    """Return the name of the first production country listed in ``row``, or None."""
    return _first_entry_name(row)

# Derive the studio and country columns from their JSON source columns
column_extractors = {
    'studio': ('production_companies', extract_studio),
    'country': ('production_countries', extract_country),
}
for target_column, (source_column, extractor) in column_extractors.items():
    movies_credits[target_column] = movies_credits[source_column].apply(extractor)

# Keep rows that have a studio, a country and a strictly positive budget
required_present = movies_credits[['studio', 'country', 'budget']].notna().all(axis=1)
positive_budget = movies_credits['budget'] > 0
filtered = movies_credits[required_present & positive_budget]

# The 10 studios and 8 countries with the most movies among the remaining rows
top_studios = filtered['studio'].value_counts().index[:10].tolist()
top_countries = filtered['country'].value_counts().index[:8].tolist()

# Restrict to movies whose studio and country are both in those top lists
in_top_studio = filtered['studio'].isin(top_studios)
in_top_country = filtered['country'].isin(top_countries)
filtered = filtered[in_top_studio & in_top_country]

# Mean budget per (country, studio) pair, plus the same value in millions
budget_by_group = filtered.groupby(['country', 'studio'], as_index=False)['budget'].mean()
budget_by_group['budget_millions'] = budget_by_group['budget'].div(1_000_000)

# Grouped bar chart: one cluster per country, one bar per studio inside it
country_axis = alt.X(
    'country:N',
    title='Country',
    axis=alt.Axis(labelAngle=-45),
)
budget_axis = alt.Y(
    'budget_millions:Q',
    title='Average Budget (Millions USD)',
    scale=alt.Scale(zero=True),
)
studio_color = alt.Color(
    'studio:N',
    title='Movie Company',
    scale=alt.Scale(scheme='category20'),
)
bar_tooltip = [
    alt.Tooltip('country:N', title='Country'),
    alt.Tooltip('studio:N', title='Studio'),
    alt.Tooltip('budget_millions:Q', title='Avg Budget ($M)', format=',.2f'),
]

grouped_bars = (
    alt.Chart(budget_by_group)
    .mark_bar()
    .encode(
        x=country_axis,
        y=budget_axis,
        color=studio_color,
        xOffset='studio:N',  # places the studio bars side by side within a country
        tooltip=bar_tooltip,
    )
    .properties(
        width=1000,
        height=600,
        title='Average Budget by Major Studios Across Different Countries',
    )
)

# Chart-wide font sizes and title placement
chart = (
    grouped_bars
    .configure_axis(labelFontSize=14, titleFontSize=16)
    .configure_title(fontSize=18, anchor='middle')
    .configure_legend(titleFontSize=14, labelFontSize=12)
)

chart