In [None]:
import matplotlib.pyplot as plt
import matplotlib as mpl
import numpy as np
import pandas as pd
import matplotlib.dates as mdates
import matplotlib.font_manager as fm
from matplotlib.offsetbox import OffsetImage, AnnotationBbox
from PIL import Image
import requests
from io import BytesIO

def setup_theme():
    """Apply the Notes on Nusantara (NoN) plot theme to matplotlib's rcParams.

    Activates the 'fivethirtyeight' style sheet first, then overrides font,
    figure, axes, grid, text, tick, legend and line settings with the NoN
    palette.

    Returns:
        dict: The palette, mapping a role name ("background", "grid", "text",
        "highlight1" .. "highlight6") to its hex colour string.
    """
    # Base style sheet; every rcParam below is layered on top of it.
    plt.style.use('fivethirtyeight')

    palette = {
        "background": "#305F72",  # Dark Cyan Blue
        "grid": "#A9B7C0",        # Light Grayish Blue
        "text": "#EAF2F8",        # Very Pale Blue
        "highlight1": "#FFD700",  # Gold
        "highlight2": "#98FB98",  # Pale Green
        "highlight3": "#FF69B4",  # Hot Pink
        "highlight4": "#E6E6FA",  # Lavender
        "highlight5": "#B0E0E6",  # Powder Blue
        "highlight6": "#B8860B"   # Dark Goldenrod
    }
    backdrop = palette["background"]
    ink = palette["text"]

    mpl.rcParams.update({
        # Font
        'font.family': 'sans-serif',
        'font.sans-serif': ['Dejavu Sans'],

        # Figure
        'figure.figsize': (9, 6),
        'figure.facecolor': backdrop,
        'figure.edgecolor': backdrop,

        # Axes
        'axes.facecolor': backdrop,
        'axes.edgecolor': ink,
        'axes.labelcolor': ink,
        'axes.grid': True,
        'axes.titlesize': 28,
        'axes.labelsize': 14,
        'axes.spines.top': False,
        'axes.spines.right': False,

        # Grid
        'grid.color': palette["grid"],
        'grid.linestyle': '-',
        'grid.alpha': 0.3,

        # Text
        'text.color': ink,

        # Ticks
        'xtick.color': ink,
        'ytick.color': ink,
        'xtick.labelsize': 12,
        'ytick.labelsize': 12,

        # Legend (frame blends into the background)
        'legend.facecolor': backdrop,
        'legend.edgecolor': backdrop,
        'legend.fontsize': 14,
        'legend.framealpha': 0.8,

        # Lines
        'lines.linewidth': 4,
    })

    return palette

# Load the logo
def get_logo(url="https://github.com/fawwazanvilen/Notes-on-Nusantara/blob/main/_logo/non_logo.png?raw=true",
             timeout=10):
    """Download the logo image and return it as a PIL image.

    Parameters:
    url (str): Address of the image file to download.
    timeout (float): Seconds to wait for the server before giving up.

    Returns:
    The object returned by ``Image.open`` for the downloaded bytes.

    Raises:
    requests.HTTPError: If the server answers with an error status.
    """
    # Without a timeout requests can wait forever on an unresponsive server.
    response = requests.get(url, timeout=timeout)
    # Fail here on 4xx/5xx rather than handing an HTML error page to PIL.
    response.raise_for_status()
    return Image.open(BytesIO(response.content))


In [None]:
# Mudik data
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import requests
import datetime
from datetime import timedelta
import numpy as np
import time

# Idul Fitri date (YYYY-MM-DD) for each analysed year, keyed by the year as a
# string. The key is the first four characters of the date itself.
idul_fitri_dates = {
    lebaran_day[:4]: lebaran_day
    for lebaran_day in (
        "2021-05-13",
        "2022-05-02",
        "2023-04-22",
        "2024-04-10",
        "2025-03-31",
    )
}

# Function to call the API with retry logic
def get_data(start_date_1, end_date_1, start_date_2, end_date_2, max_retries=3, timeout=30):
    """POST a two-period query to the Kemenhub production API, retrying on failure.

    Parameters:
    start_date_1 (str): Sent as "tanggal_awal_1".
    end_date_1 (str): Sent as "tanggal_akhir_1".
    start_date_2 (str): Sent as "tanggal_awal_2".
    end_date_2 (str): Sent as "tanggal_akhir_2".
    max_retries (int): Maximum number of attempts.
    timeout (float): Seconds to wait for each HTTP request.

    Returns:
    dict: The decoded JSON body, returned only when it contains
    data -> result.

    Raises:
    Exception: When no attempt produced a usable response.
    """
    url = "https://strategi.kemenhub.go.id/api/hubnet/data-pantau-produksi-rutin"
    payload = {
        "tanggal_awal_1": start_date_1,
        "tanggal_akhir_1": end_date_1,
        "tanggal_awal_2": start_date_2,
        "tanggal_akhir_2": end_date_2,
        "provinsi": "",
        "moda": "",
        "endpoints": ["data-produksi"]
    }
    headers = {
        "Content-Type": "application/json",
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36"
    }

    for attempt in range(1, max_retries + 1):
        try:
            print(f"Requesting data for periods: {start_date_1} to {end_date_1} and {start_date_2} to {end_date_2}")
            # A timeout keeps a stalled connection from blocking the notebook forever.
            response = requests.post(url, json=payload, headers=headers, timeout=timeout)
            if response.status_code == 200:
                data = response.json()
                # Verify that we got valid data
                if 'data' in data and 'result' in data['data']:
                    print("Data successfully retrieved.")
                    return data
                print(f"API returned incomplete data: {data}")
            else:
                print(f"API request failed with status code: {response.status_code}")
        except Exception as e:
            # Deliberately broad: network errors, bad JSON and unexpected
            # payload shapes are all treated as "try again".
            print(f"Error making API request: {e}")

        # Back off exponentially, but don't wait after the final attempt --
        # nothing is left to retry.
        if attempt < max_retries:
            wait_time = 2 ** attempt
            print(f"Retrying in {wait_time} seconds... (Attempt {attempt}/{max_retries})")
            time.sleep(wait_time)

    raise Exception("Failed to retrieve data after multiple attempts")

# Function to calculate dates relative to Idul Fitri for each year
def calc_relative_dates(year, days_before, days_after):
    """
    Build the date window surrounding a given year's Idul Fitri.

    Parameters:
    year (str): Year as string; looked up in idul_fitri_dates
    days_before (int): How many days ahead of Idul Fitri the window opens
    days_after (int): How many days past Idul Fitri the window closes

    Returns:
    tuple: (start_date, end_date) as YYYY-MM-DD strings
    """
    iso_format = "%Y-%m-%d"
    lebaran = datetime.datetime.strptime(idul_fitri_dates[year], iso_format)
    window_open = lebaran - timedelta(days=days_before)
    window_close = lebaran + timedelta(days=days_after)
    return tuple(day.strftime(iso_format) for day in (window_open, window_close))

# Process the data into DataFrames
def extract_passenger_data(response, period_key):
    """Flatten one period of an API response into a single DataFrame.

    Walks response['data']['result'][period_key]['data_produksi']['data'],
    builds one frame per transport mode found there ('udara', 'laut', 'ka',
    'jalan'), stacks them and adds a 'total_passengers' column.

    Parameters:
    response (dict): Decoded API response.
    period_key (str): Key of the period inside 'result' (e.g. '1' or '2').

    Returns:
    DataFrame: Rows from all modes with a 'mode' label column, 'tanggal'
    parsed to datetime, every other column (except 'tgl') coerced to numeric,
    and 'total_passengers'. An empty DataFrame is returned when the expected
    structure is missing or no mode holds any rows.
    """
    # Check if the expected data structure exists
    if not response or 'data' not in response or 'result' not in response['data']:
        print(f"Error: Invalid response structure: {response}")
        return pd.DataFrame()  # Return empty dataframe
    if period_key not in response['data']['result']:
        print(f"Error: Period key '{period_key}' not found in response: {response['data']['result'].keys()}")
        return pd.DataFrame()
    if 'data_produksi' not in response['data']['result'][period_key]:
        print(f"Error: 'data_produksi' not found in period {period_key}: {response['data']['result'][period_key].keys()}")
        return pd.DataFrame()
    if 'data' not in response['data']['result'][period_key]['data_produksi']:
        print(f"Error: 'data' not found in data_produksi: {response['data']['result'][period_key]['data_produksi'].keys()}")
        return pd.DataFrame()
    result = response['data']['result'][period_key]['data_produksi']['data']

    # API key -> label, in the order the frames are stacked.
    transport_modes = (
        ('udara', 'Air'),
        ('laut', 'Sea'),
        ('ka', 'Rail'),
        ('jalan', 'Road'),
    )
    all_modes = []
    for api_key, label in transport_modes:
        if api_key not in result:
            continue
        try:
            mode_df = pd.DataFrame(result[api_key])
            mode_df['mode'] = label
        except Exception as e:
            print(f"Error processing {label.lower()} transport data: {e}")
            continue
        # A mode with no rows has no 'tanggal' column; if every mode were
        # empty the date conversion below would raise KeyError.
        if mode_df.empty:
            continue
        all_modes.append(mode_df)

    if not all_modes:
        print(f"Warning: No transport data found in period {period_key}")
        return pd.DataFrame()

    # Combine all modes (index is left as-is, so labels repeat across modes)
    combined_df = pd.concat(all_modes)
    combined_df['tanggal'] = pd.to_datetime(combined_df['tanggal'])

    # Coerce every remaining column to numeric; unparseable values become NaN
    numeric_cols = []
    for col in combined_df.columns:
        if col in ('tanggal', 'tgl', 'mode'):
            continue
        try:
            combined_df[col] = pd.to_numeric(combined_df[col], errors='coerce')
        except (TypeError, ValueError):
            print(f"Column {col} couldn't be converted to numeric")
        else:
            numeric_cols.append(col)

    # Calculate total passengers (arrival + departure)
    if 'jml_pnp_dtg' in combined_df.columns and 'jml_pnp_brgkt' in combined_df.columns:
        # Row-wise sum that skips NaN: a mode reporting only one of the two
        # columns still contributes its passengers instead of becoming NaN.
        # min_count=1 keeps the total NaN only when both values are missing.
        # NOTE(review): rows from a mode that reports only 'penumpang' would
        # still be NaN here -- confirm against the API schema.
        combined_df['total_passengers'] = combined_df[['jml_pnp_dtg', 'jml_pnp_brgkt']].sum(axis=1, min_count=1)
    elif 'penumpang' in combined_df.columns:
        combined_df['total_passengers'] = combined_df['penumpang']
    else:
        # Find columns that might contain passenger numbers
        possible_pax_cols = [col for col in numeric_cols if 'pnp' in col.lower() or 'penumpang' in col.lower()]
        if possible_pax_cols:
            combined_df['total_passengers'] = combined_df[possible_pax_cols].sum(axis=1)
        else:
            print(f"Warning: Could not determine passenger columns. Available columns: {combined_df.columns.tolist()}")
            combined_df['total_passengers'] = 0
    return combined_df

# Function to fetch data with parameters
def fetch_mudik_data(years, days_before, days_after):
    """
    Fetch and process data for specified years and date range

    The API takes two periods per request, so years are requested in
    consecutive pairs; a leftover last year is requested together with a
    filler period whose data is discarded.

    Parameters:
    years (list): List of year strings to process
    days_before (int): Days before Idul Fitri to start
    days_after (int): Days after Idul Fitri to end

    Returns:
    DataFrame: Combined data for all years, with an integer 'year' column and
    'days_relative_to_lebaran' (row date minus that year's Idul Fitri, in days)

    Raises:
    ValueError: If no rows could be extracted for any requested year
    """
    all_dataframes = []

    def _collect(response, period_key, year):
        # Tag one period's rows with their year; drop periods with no rows so
        # the "no data" check below can actually trigger.
        df = extract_passenger_data(response, period_key)
        if df.empty:
            print(f"Warning: No data extracted for {year}; skipping")
            return
        df['year'] = int(year)
        all_dataframes.append(df)

    # Process consecutive year pairs
    for i in range(0, len(years) - 1, 2):
        year1, year2 = years[i], years[i + 1]
        start1, end1 = calc_relative_dates(year1, days_before, days_after)
        start2, end2 = calc_relative_dates(year2, days_before, days_after)

        print(f"\nFetching data for {year1} & {year2}...")
        print(f"Period for {year1}: {start1} to {end1}")
        print(f"Period for {year2}: {start2} to {end2}")

        data = get_data(start1, end1, start2, end2)
        _collect(data, '1', year1)
        _collect(data, '2', year2)

    # Handle odd number of years (last year alone if needed)
    if len(years) % 2 == 1:
        last_year = years[-1]
        start1, end1 = calc_relative_dates(last_year, days_before, days_after)

        # The second period is only a filler. Use the previous year when its
        # Idul Fitri date is known; otherwise repeat the same window rather
        # than failing with KeyError on the lookup.
        prev_year = str(int(last_year) - 1)
        if prev_year in idul_fitri_dates:
            start2, end2 = calc_relative_dates(prev_year, days_before, days_after)
        else:
            start2, end2 = start1, end1

        print(f"\nFetching data for {last_year}...")
        print(f"Period: {start1} to {end1}")

        data = get_data(start1, end1, start2, end2)
        _collect(data, '1', last_year)

    # Combine all data
    if not all_dataframes:
        raise ValueError("No data available for any of the requested years")

    combined_df = pd.concat(all_dataframes)

    # Create relative days column: map each row's year to its Idul Fitri date
    # once per column instead of re-parsing the date for every row.
    lebaran_dates = pd.to_datetime(combined_df['year'].astype(str).map(idul_fitri_dates))
    combined_df['days_relative_to_lebaran'] = (combined_df['tanggal'] - lebaran_dates).dt.days

    return combined_df


In [None]:
# Create the plot
if __name__ == "__main__":
    ## DATA
    # Analysis window around Idul Fitri: H-days_before .. H+days_after
    years_to_analyze = ["2021", "2022", "2023", "2024", "2025"]
    days_before = 7  # Change this to your desired days before Idul Fitri
    days_after = 4   # Change this to your desired days after Idul Fitri

    # Print the date ranges we'll be requesting
    print("Date ranges for analysis:")
    for year in years_to_analyze:
        start, end = calc_relative_dates(year, days_before, days_after)
        print(f"Year {year}: H-{days_before} to H+{days_after} corresponds to {start} to {end}")

    # Always fetch, so the figure is built from a fresh API response
    print("\nFetching new data...")
    all_years_df = fetch_mudik_data(years_to_analyze, days_before, days_after)

    # Display some basic info about the combined dataset
    print("\nDataset summary:")
    print(f"Total rows: {len(all_years_df)}")
    print(f"Date range: {all_years_df['tanggal'].min()} to {all_years_df['tanggal'].max()}")
    print(f"Years included: {all_years_df['year'].unique()}")
    print(f"Transport modes: {all_years_df['mode'].unique()}")
    print(f"Days relative to lebaran range: {all_years_df['days_relative_to_lebaran'].min()} to {all_years_df['days_relative_to_lebaran'].max()}")

    ## PLOTTING
    # Setup theme and get colors
    colors = setup_theme()

    # Get logo
    logo = get_logo()

    # Create the figure and plot data
    fig, ax = plt.subplots()

    # One line per year: passengers summed over all rows for each relative day
    years_available = sorted(all_years_df['year'].unique())
    # Theme highlight palette; currently unused because the lines take the
    # style sheet's default colour cycle.
    highlight_colors = [colors[f"highlight{i}"] for i in range(1, 7)]
    for year in years_available:
        year_data = all_years_df[all_years_df['year'] == year]
        # Group by days relative to lebaran and sum all passengers
        grouped = year_data.groupby('days_relative_to_lebaran')['total_passengers'].sum().reset_index()
        line, = ax.plot(grouped['days_relative_to_lebaran'], grouped['total_passengers'],
                        marker='o', label=str(year), linewidth=4)

        # Label each line at the last day of the requested window (H+days_after)
        # so the label follows the window instead of a hard-coded x position.
        h_plus_data = grouped[grouped['days_relative_to_lebaran'] == days_after]
        if not h_plus_data.empty:
            y = h_plus_data.iloc[0]['total_passengers']
            # add text with slight offset and matching color
            ax.text(days_after + 0.2, y, str(year), color=line.get_color(), va='center')

    # Set title and subtitle
    ax.set_title("Mudik Menurun", pad=30, y=1.02, fontsize=30)
    ax.text(0.5, 1.06, "Tahun 2025 mencatat penurunan pemudik pertama kali sejak Pandemi",
            transform=ax.transAxes, ha='center', fontsize=11, alpha=0.8)

    # Format axes
    ax.set_xlabel("Hari relatif thdp. Idul Fitri", labelpad=5)
    # NOTE(review): the label says road mode ("Moda Jalan") but the lines above
    # sum every transport mode in all_years_df -- confirm which is intended.
    ax.set_ylabel("Total Penumpang (Moda Jalan)", labelpad=5)

    # Format y axis with K / M suffixes. ':g' keeps fractional ticks
    # (1.5M, 2.5K); truncating with int() would print "1M" for 1.5 million.
    def mkfunc(x, pos):
        if x >= 1e6:
            return f'{x / 1e6:g}M'
        if x >= 1e3:
            return f'{x / 1e3:g}K'
        return f'{int(x)}'

    mkformatter = mpl.ticker.FuncFormatter(mkfunc)
    ax.yaxis.set_major_formatter(mkformatter)

    # Add caption at the bottom
    ax.text(0.45, -0.25, "Plot dibuat oleh @fawwazanvilen untuk Notes on Nusantara\ndengan data Sistem Informasi Transportasi Terintegrasi Kemenhub",
            transform=ax.transAxes, ha='left', fontsize=9, alpha=0.6)

    # Add logo
    imagebox = OffsetImage(logo, zoom=0.05)
    ab = AnnotationBbox(imagebox, (-0.125, -0.125), xycoords='axes fraction', frameon=False)
    ax.add_artist(ab)

    plt.tight_layout()
    plt.savefig('Mudik-2025.png', dpi=300, bbox_inches='tight')
    plt.show()

In [None]:
# Show the combined dataset as this cell's output
all_years_df

In [None]:
# Save the combined dataset. The frame's index is a non-unique leftover from
# concatenating the per-mode/per-year frames, so it is not written out.
all_years_df.to_csv('all_years_df.csv', index=False)