***
### Import of required libraries
***

In [12]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np
import pandas as pd

***
### Overall distribution
***

In [None]:
# Read the parquet file into the data frame used by the plots
# NOTE(review): absolute, machine-specific path — consider a configurable data directory
landing_path = "/home/jan/STAR_shortcut_OSN_paper/data/LSGG/landing_df.parquet"
df = pd.read_parquet(landing_path)

# Full STAR distance per STAR, keyed by STAR name (insertion order = panel order)
stars = dict(
    BELU3N=92.8,
    KINE2N=103.8,
    AKIT3R=110.3,
    LUSA2N=87.9,
)

# Y-axis limits [lower, upper] applied to the panel of each STAR
yranges = dict(
    BELU3N=[20, 110],
    KINE2N=[60, 130],
    AKIT3R=[50, 120],
    LUSA2N=[50, 110],
)

# One row with four panels, one per STAR; panel titles follow the order of `stars`
fig = make_subplots(
    rows=1,
    cols=4,
    subplot_titles=list(stars),
    horizontal_spacing=0.09,
    shared_xaxes=True,
)

# Fill the panels from left to right (subplot columns are 1-based)
for col, (star, star_distance) in enumerate(stars.items(), start=1):
    # Rows of this STAR whose distance lies strictly between 20 and 200 (outlier removal)
    star_df = df.query(f"star == '{star}' and distance < 200 and distance > 20")

    # 95th percentile of the retained distances
    p95 = np.percentile(star_df["distance"], 95)

    # Violin of the observed distances with an embedded box and a mean line
    violin = go.Violin(
        y=star_df["distance"],
        name=star,
        line_color='#1f77b4',
        spanmode='hard',
        box_visible=True,
        meanline_visible=True,
        showlegend=False,
    )
    fig.add_trace(violin, row=1, col=col)

    # Fix the y-axis limits of this panel
    fig.update_yaxes(range=yranges[star], col=col)

    # Dashed horizontal reference lines placed below the traces:
    # first the 95th percentile (dark blue), then the full STAR distance (red)
    for level, colour in ((p95, "darkblue"), (star_distance, "#d62728")):
        fig.add_shape(
            type="line",
            xref=f"x{col}",
            yref=f"y{col}",
            x0=-0.5, x1=0.5,
            y0=level, y1=level,
            line=dict(color=colour, width=3, dash="dash"),
            layer="below",
        )

# Empty line traces (no data points) whose only purpose is to put the two
# reference-line styles into the legend
for colour, label in (
    ("darkblue", "95th Percentile of observed distances  "),
    ("#d62728", "Full STAR distance as per procedure  "),
):
    fig.add_trace(
        go.Scatter(
            x=[None], y=[None],
            mode="lines",
            name=label,
            line=dict(color=colour, width=3, dash="dash"),
        )
    )

# Figure size, margins and a horizontal legend centred below the panels
fig.update_layout(
    height=700,
    width=2000,
    margin=dict(l=50, r=50, t=100, b=50),
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=-0.15,
        xanchor="center",
        x=0.5,
        font=dict(size=25)
    ),
)

# Enlarge and raise the subplot titles; make_subplots stores them as layout
# annotations, and update_annotations applies the change to every one of them
fig.update_annotations(font=dict(size=30), y=1.05)

# Give every y-axis the same title and font sizes.
# `title_font` is used instead of the legacy `titlefont` shorthand, which
# recent plotly releases no longer accept.
fig.update_yaxes(
    title_text="Observed Distances [NM]",
    title_font=dict(size=25),
    tickfont=dict(size=25),
)

# Match the tick label size on the x-axes
fig.update_xaxes(
    tickfont=dict(size=25)
)

# Show the figure
fig.show()


***
### Distribution Peak / Off-peak
***

#### Add peak / off-peak info

In [14]:
# Parse the "stop" column once and derive the hour and minute of each
# timestamp from the parsed values (used afterwards for the peak / off-peak grouping)
stop_times = pd.to_datetime(df['stop'])
df['hour'] = stop_times.dt.hour
df['minute'] = stop_times.dt.minute

def assign_group(hour, minute):
    """Classify a time of day as 'Peak' or 'Off-peak'.

    The time is converted to minutes since midnight and compared with four
    windows whose bounds are both inclusive: 07:00-10:00, 11:00-13:00,
    14:00-17:00 and 20:00-21:00.

    Args:
        hour: Hour of the day.
        minute: Minute within the hour.

    Returns:
        'Peak' when the time lies inside one of the windows, else 'Off-peak'.
    """
    peak_windows = (
        (420, 600),    # 07:00 - 10:00
        (660, 780),    # 11:00 - 13:00
        (840, 1020),   # 14:00 - 17:00
        (1200, 1260),  # 20:00 - 21:00
    )
    minutes_since_midnight = hour * 60 + minute
    for window_start, window_end in peak_windows:
        if window_start <= minutes_since_midnight <= window_end:
            return 'Peak'
    return 'Off-peak'

# Label every row as 'Peak' or 'Off-peak' from its hour and minute.
# Iterating over the two columns directly avoids the per-row Series that a
# row-wise DataFrame.apply builds, and still yields a plain column when the frame is empty.
df['group'] = [
    assign_group(hour, minute)
    for hour, minute in zip(df['hour'], df['minute'])
]

#### Generate plot

In [None]:
# Full STAR distance per STAR, keyed by STAR name (insertion order = panel order)
stars = dict(
    BELU3N=92.8,
    KINE2N=103.8,
    AKIT3R=110.3,
    LUSA2N=87.9,
)

# Y-axis limits [lower, upper] applied to the panel of each STAR
yranges = dict(
    BELU3N=[20, 110],
    KINE2N=[60, 130],
    AKIT3R=[50, 120],
    LUSA2N=[50, 110],
)

# One panel per STAR; the y-axes stay independent so each panel gets its own range
fig = make_subplots(
    rows=1,
    cols=4,
    subplot_titles=list(stars),
    horizontal_spacing=0.09,
    shared_yaxes=False,
)

# (group label, box colour) in drawing order
group_styles = (("Peak", '#1f77b4'), ("Off-peak", '#ff7f0e'))

# Fill the panels from left to right (subplot columns are 1-based)
for col, (star, star_distance) in enumerate(stars.items(), start=1):
    # Rows of this STAR whose distance lies strictly between 10 and 150
    # NOTE(review): the violin-plot cell filters on 20..200 instead — confirm the difference is intended
    star_df = df.query(f"star == '{star}' and distance < 150 and distance > 10")

    # One notched box per group, drawn side by side in the same panel
    for group_name, colour in group_styles:
        fig.add_trace(
            go.Box(
                y=star_df.query(f"group == '{group_name}'")['distance'],
                name=group_name,
                marker_color=colour,
                notched=True,
                showlegend=False,
            ),
            row=1, col=col,
        )

    # Dashed red line at the full STAR distance, spanning both boxes and placed below the traces
    fig.add_shape(
        type="line",
        xref=f"x{col}",
        yref=f"y{col}",
        x0=-0.5, x1=1.5,
        y0=star_distance, y1=star_distance,
        line=dict(color="#d62728", width=3, dash="dash"),
        layer="below",
    )

    # Fix the y-axis limits of this panel
    fig.update_yaxes(range=yranges[star], row=1, col=col)

# Empty line trace (no data points) that only adds the reference-line style to the legend
legend_line_style = dict(color="#d62728", width=3, dash="dash")
star_distance_legend = go.Scatter(
    x=[None], y=[None],
    mode="lines",
    name="Full STAR distance as per procedure  ",
    line=legend_line_style,
)
fig.add_trace(star_distance_legend)

# Figure size, margins and a horizontal legend centred below the panels
fig.update_layout(
    height=700,
    width=2000,
    margin=dict(l=50, r=50, t=100, b=50),
    legend=dict(
        orientation="h",
        yanchor="bottom",
        y=-0.2,
        xanchor="center",
        x=0.5,
        font=dict(size=25)
    ),
)

# Enlarge and raise the subplot titles; make_subplots stores them as layout
# annotations, and update_annotations applies the change to every one of them
fig.update_annotations(font=dict(size=30), y=1.05)

# Give every y-axis the same title and font sizes.
# `title_font` is used instead of the legacy `titlefont` shorthand, which
# recent plotly releases no longer accept.
fig.update_yaxes(
    title_text="Observed Distances [NM]",
    title_font=dict(size=25),
    tickfont=dict(size=25),
)

# Match the tick label size on the x-axes
fig.update_xaxes(
    tickfont=dict(size=25)
)

# Show plot
fig.show()