In [1]:
# working dir
import os
import sys

cwd = os.getcwd()
root_dir = os.path.dirname(os.path.dirname(cwd))
sys.path.append(root_dir)

print(root_dir)

c:\Users\david\Desktop\Main\03_Uni\WWI21DSA\02_Vorlesungen\06_Projektrealisierung\Projektrealisierung


In [2]:
# Imports and settings
import pandas as pd
import random

random.seed(42)

pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)
pd.set_option("display.max_colwidth", None)

In [3]:
import pandas as pd
from IPython.display import display, HTML

# Display Dataframe (with scrollbars)
def ddf(df, max_height=500, max_width=1500):
    """
    Display a pandas DataFrame with horizontal and vertical scrollbars in a Jupyter notebook.
    
    Parameters:
    df (pd.DataFrame): The DataFrame to display.
    max_height (int): The maximum height of the scrollable area in pixels.
    max_width (int): The maximum width of the scrollable area in pixels.
    """
    style = f"""
    <style>
    .scrollable-dataframe {{
        max-height: {max_height}px;
        max-width: {max_width}px;
        overflow: auto;
        display: inline-block;
        position: relative;
    }}
    .scrollable-dataframe thead th {{
        position: sticky;
        top: 0;
        background-color: white;
        z-index: 1;
    }}
    </style>
    """
    html = style + df.to_html(classes='scrollable-dataframe')
    display(HTML(html))


In [4]:
def split_creation_time(df):
    df["creation_datetime"] = df["creation_time"]
    df["creation_time"] = pd.to_datetime(df["creation_time"])
    df["creation_date"] = df["creation_time"].dt.date
    df["creation_time"] = df["creation_time"].dt.time
    start_cols = ["id", "creation_date", "creation_time"]
    df = df[start_cols + [col for col in df.columns if col not in start_cols]]
    return df

-------------

In [5]:
meta = pd.concat([
    pd.read_csv("../../src/data/ABCD_tripfiles_preprocessed.csv"),
    pd.read_csv("../../src/data/MNOP_tripfiles_preprocessed.csv"),
    pd.read_csv("../../src/data/ZYXW_tripfiles_preprocessed.csv"),
])
meta = meta[["id", "creation_time"]]
meta = split_creation_time(meta)

  pd.read_csv("../../src/data/ABCD_tripfiles_preprocessed.csv"),
  pd.read_csv("../../src/data/MNOP_tripfiles_preprocessed.csv"),
  pd.read_csv("../../src/data/ZYXW_tripfiles_preprocessed.csv"),


In [6]:
cwata = pd.concat([
    pd.read_csv("../../src/data/extracted/abcd_CalculateWeightAndTrimAction.csv"), 
    pd.read_csv("../../src/data/extracted/mnop_CalculateWeightAndTrimAction.csv"), 
    pd.read_csv("../../src/data/extracted/zyxw_CalculateWeightAndTrimAction.csv")
    ])

In [7]:
meta_cols = ["flight_id", "id", "action_name"]
weight_cols = ["ALAW", "ATOW", "ATXW", "AZFW", "TOTAL_TRAFFIC_LOAD", "DO_WI_weight", "PAX_WI_weight", "TOTAL_LOAD_WI", "START_WI_weight"]
cwata = cwata[meta_cols + weight_cols].dropna()

entries_per_flight = cwata.groupby("flight_id")["id"].count().reset_index().sort_values("id", ascending=False)
top_ten_flights = entries_per_flight.head(10)["flight_id"].tolist()

In [8]:
ulta = pd.concat([
    pd.read_csv("../../src/data/extracted/abcd_UpdateLoadTableAction_saved.csv"), 
    pd.read_csv("../../src/data/extracted/mnop_UpdateLoadTableAction_saved.csv"), 
    pd.read_csv("../../src/data/extracted/zyxw_UpdateLoadTableAction_saved.csv")
    ])

ulta = ulta[["flight_id", "id", "ESTIMATED_Total_baggage", "ESTIMATED_Total_cargo", "ESTIMATED_Total_EIC", "ESTIMATED_Total_mail"]]

In [9]:
cwata["ESTIMATED_Total_baggage"] = None
cwata["ESTIMATED_Total_cargo"] = None
cwata["ESTIMATED_Total_mail"] = None
cwata["ESTIMATED_Total_EIC"] = None

ulta["action_name"] = "UpdateLoadTableAction"
for col in weight_cols:
    ulta[col] = None

df = pd.concat([cwata, ulta]).sort_values(["flight_id", "id"])
df = pd.merge(df, meta, on="id", how="left")
df = df.groupby("flight_id").apply(lambda x: x.fillna(method="ffill")).reset_index(drop=True)

  df = pd.concat([cwata, ulta]).sort_values(["flight_id", "id"])
  df = df.groupby("flight_id").apply(lambda x: x.fillna(method="ffill")).reset_index(drop=True)
  df = df.groupby("flight_id").apply(lambda x: x.fillna(method="ffill")).reset_index(drop=True)


In [10]:
# drop NAs which were created as a result of the grouped fillna (first entries per group)
df = df.dropna()

# drop rows where at least one column has a 0
mask = (df == 0).any(axis=1)
df = df[~mask]

In [12]:
df["fuel_airborn"] = df["ATOW"] - df["ALAW"]
df["fuel_taxi"] = df["ATXW"] - df["ATOW"]
df["fuel_weight"] = df["ATXW"] - df["AZFW"]
df["crew_and_equip"] = df["DO_WI_weight"] - df["START_WI_weight"]

df.rename(columns={
    "ESTIMATED_Total_EIC": "EIC",
    "ESTIMATED_Total_mail": "mail",
    "ESTIMATED_Total_cargo": "cargo",
    "ESTIMATED_Total_baggage": "baggage",
    }, inplace=True)

In [38]:
import plotly.graph_objects as go
import imageio

# <------------- For static and interactive sankey

labels = [
    'EIC', 'mail', 'cargo', 'baggage', # 0-3
    'crew_and_equip', 'START_WI_weight', 'PAX_WI_weight', 'TOTAL_LOAD_WI',  # 4-7
    'DO_WI_weight', 'TOTAL_TRAFFIC_LOAD', # 8-9
    'fuel_weight', 'AZFW', # 10-11
    'ATXW', # 12
    'fuel_taxi', 'ATOW', # 13-14
    'fuel_airborn', 'ALAW' # 15-16
]

sources = [0, 1, 2, 3, 4, 5, 6, 7,  8,  9, 10, 11, 12, 12, 14, 14]
targets = [7, 7, 7, 7, 8, 8, 9, 9, 11, 11, 12, 12, 13, 14, 15, 16]

flight = "AB_2509_5_PNQ"
flight_df = df[df["flight_id"] == flight].reset_index(drop=True)
row = flight_df.iloc[[42]]
date = row["creation_date"].values[0]
time = row["creation_time"].values[0]

values = []
for s, t in zip(sources, targets):
    if sources.count(s) > 1:
        values.append(row[labels[t]])
    else:
        values.append(row[labels[s]])

def plot_sankey(labels, sources, targets, values, date, time):
    # Create the Sankey diagram
    fig = go.Figure(data=[go.Sankey(
        arrangement = "snap",
        node=dict(
            pad=15,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=labels,
            align="center",
        ),
        link=dict(
            source=sources,
            target=targets,
            value=values
        )
    )])

    title = f"Flight: {flight}, date: {date}, time: {time}"
    fig.update_layout(title_text=title, font_size=10)
    # Save the figure as an HTML file
    fig.write_html("sankey_diagram.html")

    fig.show()


plot_sankey(labels, sources, targets, values, date, time)

In [36]:
import plotly.graph_objects as go
import imageio
import os
import numpy as np

# <----------- For animated sankey

labels = [
    'EIC', 'mail', 'cargo', 'baggage', # 0-3
    'crew_and_equip', 'START_WI_weight', 'PAX_WI_weight', 'TOTAL_LOAD_WI',  # 4-7
    'DO_WI_weight', 'TOTAL_TRAFFIC_LOAD', # 8-9
    'fuel_weight', 'AZFW', # 10-11
    'ATXW', # 12
    'fuel_taxi', 'ATOW', # 13-14
    'fuel_airborn', 'ALAW' # 15-16
]

sources = [0, 1, 2, 3, 4, 5, 6, 7,  8,  9, 10, 11, 12, 12, 14, 14]
targets = [7, 7, 7, 7, 8, 8, 9, 9, 11, 11, 12, 12, 13, 14, 15, 16]

# Pre-calculate node columns
def calculate_node_columns(sources, targets, labels):
    node_levels = {}
    for s, t in zip(sources, targets):
        node_levels[s] = node_levels.get(s, 0)
        node_levels[t] = max(node_levels.get(t, 0), node_levels[s] + 1)
    
    return [node_levels[i] for i in range(len(labels))]

node_columns = calculate_node_columns(sources, targets, labels)

flight = "AB_2509_5_PNQ"
flight_df = df[df["flight_id"] == flight].reset_index(drop=True)
frames = []
for i in range(len(flight_df)):
    row = flight_df.iloc[[i]]
    values = []
    for s, t in zip(sources, targets):
        if sources.count(s) > 1:
            values.append(row[labels[t]].values[0])
        else:
            values.append(row[labels[s]].values[0])
    frames.append(values)

def plot_sankey(labels, sources, targets, values, date, time, node_columns):
    # Create the Sankey diagram
    fig = go.Figure(data=[go.Sankey(
        arrangement="snap",
        node=dict(
            pad=15,
            thickness=20,
            line=dict(color="black", width=0.5),
            label=labels,
            color="blue",
            x=[x/max(node_columns) for x in node_columns],
            align="center",
        ),
        link=dict(
            source=sources,
            target=targets,
            value=values
        )
    )])

    title = f"Flight: {flight}, date: {date}, time: {time}"
    fig.update_layout(title_text=title, font_size=10)

    return fig

output_dir = f"sankey_gifs"
os.makedirs(output_dir, exist_ok=True)

images = []
for i, values in enumerate(frames):
    date = flight_df["creation_date"].iloc[i]
    time = flight_df["creation_time"].iloc[i]
    fig = plot_sankey(labels, sources, targets, values, date, time, node_columns)
    image_path = os.path.join(output_dir, f"{flight}_{i}.png")
    fig.write_image(image_path, format="png", engine="kaleido")
    images.append(imageio.imread(image_path))

imageio.mimsave('sankey_animation.gif', images, duration=2, loop=0)
print("Animation saved as sankey_animation.gif")





Animation saved as sankey_animation.gif
