In [None]:
# Required for importing modules from parent directory
import os
import sys

# Resolve the project root as the parent of the kernel's working directory.
# The previous `os.path.abspath("__file__")` passed the *literal string*
# "__file__" (notebooks define no `__file__`), which resolves against the working
# directory anyway; `os.getcwd()` states that directly.
# NOTE(review): assumes the kernel is started in the notebook's own folder.
current_dir = os.getcwd()
parent_dir = os.path.dirname(current_dir)
print(parent_dir)
# Register the path only once so re-running this cell does not pile up duplicates.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

In [None]:
import pandas as pd

from src.loader import TripLoader
from src.utils import *

# Lift pandas' display truncation limits (rows, columns, cell width).
for display_option in (
    "display.max_rows",
    "display.max_columns",
    "display.max_colwidth",
):
    pd.set_option(display_option, None)

In [None]:
import re


def extract_UpdateFlightAction(entry_string: str, header_category: str):
    """Parse the free-text details of an ``UpdateFlightAction`` entry.

    Metadata fields are read from the first line of the entry. The leg table is
    read from the lines after the ``Legs:`` marker; the line directly following
    the marker is skipped (presumably the table header - TODO confirm).

    Args:
        entry_string: Raw entry text; carriage returns are dropped before parsing.
        header_category: ``"received"`` or ``"saved"``; selects which list of
            column names is zipped onto the values of each leg row.

    Returns:
        A dict with every metadata key found on the first line, plus ``"legs"``:
        a list holding one ``{column: value}`` dict per non-blank leg row.

    Raises:
        ValueError: If ``header_category`` is not a known category, or the entry
            contains no ``Legs:`` line.
    """
    # Validate up front: previously an unknown category left `leg_keys` unbound
    # and only surfaced later as an UnboundLocalError.
    if header_category == "received":
        leg_keys = COLUMNS_UpdateFlightAction_RECEIVED
    elif header_category == "saved":
        leg_keys = COLUMNS_UpdateFlightAction_SAVED
    else:
        raise ValueError(
            f"header_category must be 'received' or 'saved', got {header_category!r}"
        )

    lines = entry_string.replace("\r", "").split("\n")

    # Metadata lives on the first line as "key: value" pairs; a value ends where
    # the next "word:" token starts, or at the end of the line.
    extracted_dict = {}
    first_line = lines[0]
    for key in COLUMNS_UpdateFlightAction_METADATA:
        # Raw string keeps \s and \w as regex escapes; re.escape makes the key
        # match literally even if it contains regex metacharacters.
        pattern = rf"{re.escape(key)}: (.*?)(?=\s+\w+:|$)"
        match = re.search(pattern, first_line)
        if match:
            extracted_dict[key] = match.group(1).strip()

    # Locate the marker tolerating surrounding whitespace.
    legs_marker_index = next(
        (index for index, line in enumerate(lines) if line.strip() == "Legs:"),
        None,
    )
    if legs_marker_index is None:
        raise ValueError("entry_string contains no 'Legs:' line")

    legs = []
    # +2 skips the marker itself and the line right after it.
    for line in lines[legs_marker_index + 2 :]:
        if not line.strip():
            continue

        # Timestamps are matched first so they stay one token; everything else
        # is split on whitespace.
        values = re.findall(r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}|\S+", line)
        legs.append(dict(zip(leg_keys, values)))

    extracted_dict["legs"] = legs
    return extracted_dict

In [None]:
# Instantiate the project's trip loader and take its `trips_ABCD` dataset.
# NOTE(review): presumably a DataFrame with `action_name` and `entry_details`
# columns (both are read from it in the next cell) - confirm against src.loader.
trip_loader = TripLoader()
trips_data = trip_loader.trips_ABCD

In [None]:
# Smoke-test the parser on the first UpdateFlightAction entry only.
is_update_flight = trips_data["action_name"] == "UpdateFlightAction"
filtered = trips_data[is_update_flight]
for idx, row in filtered.head(1).iterrows():
    entry_string = row["entry_details"]
    print(entry_string)
    extracted_dict = extract_UpdateFlightAction(entry_string, "received")
    print(extracted_dict)

In [None]:
# Second dataset exposed by the loader.
# NOTE(review): `trips_data2` is not referenced again in the visible notebook.
trips_data2 = trip_loader.trips_MNOP

In [None]:
# Third dataset exposed by the loader.
# NOTE(review): `trips_data3` is not referenced again in the visible notebook.
trips_data3 = trip_loader.trips_ZYXW

In [None]:
# Load the extracted AssignLCCAction table for dataset ABCD.
# The path is relative to the kernel's working directory.
df_air = pd.read_csv("../src/data/extracted/ABCD_AssignLCCAction.csv")

In [None]:
df_air.head()

In [None]:
df_air.info()

In [None]:
# Load the extracted AssignLCCAction table for dataset MNOP.
# The path is relative to the kernel's working directory.
df_air2 = pd.read_csv("../src/data/extracted/MNOP_AssignLCCAction.csv")

In [None]:
# Load the extracted AssignLCCAction table for dataset ZYXW.
# The path is relative to the kernel's working directory.
df_air3 = pd.read_csv("../src/data/extracted/ZYXW_AssignLCCAction.csv")

In [None]:
# Stack the three per-dataset tables into one frame. `ignore_index=True` gives the
# result a fresh 0..n-1 index; without it each source's row labels repeat, which
# makes label-based access on the combined frame ambiguous.
df_air_all = pd.concat([df_air, df_air2, df_air3], ignore_index=True)

In [None]:
# Keep only the columns used by the route analysis.
df_air_all = df_air_all.loc[:, ["flight_id", "A/L", "Date", "DEP", "ARR"]]

In [None]:
df_air_all.head()

In [None]:
# Collapse to one row per flight_id, keeping the first DEP, ARR and A/L value
# recorded for that flight.
grouped = (
    df_air_all.groupby("flight_id")[["DEP", "ARR", "A/L"]]
    .first()
    .reset_index()
)
grouped.info()

In [None]:
grouped.head()

In [None]:
# Airport reference table used for departures; later cells join it on its
# "Airport Code" column. Read from the kernel's working directory.
airports = pd.read_csv("airports.csv")

In [None]:
# Airport reference table used for arrivals; the merge into `df_all` joins it on
# its "arr_Airport" column. Read from the kernel's working directory.
arr_airports = pd.read_csv("arr_airports.csv")

In [None]:
# Attach departure-airport details; a left join keeps flights whose DEP code has
# no match in `airports`.
df_all = grouped.merge(airports, how="left", left_on="DEP", right_on="Airport Code")

In [None]:
# Attach arrival-airport details; a left join keeps flights whose ARR code has
# no match in `arr_airports`.
df_all = df_all.merge(
    arr_airports, how="left", left_on="ARR", right_on="arr_Airport"
)

In [None]:
df_all.head()

In [None]:
# Work on a copy: the next cell adds a `route` column, and a plain alias would
# write that column into `df_all` as well.
df = df_all.copy()

### Erstellen des DFs für Anzahl der Routen

In [None]:
# Label each flight with its route, formatted as "<DEP>_<ARR>".
df["route"] = df["DEP"].add("_").add(df["ARR"])

# Preview the result
df.head()

In [None]:
df.info()

In [None]:
# Number of rows (flights) per route.
route_counts = df.groupby("route").size().rename("flight_count").reset_index()

In [None]:
route_counts.head()

In [None]:
route_counts.info()

In [None]:
# Write the per-route flight counts to an Excel workbook in the working
# directory; the row index is not written.
route_counts.to_excel("route_counts.xlsx", index=False)

### Erstellen des DFs für die Routen-Karte

In [None]:
# Start from a copy of the merged table: a later cell reassigns `df.columns`,
# which would also rename the columns of `df_all` if `df` were only an alias.
df = df_all.copy()

In [None]:
import pandas as pd

# Reshape the merged flight table into long form for the route map: every flight
# contributes one departure row and one arrival row sharing the same `route` label.
# `df` is expected to be the merged flight/airport frame built in the cells above.

# Show the column names as they came out of the merges
print("Ursprüngliche Spaltennamen im DataFrame:", df.columns.tolist())

# Strip leading/trailing whitespace from the column names. Work on a copy so the
# frame that `df` may be aliased to is not renamed as a side effect.
df = df.copy()
df.columns = df.columns.str.strip()

# Show the cleaned column names
print("Bereinigte Spaltennamen im DataFrame:", df.columns.tolist())

# Columns the transformation reads
required_columns = [
    "flight_id",
    "DEP",
    "ARR",
    "Airport Code",
    "Latitude",
    "Longitude",
    "City",
    "Country",
    "arr_Airport",
    "arr_City",
    "arr_Country",
    "arr_Latitude",
    "arr_Longitude",
]

# Fail loudly when a column is missing: merely printing a message would leave
# `transformed_df` undefined, and the following cells would fail with a NameError.
missing_columns = [column for column in required_columns if column not in df.columns]
if missing_columns:
    raise KeyError(f"Fehlende Spalten im DataFrame: {missing_columns}")

# Positional 0..n-1 index, so sorting on it below restores the original flight order
flights = df.reset_index(drop=True)

# astype(str) mirrors the string formatting of the previous row-by-row loop
route = flights["DEP"].astype(str) + "_" + flights["ARR"].astype(str)

# One departure row per flight
dep_rows = pd.DataFrame(
    {
        "flight_id": flights["flight_id"],
        "Type": "DEP",
        "Airport": flights["DEP"],
        "Code": flights["Airport Code"],
        "Latitude": flights["Latitude"],
        "Longitude": flights["Longitude"],
        "City": flights["City"],
        "Country": flights["Country"],
        "route": route,
    }
)

# One arrival row per flight
arr_rows = pd.DataFrame(
    {
        "flight_id": flights["flight_id"],
        "Type": "ARR",
        "Airport": flights["ARR"],
        "Code": flights["arr_Airport"],
        "Latitude": flights["arr_Latitude"],
        "Longitude": flights["arr_Longitude"],
        "City": flights["arr_City"],
        "Country": flights["arr_Country"],
        "route": route,
    }
)

# Interleave the two halves: a stable sort on the shared positional index places
# each flight's DEP row directly before its ARR row.
transformed_df = (
    pd.concat([dep_rows, arr_rows])
    .sort_index(kind="stable")
    .reset_index(drop=True)
)

# Preview only; with display.max_rows unset, printing the whole frame floods the output
print(transformed_df.head())

# To save the result as a CSV file instead:
# transformed_df.to_csv('transformed_flights.csv', index=False)

In [None]:
transformed_df.head()

In [None]:
# Write the long-format departure/arrival rows to an Excel workbook in the
# working directory; the row index is not written.
transformed_df.to_excel("df_dep_arr_rows.xlsx", index=False)

In [None]:
# Departure-side view of the flight records.
df_air_dep = df_air_all.loc[:, ["flight_id", "A/L", "Date", "DEP"]]

In [None]:
# Arrival-side view of the flight records.
df_air_arr = df_air_all.loc[:, ["flight_id", "A/L", "Date", "ARR"]]

In [None]:
df_air_dep.head()

In [None]:
# One row per flight_id: first DEP and A/L value, plus the number of non-null
# Date entries recorded for that flight.
# NOTE(review): this rebinds `grouped`, which an earlier cell uses for a
# different aggregation - re-running that earlier merge afterwards would pick
# up this frame instead.
grouped = df_air_dep.groupby("flight_id", as_index=False).agg(
    DEP=("DEP", "first"),
    Date=("Date", "count"),
    **{"A/L": ("A/L", "first")},
)

In [None]:
# One row per flight_id: first ARR and A/L value, plus the number of non-null
# Date entries recorded for that flight.
grouped2 = df_air_arr.groupby("flight_id", as_index=False).agg(
    ARR=("ARR", "first"),
    Date=("Date", "count"),
    **{"A/L": ("A/L", "first")},
)

In [None]:
grouped.head()

In [None]:
grouped.info()

In [None]:
# One row per departure code, with the first A/L value seen for it.
df_dep = grouped.groupby("DEP", as_index=False)["A/L"].first()

In [None]:
# One row per arrival code, with the first A/L value seen for it.
df_arr = grouped2.groupby("ARR", as_index=False)["A/L"].first()

In [None]:
df_dep.info()

In [None]:
df_dep.head()

In [None]:
df_arr.info()

In [None]:
df_arr.head()

In [None]:
# airports = pd.read_csv("airports.csv")

In [None]:
airports.head()

In [None]:
# arr_airports = pd.read_csv("arr_airports.csv")

In [None]:
arr_airports.head()

In [None]:
# Attach airport details to each departure code; a left join keeps codes that
# have no match in `airports`.
df_dep_export = df_dep.merge(
    airports, how="left", left_on="DEP", right_on="Airport Code"
)

In [None]:
# Write the departure codes with their airport details to an Excel workbook in
# the working directory; the row index is not written.
df_dep_export.to_excel("df_dep_export.xlsx", index=False)

In [None]:
# Attach airport details to each arrival code (left join keeps unmatched codes).
# `arr_airports` is keyed by "arr_Airport" - the key the merge into `df_all` uses -
# so joining on "Airport" would raise a KeyError.
# NOTE(review): confirm arr_airports.csv has no separate "Airport" column.
df_arr_export = pd.merge(
    df_arr, arr_airports, left_on="ARR", right_on="arr_Airport", how="left"
)

In [None]:
# Write the arrival codes with their airport details to an Excel workbook in
# the working directory; the row index is not written.
df_arr_export.to_excel("df_arr_export.xlsx", index=False)

In [None]:
# df_arr = pd.merge(df_dep, airports, left_on="ARR", right_on="Airport Code", how="left")

In [None]:
# df_arr.head()

In [None]:
# df_arr_grouped_test = df_arr.groupby(["ARR"]).size().reset_index(name="count")

In [None]:
# Row count per arrival code, exported to CSV. The frame is built here because
# its only other definition in the notebook is commented out, which made this
# cell fail with a NameError on a fresh kernel.
df_arr_grouped_test = df_arr.groupby(["ARR"]).size().reset_index(name="count")
df_arr_grouped_test.to_csv("df_arr_grouped_test.csv", index=False)

In [None]:
# Row count per departure code.
# NOTE(review): `df_dep` already holds one row per DEP, so every count is 1 -
# presumably flights per airport were intended; confirm the source frame.
df_dep_grouped_test = df_dep.groupby("DEP").size().rename("count").reset_index()

In [None]:
df_dep_grouped_test.info()

In [None]:
# Arrival codes together with their coordinates.
# `df_arr` only carries ARR and A/L, so grouping it by "Latitude"/"Longitude"
# raised a KeyError; the coordinates are joined in from `airports` first.
# Codes without a coordinate match drop out, because groupby skips NaN keys.
df_arr_grouped = (
    df_arr.merge(airports, left_on="ARR", right_on="Airport Code", how="left")
    .groupby(["ARR", "Latitude", "Longitude"])
    .size()
    .reset_index(name="count")
)

In [None]:
df_arr_grouped.head()