# 0. Imports

In [1]:
#pip install -U ydata-profiling

In [2]:
import csv
import json
import re

import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from ydata_profiling import ProfileReport

# 1. Read in Data

In [11]:
# Path of the Feather file to load (relative, so it resolves against the working directory).
file_path = "data/Europa/europa_tripfile.feather"

In [12]:
# Sample data
#data = pd.read_csv(file_path, delimiter=';', quotechar='"', engine='python')

In [13]:
# Not for the sample data, only for CSV input
#data = pd.read_csv(file_path, delimiter=',', quotechar='"', engine='python', on_bad_lines='skip')

In [38]:
# Load the Feather file at `file_path` into a DataFrame.
data_europa = pd.read_feather(file_path)

# 2. Understanding the Data

In [None]:
# Build a profiling report object for the loaded frame, titled "Profiling Report".
profile = ProfileReport(data_europa, title="Profiling Report")

In [None]:
# Bare expression on the cell's last line so the notebook displays the report.
profile

In [None]:
# @title creation_time vs count()

def _plot_series(series, series_name, series_index=0):
  """Plot one line of row counts per 'creation_time' value for a single group.

  Args:
    series: Frame slice for one group; its 'creation_time' column is counted.
    series_name: Label attached to the plotted line.
    series_index: Position of the group; selects the line colour by cycling
      through the 'Dark2' palette.
  """
  dark2 = list(sns.palettes.mpl_palette('Dark2'))
  line_color = dark2[series_index % len(dark2)]

  # Count rows per distinct creation_time value.
  counts_by_time = series['creation_time'].value_counts().reset_index(name='counts')
  # No-op unless the former index column came back labelled 'index'.
  counts_by_time = counts_by_time.rename({'index': 'creation_time'}, axis=1)
  counts_by_time = counts_by_time.sort_values('creation_time', ascending=True)

  plt.plot(
      counts_by_time['creation_time'],
      counts_by_time['counts'],
      label=series_name,
      color=line_color,
  )

# Figure with one line per user_name: number of rows at each creation_time.
fig, ax = plt.subplots(figsize=(10, 5.2), layout='constrained')
# Use the frame this notebook actually loads; `data` is never defined because
# the only cells that would create it are commented out.
df_sorted = data_europa.sort_values('creation_time', ascending=True)
for i, (series_name, series) in enumerate(df_sorted.groupby('user_name')):
  _plot_series(series, series_name, i)
# Create the legend once, after every line has been drawn. Calling
# fig.legend() inside the loop adds another legend to the figure per group.
fig.legend(title='user_name', bbox_to_anchor=(1, 1), loc='upper left')
sns.despine(fig=fig, ax=ax)
plt.xlabel('creation_time')
_ = plt.ylabel('count()')

In [39]:
# List the distinct values found in the 'action_name' column.
data_europa["action_name"].unique()

array(['AssignLoadplanAction', 'AssignLCCAction', 'ASMMsgProcessor',
       'AutoLoadBulkAction', 'AutoLoadULDAction', 'CargoFinalActionTDM',
       'AssignUnassignViewAction', 'AutomaticNotificationAction',
       'CargoFinalAction', 'CalculateWeightAndTrimAction',
       'CreateLoadingInstructionAction', 'ChatConfirmMessageAction',
       'ChatSendMessageAction', 'CreateAndSendFuelOrderAction',
       'CreateBaggageLoadItemsAction', 'CreateLoadsheetAction',
       'ChangeFlightLegStateAction', 'CloseLegAction',
       'CreatePostDepartureMessagesAction',
       'CreateAndSendUldOrdMessageAction', 'ClearFlightsAction',
       'CreateZFWMessageAction', 'CrewMsgProcessor',
       'PAXBOOKINGINMsgProcessor', 'FlightPlanFiguresInMsgProcessor',
       'EstimateStorePaxDataAction', 'RampFinalAction',
       'InternalCreateLoadingInstructionAct',
       'InternalCreateLoadsheetAction', 'ResetLoadingListRecordsAction',
       'GetCabinConfigurationsAction', 'ReopenLegAction',
       'FuelData

# 3. Process the Data

In [None]:
# Keys whose numeric values are extracted from the free-text detail strings:
# weights, indices, and specific aircraft parameters.
# Every key is listed exactly once (the list previously repeated "FUEL_INDEX").
weight_keys = [
    "START_WI weight",
    "START_WI index",
    "DO_WI weight",
    "DO_WI index",
    "PAX_WI weight",
    "PAX_WI index",
    "TOTAL_DEADLOAD_WI weight",
    "TOTAL_DEADLOAD_WI index",
    "TOTAL_LOAD_WI",
    "TOTAL_TRAFFIC_LOAD",
    "FUEL_INDEX",
    "AZFW",
    "ATOW",
    "ALAW",
    "AFT_LIMIT_ZFW",
    "Start Weight",
    "Start Index",
    "Total Weight",
    "Index",
    "ATXW",
    "FWD_LIMIT_ZFW"
]

# A well-formed unsigned decimal: digits with an optional fractional part, or a
# bare fraction such as ".5". A plain [0-9.]+ would also capture tokens like
# "70000..." or "." that float() rejects, aborting the whole extraction.
_NUMBER_TOKEN = r'[0-9]+(?:\.[0-9]*)?|\.[0-9]+'

# Longest keys first, so a key that is a prefix of a longer key can never win
# the alternation ahead of it.
_key_alternation = "|".join(
    re.escape(key) for key in sorted(weight_keys, key=len, reverse=True)
)

# Create the regex pattern dynamically from the list of keys:
# group 1 is the key, group 2 the number that follows "key : ".
pattern = rf'({_key_alternation})\s*:\s*({_NUMBER_TOKEN})'


def extract_weight_details(data_string):
    """Extract "<key>: <number>" pairs for the known weight keys from a string.

    Args:
        data_string: Text to scan. Must be a str; callers convert other values
            (e.g. with ``astype(str)``) before calling.

    Returns:
        dict mapping each key found to its value as a float. If a key occurs
        more than once, the last occurrence wins. Keys that are absent, or not
        followed by a well-formed unsigned number, are left out.
    """
    return {key: float(value) for key, value in re.findall(pattern, data_string)}

# The notebook only loads `data_europa` (the read_csv cells that would create
# `data` are commented out), so derive `data` from it here instead of relying
# on a leftover kernel variable. Working on a copy leaves the loaded frame
# untouched and makes this cell safe to re-run.
data = data_europa.copy()

# Apply the function to the 'entry_details' column; values are converted to
# str first because the extractor only accepts strings.
data['extracted_data'] = data['entry_details'].astype(str).apply(extract_weight_details)

# Convert extracted dictionary into separate columns (missing where a key was
# not found). dict.fromkeys() skips repeated keys while keeping their order.
for key in dict.fromkeys(weight_keys):
    data[key] = data['extracted_data'].apply(lambda x: x.get(key, None))

In [None]:
# Export `data` to a CSV file in the working directory; to_csv also writes the
# row index as the first column by default.
data.to_csv("_tripfile_extracted_weights.csv")