In [5]:
import pandas as pd
import re
from IPython.display import display

# Load the CSV file
# NOTE(review): this is an absolute path under one user's home directory, so the
# cell only runs on a machine with the same directory layout — consider a
# configurable data directory.
file_path = '/Users/nikyakovlev/Documents/GitHub/aircraft_load/data_engineering/niks_data_mata/UpdateFuelDataAction_entries_MNOP_1000.csv'
# Read the whole file into a DataFrame; its 'entry_details' column is parsed further down.
df = pd.read_csv(file_path)

# Regex pattern to match the required fields.
# Each alternative captures one "<label> : <number> KG" value in its own group,
# so every tuple returned by findall() has exactly one non-empty slot, and the
# slot position identifies which field was found.
pattern = re.compile(
    r'Trip Fuel\s*:\s*([\d.]+) KG|'
    r'FZFW\s*:\s*([\d.]+) KG|'
    r'Max Fuel Cap\s*:\s*([\d.]+) KG|'
    r'Minimum TOF\s*:\s*([\d.]+) KG|'
    r'Take Off Fuel\s*:\s*([\d.]+) KG|'
    r'Taxi Fuel\s*:\s*([\d.]+) KG'
)

# Function to extract the required values from a given text
def extract_fuel_data(text):
    """Extract the labelled fuel figures from one entry's free text.

    Parameters
    ----------
    text : str
        Text to scan. Any non-string value (for example the NaN that pandas
        uses for an empty CSV cell, or None) is treated as "nothing found".

    Returns
    -------
    dict
        Keys 'Trip Fuel', 'FZFW', 'Max Fuel Cap', 'Minimum TOF',
        'Take Off Fuel' and 'Taxi Fuel'. Each value is the matched number as
        a string (no numeric conversion is done), or None when the label does
        not occur. If a label occurs more than once, the last occurrence wins.
    """
    # Field names listed in the same order as the capture groups in `pattern`.
    fields = (
        'Trip Fuel',
        'FZFW',
        'Max Fuel Cap',
        'Minimum TOF',
        'Take Off Fuel',
        'Taxi Fuel',
    )
    # Start with every field missing.
    result = dict.fromkeys(fields)

    # findall() raises TypeError on non-string input; return the empty result
    # instead so one blank cell does not abort a whole-column apply().
    if not isinstance(text, str):
        return result

    for match in pattern.findall(text):
        # Unmatched groups come back as '' and are skipped.
        for field, value in zip(fields, match):
            if value:
                result[field] = value
    return result

# Run the extractor on each 'entry_details' value; the result is a Series of dicts
extracted_data = df['entry_details'].apply(extract_fuel_data)

# Expand the per-row dicts into a table with one column per fuel field
fuel_data_df = pd.DataFrame(list(extracted_data))

# Render the table in the notebook
display(fuel_data_df)

# Persist the table as CSV, leaving out the row index
fuel_data_df.to_csv('extracted_fuel_data.csv', index=False)


Unnamed: 0,Trip Fuel,FZFW,Max Fuel Cap,Minimum TOF,Take Off Fuel,Taxi Fuel
0,48619.0,148962.0,110953.0,55636.0,55636.0,875.0
1,39538.0,151352.0,78999.0,46823.0,46823.0,1000.0
2,18423.0,64710.0,26683.0,21150.0,21150.0,275.0
3,35046.0,144688.0,78999.0,40531.0,40531.0,500.0
4,40201.0,144420.0,109186.0,46688.0,46688.0,500.0
...,...,...,...,...,...,...
995,3691.0,67108.0,26683.0,9758.0,9900.0,200.0
996,,,,,,
997,8106.0,57681.0,18730.0,11431.0,12900.0,200.0
998,,,,,,


In [25]:
import pandas as pd
import re
from IPython.display import display

# Load the CSV file
# NOTE(review): this is an absolute path under one user's home directory, so the
# cell only runs on a machine with the same directory layout — consider a
# configurable data directory.
file_path = '/Users/nikyakovlev/Documents/GitHub/aircraft_load/data_engineering/niks_data_mata/UpdateFuelDataAction_entries_ZYXW.csv'
# Read the whole file into a DataFrame; this cell later uses its 'entry_details',
# 'creation_time', 'airline_code', 'flight_number' and 'flight_date' columns.
df = pd.read_csv(file_path)

# Regex pattern to match the required fields.
# The first two alternatives capture a pair of numbers that follow the label
# without a colon; the remaining alternatives capture a single
# "<label> : <number> KG" value. Group position identifies the field.
pattern = re.compile(
    r'Block Fuel\s+([\d.]+) KG\s+([\d.]+) KG|'
    r'Take Off Fuel\s+([\d.]+) KG\s+([\d.]+) KG|'
    r'Trip Fuel\s*:\s*([\d.]+) KG|'
    r'Max Fuel Cap\s*:\s*([\d.]+) KG|'
    r'Minimum TOF\s*:\s*([\d.]+) KG|'
    r'Taxi Fuel\s*:\s*([\d.]+) KG|'
    r'Take Off Fuel\s*:\s*([\d.]+) KG'
)

# Function to extract the required values from a given text
def extract_fuel_data(text):
    """Extract the labelled fuel figures from one entry's free text.

    Parameters
    ----------
    text : str
        Text to scan. Any non-string value (for example the NaN that pandas
        uses for an empty CSV cell, or None) is treated as "nothing found".

    Returns
    -------
    dict
        Keys 'blockfuel_main1', 'blockfuel_trim', 'takeofffuel_main1',
        'takeofffuel_trim', 'trip_fuel', 'max_fuel_cap', 'minimum_tof',
        'taxi_fuel' and 'takeoff_fuel'. Each value is the matched number as a
        string (no numeric conversion is done), or None when the label does
        not occur. If a label occurs more than once, the last occurrence wins.
    """
    # Field names listed in the same order as the capture groups in `pattern`.
    fields = (
        'blockfuel_main1',
        'blockfuel_trim',
        'takeofffuel_main1',
        'takeofffuel_trim',
        'trip_fuel',
        'max_fuel_cap',
        'minimum_tof',
        'taxi_fuel',
        'takeoff_fuel',
    )
    # Start with every field missing.
    result = dict.fromkeys(fields)

    # findall() raises TypeError on non-string input; return the empty result
    # instead so one blank cell does not abort a whole-column apply().
    if not isinstance(text, str):
        return result

    for match in pattern.findall(text):
        # Unmatched groups come back as '' and are skipped. The two groups of
        # a paired alternative are always filled together, because each one
        # requires at least one character.
        for field, value in zip(fields, match):
            if value:
                result[field] = value
    return result

# Run the extractor on each 'entry_details' value; the result is a Series of dicts
extracted_data = df['entry_details'].apply(extract_fuel_data)

# Expand the per-row dicts into a table with one column per extracted field
fuel_data_df = pd.DataFrame(list(extracted_data))

# Keep the unmodified creation time as 'timestamp' before 'creation_time' is overwritten
df['timestamp'] = df['creation_time']

# Collapse 'creation_time' to a monthly period string, then rewrite every '-04' as '-05'.
# NOTE(review): this relabels April as May inside the key — presumably intentional; confirm.
df['creation_time'] = (
    pd.to_datetime(df['creation_time'])
    .dt.to_period('M')
    .astype(str)
    .str.replace('-04', '-05')
)

# Build the per-flight key: <month>_<airline_code>_<flight_number>_<flight_date>
df['combined'] = (
    df['creation_time']
    + '_' + df['airline_code']
    + '_' + df['flight_number'].astype(str)
    + '_' + df['flight_date'].astype(str)
)

# Attach the key and the original timestamp, then put those two columns first
fuel_data_df = fuel_data_df.assign(
    combined=df['combined'],
    timestamp=df['timestamp'],
)[[
    'combined', 'timestamp',
    'blockfuel_main1', 'blockfuel_trim', 'takeofffuel_main1', 'takeofffuel_trim',
    'trip_fuel', 'max_fuel_cap', 'minimum_tof', 'taxi_fuel', 'takeoff_fuel'
]]

# Render the table in the notebook
display(fuel_data_df)

# Persist the table as CSV, leaving out the row index
fuel_data_df.to_csv('extracted_fuel_data_ZYXW.csv', index=False)


Unnamed: 0,combined,timestamp,blockfuel_main1,blockfuel_trim,takeofffuel_main1,takeofffuel_trim,trip_fuel,max_fuel_cap,minimum_tof,taxi_fuel,takeoff_fuel
0,2024-05_ZY_999.0_30.0,2024-04-30 10:41:44,,,,,6387.0,18623.0,,138.0,8787.0
1,2024-05_ZY_999.0_30.0,2024-04-30 10:41:44,,,,,,,,,
2,2024-05_ZY_999.0_30.0,2024-04-30 10:52:34,,,,,6389.0,18623.0,,138.0,8809.0
3,2024-05_ZY_999.0_30.0,2024-04-30 10:52:34,,,,,,,,,
4,2024-05_ZY_4483.0_30.0,2024-04-30 12:40:18,,,,,10630.0,16304.0,,121.0,14407.0
...,...,...,...,...,...,...,...,...,...,...,...
120,2024-05_ZY_9900.0_6.0,2024-05-06 17:46:20,,,,,,,,,
121,2024-05_ZY_7058.0_6.0,2024-05-06 20:13:37,,,,,1514.0,13100.0,4189.0,81.0,4189.0
122,2024-05_ZY_9752.0_7.0,2024-05-06 20:14:40,,,,,56236.0,109186.0,65235.0,325.0,65235.0
123,2024-05_ZY_3626.0_6.0,2024-05-06 21:56:49,,,,,2500.0,18623.0,,200.0,6000.0


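The next cell extracts all entries for a single flight: it loads the previously extracted fuel data and keeps only the rows whose `combined` key matches the chosen flight.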

In [23]:
import pandas as pd
from IPython.display import display

# Load the CSV file
# NOTE(review): this is an absolute path under one user's home directory, so the
# cell only runs on a machine with the same directory layout.
# NOTE(review): no cell visible in this notebook writes a file with this name —
# presumably it comes from an earlier run of the extraction cell on MNOP data; confirm.
file_path = '/Users/nikyakovlev/Documents/GitHub/aircraft_load/data_engineering/niks_data_mata/extracted_fuel_data_MNOP.csv'
# Read the whole file into a DataFrame; the filter below relies on its 'combined' column.
df = pd.read_csv(file_path)


# Function to filter by combined column value
def filter_by_combined_value(df, combined_value):
    """Return the rows of `df` whose 'combined' column equals `combined_value`.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing a 'combined' column.
    combined_value : object
        Value to compare each 'combined' entry against.

    Returns
    -------
    pandas.DataFrame
        The matching rows with their original index labels and all columns;
        empty when nothing matches. `df` itself is not modified.
    """
    # Boolean mask marking the rows that carry the requested key
    is_match = df['combined'].eq(combined_value)
    return df[is_match]

# Example usage: choose one flight key and keep only that flight's rows
combined_value = '2024-05_MN_1136.0_1.0'
filtered_df = filter_by_combined_value(df=df, combined_value=combined_value)

# Render the filtered rows in the notebook
display(filtered_df)

# Write the filtered rows to a new CSV file, leaving out the row index
filtered_df.to_csv('filtered_extracted_fuel_data_MNOP.csv', index=False)


Unnamed: 0,combined,timestamp,blockfuel_main1,blockfuel_trim,takeofffuel_main1,takeofffuel_trim,trip_fuel,max_fuel_cap,minimum_tof,taxi_fuel,takeoff_fuel
3,2024-05_MN_1136.0_1.0,2024-04-30 14:25:46,,,,,35046.0,78999.0,40531.0,500.0,40531.0
15,2024-05_MN_1136.0_1.0,2024-04-30 16:31:24,0.0,0.0,0.0,0.0,35046.0,78999.0,40531.0,500.0,40531.0
16,2024-05_MN_1136.0_1.0,2024-04-30 16:31:24,0.0,0.0,0.0,0.0,,,,,
79,2024-05_MN_1136.0_1.0,2024-04-30 22:44:02,0.0,0.0,0.0,0.0,30105.0,78999.0,37209.0,500.0,37209.0
153,2024-05_MN_1136.0_1.0,2024-05-01 00:48:17,36700.0,2400.0,36700.0,2400.0,30105.0,78999.0,37209.0,500.0,38600.0
154,2024-05_MN_1136.0_1.0,2024-05-01 00:48:17,36700.0,2400.0,36200.0,2400.0,,,,,
