In [1]:
import zipfile
import os
import rasterio
from rasterio import features
import numpy as np
import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re

# Pull every IGC flight log out of wgc.zip into wgc_unzipped_logs/2019.
#
# Members are streamed straight from the archive into the year folder, so no
# temporary extraction tree is created and the cell can be re-run safely:
# an existing log is simply overwritten.
import shutil

# Define paths
# NOTE(review): absolute, user-specific path; expanduser() is a no-op here
# because the string contains no "~".
desktop_path = os.path.expanduser("C:/Users/ericr/OneDrive/Desktop")
zip_file_path = os.path.join(desktop_path, "wgc.zip")
output_folder = os.path.join(desktop_path, "wgc_unzipped_logs")
year_folder = os.path.join(output_folder, "2019")

# Create the output folders if they don't exist
os.makedirs(year_folder, exist_ok=True)

# Copy each .igc member (any letter case) into the year folder.
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    for member in zip_ref.infolist():
        if member.is_dir() or not member.filename.lower().endswith(".igc"):
            continue

        # Keep only the base name so the archive's folder layout is flattened.
        # Two members with the same base name end up as one file (last wins).
        destination_file = os.path.join(year_folder, os.path.basename(member.filename))
        with zip_ref.open(member) as source_file, open(destination_file, "wb") as target_file:
            shutil.copyfileobj(source_file, target_file)

print(f"All .igc files from wgc.zip have been moved to {year_folder}")

Threshold values: 300 to 700, in increments of 100.

my_env\Scripts\activate
cd OneDrive/Desktop
python glider-engine_edited.py wgc_unzipped_logs/2019

In [3]:
import os
import csv
import shutil

# Find the IGC logs in the source folder that have no row in the results CSV,
# list them, and move them into a separate "no_data" folder.

# Paths
csvfile = "Flt-times_2024_data.csv"
source_folder = "wgc_unzipped_logs/2019"
destination_folder = "wgc_unzipped_logs/no_data"

# 1. Read the "File" column from the CSV
#    newline="" is what the csv module expects; "utf-8-sig" reads plain UTF-8
#    and also drops a leading BOM so the first header is still "File".
with open(csvfile, "r", encoding="utf-8-sig", newline="") as f:
    reader = csv.DictReader(f)
    if "File" not in (reader.fieldnames or []):
        # Without this check a header-only CSV would make every log look
        # "missing" and the whole folder would be moved.
        raise KeyError(f"'{csvfile}' has no 'File' column; no files were moved.")
    processed_files = {row["File"] for row in reader}

# 2. List all *.igc files (extension matched case-insensitively), sorted so the
#    report is the same on every run.
all_igc_files = sorted(
    filename
    for filename in os.listdir(source_folder)
    if filename.lower().endswith(".igc")
)

# 3. Find which files are NOT in the CSV
missing_files = [filename for filename in all_igc_files if filename not in processed_files]

# 4. Print the missing file names
print(f"Files not in {csvfile}:")
for mf in missing_files:
    print(mf)

# 5. Move the missing files to the no_data folder (created if needed)
os.makedirs(destination_folder, exist_ok=True)

for mf in missing_files:
    shutil.move(os.path.join(source_folder, mf), os.path.join(destination_folder, mf))

print(f"\nMoved {len(missing_files)} missing file(s) to '{destination_folder}'.")


Files not in Flt-times_2024.csv:
4536-9239003182.igc
4536-9239003215.igc
4536-9239003251.igc
4536-9239003283.igc
4536-9239003315.igc
4536-9239003347.igc
4536-9239003379.igc
4536-9239003443.igc
4536-9239003475.igc
4536-9239003507.igc
4536-9239003539.igc
4536-9239003571.igc
4536-9239003635.igc
4536-9239003667.igc
4536-9239003699.igc
4536-9240051793.igc
4536-9240051819.igc

Moved 17 missing file(s) to 'wgc_unzipped_logs/no_data'.


In [3]:
import os
import re
import pandas as pd
import plotly.express as px

# Interactive Plotly dot plots: for every threshold, the first AGL value found
# in that threshold's sensor-info string, grouped by glider type.

# Thresholds to plot; each has its own "Sensor Info (<threshold>)" column.
thresholds = [300, 400, 500, 600, 700]

# Years whose results CSV is loaded when it exists.
years = ['2013', '2019', '2020', '2021', '2022', '2023', '2024']

file_paths_by_year = {year: f'Flt-times_{year}_data.csv' for year in years}

# Read each available CSV once; a missing file is reported and skipped.
data_by_year = {}
for year, file_path in file_paths_by_year.items():
    if not os.path.exists(file_path):
        print(f"Warning: File {file_path} does not exist. Skipping.")
        continue
    data_by_year[year] = pd.read_csv(file_path)

# Example match: "MOP ... [1122, ...]AGL" -> group 1 = sensor, group 2 = value list.
agl_pattern = re.compile(r'(MOP|RPM).*?\[([\d,\s]+)\]AGL')

for threshold in thresholds:
    sensor_info_col = f"Sensor Info ({threshold})"

    # One record per row that yields a numeric first AGL value.
    records = []
    for year, yearly in data_by_year.items():
        if sensor_info_col not in yearly.columns:
            continue

        for _, row in yearly.iterrows():
            info = row[sensor_info_col]
            glider_type = row['Gtype']
            flight_date = pd.to_datetime(
                row['Date (MM/DD/YYYY)'], format='%m/%d/%Y', errors='coerce'
            )

            # Rows with an unparseable date or an empty sensor cell are ignored.
            if pd.isna(flight_date) or pd.isna(info):
                continue

            hit = agl_pattern.search(info)
            if hit is None:
                continue

            # Only the first entry of the bracketed list is used.
            leading = hit.group(2).split(',')[0].strip()
            if not leading.isdigit():
                continue

            records.append({
                'Date': flight_date,
                'File': f"{year}_{row['File']}",
                'Glider Type': glider_type,
                'AGL': int(leading),
            })

    data = pd.DataFrame(records, columns=['Date', 'File', 'Glider Type', 'AGL'])

    if data.empty:
        print(f"No data found for threshold {threshold}.")
        continue

    data = data.sort_values(by='Date', ascending=True)

    # Scatter plot: x = Glider Type, y = AGL; hovering shows the file label and date.
    fig = px.scatter(
        data,
        x="Glider Type",
        y="AGL",
        color="Glider Type",
        symbol="Glider Type",
        hover_name="File",
        hover_data={"Date": True},
        title=f"AGL Dot Plot for Threshold {threshold}",
        labels={"AGL": "AGL (ft)"},
    )

    # Larger markers for readability.
    fig.update_traces(marker=dict(size=10))

    # Dashed horizontal reference line at 1000 ft.
    fig.add_hline(
        y=1000,
        line_dash="dash",
        line_color="green",
        annotation_text="1000 ft",
        annotation_position="top left",
    )

    fig.update_layout(
        legend_title_text="Glider Type",
        xaxis_title="Glider Type",
        yaxis_title="AGL (ft)",
        hovermode="closest",
    )

    fig.show()




# Static matplotlib plots: for every threshold, the first AGL value found in
# that threshold's sensor-info string, one point per flight ordered by date,
# with the marker shape encoding the glider type.

# Define thresholds
thresholds = [300, 400, 500, 600, 700]

# Define the years to process
years = ['2013', '2019', '2020', '2021', '2022', '2023', '2024']

# Construct file paths for all specified years
# NOTE(review): other cells in this notebook read 'Flt-times_{year}_data.csv';
# confirm that the un-suffixed name used here is the intended one.
file_paths_by_year = {year: f'Flt-times_{year}.csv' for year in years}

# Define marker map for glider types
marker_map = {
    "Ventus3FES": "o",
    "ASK-21": "s",
    "JS-3-18m": "D",
    "ASH31/21m": "^",
    "ASW-27": "v",
    "Discus-2c": "P"
}

# Marker used for any glider type that is not listed in marker_map.
DEFAULT_MARKER = "*"

# Matches strings such as "MOP sensor at t=[...] and [values]AGL" or the RPM
# equivalent: group 1 is the sensor name, group 2 the bracketed value list.
AGL_PATTERN = re.compile(r'(MOP|RPM).*?\[([\d, ]+)\]AGL')


def first_agl_value(info):
    """Return the first AGL value of the first MOP/RPM entry in ``info``.

    Parameters
    ----------
    info : object
        Content of one "Sensor Info (<threshold>)" cell.

    Returns
    -------
    int or None
        The first number of the first ``[...]AGL`` list that follows "MOP" or
        "RPM"; ``None`` when ``info`` is not a string (e.g. NaN), when nothing
        matches, or when the first list entry is not a plain number.
    """
    if not isinstance(info, str):
        return None
    found = AGL_PATTERN.search(info)
    if found is None:
        return None
    first_value_str = found.group(2).split(',')[0].strip()
    return int(first_value_str) if first_value_str.isdigit() else None


def collect_first_agl(data_by_year, threshold):
    """Build the plotting table for one threshold.

    Parameters
    ----------
    data_by_year : dict
        Maps a year label to that year's DataFrame. Frames that lack the
        "Sensor Info (<threshold>)" column are skipped; the others must provide
        'File', 'Gtype' and 'Date (MM/DD/YYYY)'.
    threshold : int
        Threshold whose sensor-info column is read.

    Returns
    -------
    pandas.DataFrame
        Columns 'Date', 'File Label', 'AGL', 'Glider Type'; one row per flight
        with a parseable date and a usable AGL value. May be empty.
    """
    sensor_info_col = f"Sensor Info ({threshold})"
    records = []
    for year, df in data_by_year.items():
        if sensor_info_col not in df.columns:
            continue

        # Parse the whole date column at once; unparseable entries become NaT.
        flight_dates = pd.to_datetime(
            df['Date (MM/DD/YYYY)'], format='%m/%d/%Y', errors='coerce'
        )
        for date, info, glider_type, file_name in zip(
            flight_dates, df[sensor_info_col], df['Gtype'], df['File']
        ):
            if pd.isna(date):
                continue
            first_value = first_agl_value(info)
            if first_value is None:
                continue
            records.append({
                'Date': date,
                'File Label': f"{year}_{file_name}",
                'AGL': first_value,
                'Glider Type': glider_type,
            })

    return pd.DataFrame(records, columns=['Date', 'File Label', 'AGL', 'Glider Type'])


def plot_first_agl(data, threshold):
    """Draw the date-ordered AGL scatter for one threshold.

    ``data`` must be non-empty, sorted by date and carry a 0..n-1 index, which
    is used as the x position of each point.
    """
    fig, ax = plt.subplots(figsize=(16, 8))

    # One scatter call per glider type instead of one per point. sort=False
    # keeps legend entries in order of first appearance; dropna=False keeps
    # rows whose glider type is missing.
    for glider_type, group in data.groupby('Glider Type', sort=False, dropna=False):
        ax.scatter(
            group.index.to_numpy(),
            group['AGL'],
            color='b',
            marker=marker_map.get(glider_type, DEFAULT_MARKER),
            s=100,
            label=glider_type
        )

    ax.axhline(y=1000, color='g', linestyle='--', linewidth=1, label='1000 ft')

    # Label at most ~40 ticks so the file names stay readable.
    step = max(1, len(data) // 40)
    ax.set_xticks(list(range(0, len(data), step)))
    ax.set_xticklabels(list(data['File Label'][::step]), rotation=45, ha='right')

    ax.set_xlabel('File Name (Ordered by Date)')
    ax.set_ylabel('AGL (ft)')
    ax.set_title(f'First AGL Value when MOP or RPM sensor sounded (All Gliders) for Threshold {threshold}')
    ax.grid(axis='y', linestyle='--', alpha=0.7)
    ax.legend(title="Glider Types", loc='upper left', bbox_to_anchor=(1, 1))
    fig.tight_layout()
    plt.show()
    # Release the figure so repeated runs of the loop do not accumulate figures.
    plt.close(fig)


# Load all data once
data_by_year = {}
for year, file_path in file_paths_by_year.items():
    if os.path.exists(file_path):
        data_by_year[year] = pd.read_csv(file_path)
    else:
        print(f"Warning: File {file_path} does not exist and will be skipped.")

# Process each threshold
for threshold in thresholds:
    data = collect_first_agl(data_by_year, threshold)

    if data.empty:
        print(f"No data found for threshold {threshold}.")
        continue

    # Sort data by date; the fresh index becomes the x position in the plot.
    data = data.sort_values(by='Date', ascending=True).reset_index(drop=True)
    plot_first_agl(data, threshold)
