In [141]:
import numpy as np
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit

In [142]:
date = "09-20-24"
data_location = rf"E:\Project 6 - Temperature\Experiments\data_analysis\{date}"
data_is_cleaned = ["Yes", "No", "Partially"][2]

# Combine all peaks/troughs into one dataframe

In [143]:
all_peaks_and_troughs_df = pd.DataFrame()
total_positions = 42
positions = range(total_positions)
# positions = [i for i in range(13)] + [i + 26 for i in range(13)]
for pos in positions:
    # Load peaks and troughs
    if data_is_cleaned == "Yes":
        peaks_and_troughs_df = pd.read_csv(data_location + f"\Pos{pos}_curated_peaks_and_troughs.csv")
    elif data_is_cleaned == "No":
        peaks_and_troughs_df = pd.read_csv(data_location + f"\Pos{pos}_peaks_and_troughs.csv")
    elif data_is_cleaned == "Partially":
        try:
            peaks_and_troughs_df = pd.read_csv(data_location + f"\Pos{pos}_curated_peaks_and_troughs.csv")
            print(f"Using cleaned data for position {pos}")
        except:
            peaks_and_troughs_df = pd.read_csv(data_location + f"\Pos{pos}_peaks_and_troughs.csv")
            print(f"Using raw data for position {pos}")
    try:
        # Load flagged data
        flagged_df = pd.read_csv(data_location + f"\Pos{pos}_flagged_data.csv")
        # Remove TRACK_IDs on flagged data from peaks and troughs
        peaks_and_troughs_df = peaks_and_troughs_df[~peaks_and_troughs_df["TRACK_ID"].isin(flagged_df["TRACK_ID"])]
    except:
        print(f"No flagged data for position {pos}")
    # Add position column
    peaks_and_troughs_df["POSITION"] = pos
    # Add to all_peaks_and_troughs_df
    all_peaks_and_troughs_df = pd.concat([all_peaks_and_troughs_df, peaks_and_troughs_df])
# Reset index
all_peaks_and_troughs_df.reset_index(drop=True, inplace=True)

Using cleaned data for position 0
Using cleaned data for position 1
Using raw data for position 2
No flagged data for position 2
Using raw data for position 3
No flagged data for position 3
Using raw data for position 4
No flagged data for position 4
Using raw data for position 5
No flagged data for position 5
Using raw data for position 6
No flagged data for position 6
Using cleaned data for position 7
Using cleaned data for position 8
Using cleaned data for position 9
Using cleaned data for position 10
Using cleaned data for position 11
Using cleaned data for position 12
Using cleaned data for position 13
Using cleaned data for position 14
Using cleaned data for position 15
Using cleaned data for position 16
Using cleaned data for position 17
Using cleaned data for position 18
Using cleaned data for position 19
Using raw data for position 20
No flagged data for position 20
Using cleaned data for position 21
Using cleaned data for position 22
Using cleaned data for position 23
Using c

  all_peaks_and_troughs_df = pd.concat([all_peaks_and_troughs_df, peaks_and_troughs_df])


# Fix any NaN values in the data

In [144]:
# If there is a track that contains NaNs in the peaks and troughs, flag it
# If it contains NaNs in the cycle numbers, re-number that track
flagged_tracks = {
    'type_nan': [],
    'time_nan': [],
    'peak_cycle_renumbering': [],
    'trough_cycle_renumbering': [],
    'indexes_to_remove': [],
}
# Do it for each position
for position in all_peaks_and_troughs_df["POSITION"].unique():
    position_tracks = all_peaks_and_troughs_df[all_peaks_and_troughs_df["POSITION"] == position]
    # Check if there are NaNs on the track id
    if position_tracks["TRACK_ID"].isnull().values.any():
        # Get specific index where this happens
        indexes_to_remove = position_tracks[position_tracks["TRACK_ID"].isnull()].index.tolist()
        flagged_tracks["indexes_to_remove"].extend(indexes_to_remove)
    for track_id in position_tracks["TRACK_ID"].unique():
        track_df = position_tracks[position_tracks["TRACK_ID"] == track_id]
        # Check if there are NaNs
        if track_df.isnull().values.any():
            if track_df["TIME"].isnull().values.any():
                # NaN is in time or track id, remove
                flagged_tracks["time_nan"].append((position, track_id))
            elif track_df["TYPE"].isnull().values.any():
                # NaN is in the type column
                flagged_tracks["type_nan"].append((position, track_id))
            elif track_df["CYCLE"].isnull().values.any():
                # Check if nan is on a peak or trough
                peak_cycle_numbers = track_df[track_df["TYPE"] == "PEAK"]["CYCLE"].values
                trough_cycle_numbers = track_df[track_df["TYPE"] == "TROUGH"]["CYCLE"].values
                if np.isnan(peak_cycle_numbers).any():
                    # NaN is in the peak cycle numbers
                    flagged_tracks["peak_cycle_renumbering"].append((position, track_id))
                if np.isnan(trough_cycle_numbers).any():
                    # NaN is in the trough cycle numbers
                    flagged_tracks["trough_cycle_renumbering"].append((position, track_id))
        else:
            # Check that peak and troughs cycle numbering is continous
            peak_cycle_numbers = track_df[track_df["TYPE"] == "PEAK"]["CYCLE"].values
            trough_cycle_numbers = track_df[track_df["TYPE"] == "TROUGH"]["CYCLE"].values
            if np.any(np.diff(peak_cycle_numbers) != 1):
                # Peak cycle numbers are not continous
                flagged_tracks['peak_cycle_renumbering'].append((position, track_id))
            if np.any(np.diff(trough_cycle_numbers) != 1):
                # Trough cycle numbers are not continous
                flagged_tracks['trough_cycle_renumbering'].append((position, track_id))

In [145]:
# Print how many flagged types per category there are
for key, value in flagged_tracks.items():
    print(f"{key}: {len(value)}")

type_nan: 201
time_nan: 3
peak_cycle_renumbering: 0
trough_cycle_renumbering: 0
indexes_to_remove: 2


In [146]:
processed_peaks_and_troughs_df = all_peaks_and_troughs_df.copy()

In [147]:
# Fix flagged tracks: peak_cycle_renumbering
for i in range(len(flagged_tracks["peak_cycle_renumbering"])):
    position, track_id = flagged_tracks["peak_cycle_renumbering"][i]
    track_df = all_peaks_and_troughs_df[(all_peaks_and_troughs_df["POSITION"] == position) & (all_peaks_and_troughs_df["TRACK_ID"] == track_id)]
    # Renumber peak cycle numbers
    # get current peak order based on time
    # print(track_df[track_df["TYPE"] == "PEAK"])
    peak_cycle_numbers = track_df[track_df["TYPE"] == "PEAK"]["CYCLE"].values
    peak_cycle_numbers = np.arange(1, len(peak_cycle_numbers) + 1)
    # Update the processed_peaks_and_troughs_df
    indexes_to_renumber = track_df[track_df["TYPE"] == "PEAK"].sort_values("TIME").index
    processed_peaks_and_troughs_df.loc[indexes_to_renumber, "CYCLE"] = peak_cycle_numbers

In [148]:
# Fix flagged tracks: trough_cycle_renumbering
for i in range(len(flagged_tracks["trough_cycle_renumbering"])):
    position, track_id = flagged_tracks["trough_cycle_renumbering"][i]
    track_df = all_peaks_and_troughs_df[(all_peaks_and_troughs_df["POSITION"] == position) & (all_peaks_and_troughs_df["TRACK_ID"] == track_id)]
    # Renumber trough cycle numbers
    trough_cycle_numbers = track_df[track_df["TYPE"] == "TROUGH"]["CYCLE"].values
    trough_cycle_numbers = np.arange(1, len(trough_cycle_numbers) + 1)
    # Update the processed_peaks_and_troughs_df
    indexes_to_renumber = track_df[track_df["TYPE"] == "TROUGH"].sort_values("TIME").index
    processed_peaks_and_troughs_df.loc[indexes_to_renumber, "CYCLE"] = trough_cycle_numbers

In [149]:
# Fix flagged tracks: type_nan
ids_without_comparison = []
positions_without_comparison = []

# Replace nan with the type of the closest peak or trough in index
for i in range(len(flagged_tracks["type_nan"])):
    position, track_id = flagged_tracks["type_nan"][i]
    track_df = all_peaks_and_troughs_df[(all_peaks_and_troughs_df["POSITION"] == position) & (all_peaks_and_troughs_df["TRACK_ID"] == track_id)]
    # Get the indexes of the nans
    indexes_to_replace = track_df[track_df["TYPE"].isnull()].index
    # Get the indexes of the peaks and troughs
    peaks_indexes = track_df[track_df["TYPE"] == "PEAK"].index
    troughs_indexes = track_df[track_df["TYPE"] == "TROUGH"].index
    # Replace the nans with the type of the closest peak or trough
    for index in indexes_to_replace:
        try:
            # Get the closest peak and trough
            closest_peak_index = peaks_indexes[np.abs(peaks_indexes - index).argmin()]
            closest_trough_index = troughs_indexes[np.abs(troughs_indexes - index).argmin()]
        except:
            ids_without_comparison.append(track_id)
            positions_without_comparison.append(position)
            continue
        # Get the type of the closest peak or trough
        if abs(closest_peak_index - index) < abs(closest_trough_index - index):
            type_of_closest = "PEAK"
        else:
            type_of_closest = "TROUGH"
        # Replace the nan with the type of the closest peak or trough
        processed_peaks_and_troughs_df.loc[index, "TYPE"] = type_of_closest

In [150]:
print(f"There are {len(ids_without_comparison)} tracks without comparison in positions {np.unique(positions_without_comparison)}")

There are 48 tracks without comparison in positions [37 38 39]


In [151]:
def map_cycle_numbers_to_peak_types(cycle_numbers):
    peak_types = np.array([""] * len(cycle_numbers), dtype=object)
    diff = np.diff(cycle_numbers, prepend=0)
    switch_place = np.where(diff <= 0)[0]
    peak_range = range(switch_place[0]) if len(switch_place) > 0 else 0
    peak_types[peak_range] = "PEAK"
    trough_range = range(switch_place[0], len(cycle_numbers)) if len(switch_place) > 0 else []
    peak_types[trough_range] = "TROUGH"
    return peak_types

cycle_numbers = [1, 1]
peak_types = map_cycle_numbers_to_peak_types(cycle_numbers)
assert np.all(peak_types == ["PEAK", "TROUGH"])

cycle_numbers = [1]
peak_types = map_cycle_numbers_to_peak_types(cycle_numbers)
assert np.all(peak_types == ["PEAK"])

cycle_numbers = [1, 2, 3, 1, 2, 3]
peak_types = map_cycle_numbers_to_peak_types(cycle_numbers)
assert np.all(peak_types == ["PEAK", "PEAK", "PEAK", "TROUGH", "TROUGH", "TROUGH"])

In [152]:
pos = 11
track_id = 0
track_df = all_peaks_and_troughs_df[(all_peaks_and_troughs_df["POSITION"] == pos) & (all_peaks_and_troughs_df["TRACK_ID"] == track_id)]
print(track_df)

Empty DataFrame
Columns: [TRACK_ID, TIME, RATIO, TYPE, CYCLE, POSITION]
Index: []


In [153]:
indexes = []
values_to_set = []

# Manually fix some NaNs if necessary
if date == "03-29-24":
    indexes = [
        (6, 156), # position, track
        (7, 6), (7, 51), (7, 60), (7, 64),
        (20, 2), (20, 3), (20, 10), (20, 11), (20, 12), (20, 15),
        (20, 31), (20, 41), (20, 43), (20, 47), (20, 49), (20, 52), (20, 58), (20, 61),
        (20, 62), (20, 63), (20, 76), (21, 35), (21, 36), (21, 44),
        (21, 47), (21, 49), (21, 59), (21, 74), (21, 93), (21, 97),
        (21, 118), (21, 120), (21, 136), (21, 143), (21, 149), (21, 158), (21, 159), 
        (21, 163), (21, 166), (21, 181),
        (22, 0), (23, 40), (23, 43), (23, 47), (23, 77), (23, 200), (23, 203), (23, 227),
        (23, 229), (24, 123), (24, 129), (24, 132), (24, 154), (24, 160),
        (26, 94),

    ]
    values_to_set = [
        ["PEAK", "PEAK", "TROUGH", "TROUGH"],
        ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "TROUGH"],
        ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "TROUGH"],
        ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "TROUGH"],
        ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "TROUGH"],
        ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], 
        ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "TROUGH"],
        ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "TROUGH"],
        ["PEAK", "PEAK", "TROUGH"], ["PEAK", "PEAK", "TROUGH"],
        ["PEAK", "PEAK", "TROUGH"], ["PEAK", "PEAK", "TROUGH", "TROUGH"],
        ["PEAK", "PEAK", "TROUGH"], ["PEAK", "PEAK", "TROUGH", "TROUGH"], ["PEAK", "TROUGH"],
        ["PEAK", "PEAK", "TROUGH"], ["PEAK", "PEAK", "TROUGH", "TROUGH"], ["PEAK", "PEAK", "TROUGH"],
        ["PEAK", "TROUGH"], ["PEAK", "PEAK", "TROUGH"], ["PEAK", "PEAK", "TROUGH"], ["PEAK", "TROUGH"],
        ["PEAK", "TROUGH", "TROUGH"], ["PEAK", "TROUGH"],
        ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "PEAK", "TROUGH", "TROUGH"],
        ["PEAK"] * 8 + ["TROUGH"] * 7, ["PEAK"] * 13 + ["TROUGH"] * 12,
        ["PEAK"] * 14 + ["TROUGH"] * 13, ["PEAK"] * 10 + ["TROUGH"] * 10,
        ["PEAK"] * 12 + ["TROUGH"] * 12, ["PEAK"] * 11 + ["TROUGH"] * 11, ["PEAK"] * 13 + ["TROUGH"] * 13,
        ["PEAK"] * 10 + ["TROUGH"] * 9,
        ["PEAK"] * 9 + ["TROUGH"] * 8,
        ["PEAK"] * 3 + ["TROUGH"] * 2, ["PEAK", "PEAK", "TROUGH"], ["PEAK", "PEAK", "TROUGH"],
        ["PEAK", "PEAK", "TROUGH"],
        ["PEAK", "PEAK", "TROUGH"],
        ["PEAK"] * 3 + ["TROUGH"] * 3,

    ]

if date == "08-09-24":
    indexes = [
        (0, 15), # position, track
        (0, 31), (0, 39), (0, 47), (0, 49), (0, 237), (0, 363),

        (1, 8), (1, 10), (1, 38), (1, 116), (1, 170),

        (5, 12), (6, 53), 

        # here
        (11, 11), (11, 23), (11, 30), (11, 33),
        (11, 36), (11, 42), (11, 76), (11, 181),
        (11, 224), (11, 227),

        (12, 6), (12, 8), (12, 9), (12, 14), (12, 16), (12, 17),
        (12, 26), (12, 27), (12, 32), (12, 33), (12, 34), (12, 35),
        (12, 37), (12, 40), (12, 41), (12, 44), (12, 46), (12, 47),
        (12, 52), (12, 53), (12, 55), (12, 60), (12, 63), (12, 67),
        (12, 69), (12, 70), (12, 72), (12, 74), (12, 76), (12, 79),
        (12, 86), (12, 87), (12, 88), (12, 89), (12, 90), (12, 92),
        (12, 94), (12, 96), (12, 98), (12, 99), (12, 100), (12, 101),
        (12, 103), (12, 105), (12, 107), (12, 109),

        (13, 4), 

        (15, 34), (15, 258), (15, 278), (15, 310), (15, 458), (15, 540),

        
        
        (17, 61), (17, 215), (17, 231), (18, 96),
        (20, 4), (20, 19), (23, 81), (25, 135), (26, 9), (26, 91),
        (26, 153), (26, 293), (26, 300), (26, 303),
    ]
    values_to_set = [
        ["PEAK", "TROUGH"], 
        ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "TROUGH"],

        ["PEAK", "PEAK", "TROUGH"],
        ["PEAK", "PEAK", "TROUGH"],
        ["PEAK", "TROUGH", "TROUGH"],
        ["PEAK",  "PEAK", "TROUGH", "TROUGH"],
        ["PEAK", "PEAK", "TROUGH"],

        ["PEAK", "PEAK", "PEAK", "TROUGH", "TROUGH", "TROUGH"],
        ["PEAK", "PEAK", "PEAK", "TROUGH", "TROUGH"],

        # here
        ["PEAK", "PEAK", "TROUGH"], 
        ["PEAK", "PEAK", "TROUGH"],
        ["PEAK", "PEAK", "TROUGH"],
        ["PEAK", "PEAK", "TROUGH"],
        ["PEAK", "TROUGH"], 
        ["PEAK", "PEAK", "PEAK", "TROUGH", "TROUGH"],
        ["PEAK", "TROUGH", "TROUGH"],
        ["PEAK", "PEAK", "PEAK", "TROUGH", "TROUGH"],
        ["PEAK", "PEAK", "TROUGH"],
        ["PEAK", "PEAK", "PEAK", "TROUGH", "TROUGH"],
        # 12
        ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], 
        ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], 
        ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], 
        ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], 
        ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], 
        ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], 
        ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], 
        ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], 
        ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], 
        ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], 
        ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], 
        ["PEAK", "TROUGH"], 
        ["PEAK", "TROUGH"], 

        # 13
        ["PEAK", "TROUGH"],
        # 15
        ["PEAK", "TROUGH", "TROUGH"],
        ["PEAK", "TROUGH", "TROUGH"],
        ["PEAK", "PEAK", "PEAK", "TROUGH", "TROUGH", "TROUGH"],
        ["PEAK", "TROUGH", "TROUGH"],
        ["PEAK", "TROUGH", "TROUGH"],
        ["PEAK", "TROUGH", "TROUGH"],


        
        ["PEAK", "PEAK", "PEAK", "PEAK", "PEAK", "TROUGH", "TROUGH", "TROUGH", "TROUGH"],
        ["PEAK", "PEAK", "PEAK", "PEAK", "TROUGH", "TROUGH", "TROUGH"],
        ["PEAK", "PEAK", "PEAK", "PEAK", "PEAK", "TROUGH", "TROUGH", "TROUGH", "TROUGH"],
        ["PEAK", "PEAK", "PEAK", "PEAK", "TROUGH", "TROUGH", "TROUGH", "TROUGH"],
        ["PEAK", "TROUGH", "TROUGH"], ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], 
        ["PEAK", "TROUGH"], ["PEAK", "TROUGH"], 
        ["PEAK",  "PEAK", "TROUGH", "TROUGH"], ["PEAK", "TROUGH"], 
        ["PEAK",  "PEAK", "TROUGH", "TROUGH"], ["PEAK", "TROUGH"], 
        ["PEAK",  "PEAK", "TROUGH", "TROUGH"],
    ]

if date == "08-23-24":
    indexes = []
    values_to_set = []

    for i in range(len(ids_without_comparison)):
        position = positions_without_comparison[i]
        track_id = ids_without_comparison[i]
        track_df = all_peaks_and_troughs_df[(all_peaks_and_troughs_df["POSITION"] == position) &
                                            (all_peaks_and_troughs_df["TRACK_ID"] == track_id)] 
        indexes.append((position, track_id))
        peak_types = map_cycle_numbers_to_peak_types(track_df["CYCLE"].values)
        values_to_set.append(peak_types)

if date == "08-28-24":
    indexes = []
    values_to_set = []

    for i in range(len(ids_without_comparison)):
        position = positions_without_comparison[i]
        track_id = ids_without_comparison[i]
        track_df = all_peaks_and_troughs_df[(all_peaks_and_troughs_df["POSITION"] == position) &
                                            (all_peaks_and_troughs_df["TRACK_ID"] == track_id)] 
        indexes.append((position, track_id))
        peak_types = map_cycle_numbers_to_peak_types(track_df["CYCLE"].values)
        values_to_set.append(peak_types)

if date == "08-16-24":
    indexes = []
    values_to_set = []

    for i in range(len(ids_without_comparison)):
        position = positions_without_comparison[i]
        track_id = ids_without_comparison[i]
        track_df = all_peaks_and_troughs_df[(all_peaks_and_troughs_df["POSITION"] == position) &
                                            (all_peaks_and_troughs_df["TRACK_ID"] == track_id)] 
        indexes.append((position, track_id))
        peak_types = map_cycle_numbers_to_peak_types(track_df["CYCLE"].values)
        values_to_set.append(peak_types)

if date == "09-20-24":
    indexes = []
    values_to_set = []

    for i in range(len(ids_without_comparison)):
        position = positions_without_comparison[i]
        track_id = ids_without_comparison[i]
        track_df = all_peaks_and_troughs_df[(all_peaks_and_troughs_df["POSITION"] == position) &
                                            (all_peaks_and_troughs_df["TRACK_ID"] == track_id)] 
        indexes.append((position, track_id))
        peak_types = map_cycle_numbers_to_peak_types(track_df["CYCLE"].values)
        values_to_set.append(peak_types)
    
for position, track in indexes:
    index_location = processed_peaks_and_troughs_df[(processed_peaks_and_troughs_df["POSITION"] == position) & (processed_peaks_and_troughs_df["TRACK_ID"] == track)].index
    processed_peaks_and_troughs_df.loc[index_location, "TYPE"] = values_to_set.pop(0)

In [154]:
# Fix flagged tracks: time_nan
indexes_to_remove = []
# For each one, check which index has NaN on time and remove those indexes
for i in range(len(flagged_tracks["time_nan"])):
    position, track_id = flagged_tracks["time_nan"][i]
    track_df = all_peaks_and_troughs_df[(all_peaks_and_troughs_df["POSITION"] == position) & (all_peaks_and_troughs_df["TRACK_ID"] == track_id)]
    # Check which indexes have NaNs on time
    if track_df["TIME"].isnull().values.any():
        indexes_to_remove += track_df[track_df["TIME"].isnull()].index.to_list()
# Add indexes flagged for removal
indexes_to_remove += flagged_tracks["indexes_to_remove"]
# Remove those indexes
processed_peaks_and_troughs_df = processed_peaks_and_troughs_df.drop(index=indexes_to_remove)

In [155]:
# Check that there are no more nans
if processed_peaks_and_troughs_df.isnull().values.any():
    print("There are still NaNs")
    # print the indexes of the NaNs
    print(processed_peaks_and_troughs_df[processed_peaks_and_troughs_df.isnull().any(axis=1)])
else:
    print("There are no more NaNs")

There are no more NaNs


In [156]:
# Manually fix remaining NaNs if necessary
if date == "11-04-22":
    feature = "CYCLE"
    indexes = [1310, 1683, 6921]
    values_to_set = [5.0, 3.0, 1.0]

    for i in range(len(indexes)):
        processed_peaks_and_troughs_df.loc[indexes[i], feature] = values_to_set[i]

if date == "03-29-24":
    feature = "TYPE"
    indexes = processed_peaks_and_troughs_df[processed_peaks_and_troughs_df.isnull().any(axis=1)].index
    processed_peaks_and_troughs_df.loc[indexes, feature] = "PEAK"

if date == "08-09-24":
    feature = "CYCLE"
    indexes = [1248]
    for index in indexes:
        processed_peaks_and_troughs_df.loc[index, feature] = 1.0

    feature = "TYPE"
    indexes = [4647, 4969]
    values_to_set = ["TROUGH", "PEAK"]
    for i in range(len(indexes)):
        processed_peaks_and_troughs_df.loc[indexes[i], feature] = values_to_set[i]

if date == "08-23-24":
    feature = "CYCLE"
    indexes = [5321]
    values_to_set = [15.0]
    for i in range(len(indexes)):
        processed_peaks_and_troughs_df.loc[indexes[i], feature] = values_to_set[i]

if date == "08-28-24":
    feature = "TYPE"
    indexes = processed_peaks_and_troughs_df[processed_peaks_and_troughs_df.isnull().any(axis=1)].index
    values_to_set = ["PEAK"] * len(indexes)
    for i in range(len(indexes)):
        processed_peaks_and_troughs_df.loc[indexes[i], feature] = values_to_set[i]

if date == "08-16-24":
    feature = "TYPE"
    indexes = processed_peaks_and_troughs_df[processed_peaks_and_troughs_df.isnull().any(axis=1)].index
    values_to_set = ["TROUGH"] * len(indexes)
    for i in range(len(indexes)):
        processed_peaks_and_troughs_df.loc[indexes[i], feature] = values_to_set[i]


In [157]:
print(processed_peaks_and_troughs_df.isnull().sum())
# Check that there are no more nans
if processed_peaks_and_troughs_df.isnull().values.any():
    print("There are still NaNs")
else:
    print("There are no more NaNs")

TRACK_ID    0
TIME        0
RATIO       0
TYPE        0
CYCLE       0
POSITION    0
dtype: int64
There are no more NaNs


# Combine peaks/troughs data with track features

In [158]:
# Load position and radius features
position_features_df = pd.read_csv(data_location + rf"\{date}_position_and_radius_features.csv")

In [159]:
# Add position and radius features to all_peaks_and_troughs_df, on POSITION and TRACK_ID
all_data = pd.merge(processed_peaks_and_troughs_df, position_features_df, on=["POSITION", "TRACK_ID"])
# Remove Unnamed columns
all_data = all_data.loc[:, ~all_data.columns.str.contains('^Unnamed')]

# Save

In [160]:
# Save all data
all_data.to_csv(data_location + rf"\{date}_all_features_combined.csv", index=False)