In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt

In [2]:
# Experiment tag; it is interpolated into the data folder name below and into
# the CSV file names read and written further down (presumably MM-DD-YY — confirm).
date = "10-28-22"
# Folder holding this experiment's analysis files. It is a plain string with no
# trailing separator: later cells append rf"\{date}_..." file names to it, so
# the notebook only resolves on a Windows machine with this E: drive layout.
data_location = rf"E:\Project 6 - Temperature\Experiments\data_analysis\{date}"

# Load all data combined

In [3]:
# Read the combined feature table for the configured experiment date.
combined_csv_path = data_location + rf"\{date}_all_features_combined.csv"
all_data = pd.read_csv(combined_csv_path)

# String copies of CYCLE and POSITION; the scatter plots below use them as
# their color / hover columns.
string_copies = {"CYCLE_STR": "CYCLE", "POSITION_STR": "POSITION"}
for text_column, source_column in string_copies.items():
    all_data[text_column] = all_data[source_column].astype(str)

In [4]:
# Boolean masks shared by the four subsets below.
is_peak = all_data["TYPE"] == "PEAK"
is_trough = all_data["TYPE"] == "TROUGH"
in_row_1 = all_data["POSITION"] <= 12  # row 1: positions up to and including 12
in_row_2 = all_data["POSITION"] > 12   # row 2: positions above 12

# Peaks and troughs of each row.
peaks_row_1 = all_data.loc[is_peak & in_row_1]
troughs_row_1 = all_data.loc[is_trough & in_row_1]
peaks_row_2 = all_data.loc[is_peak & in_row_2]
troughs_row_2 = all_data.loc[is_trough & in_row_2]

# Visualize data before renumbering

In [None]:
# Row 1 peaks before any renumbering, colored by their current cycle label.
px.scatter(
    data_frame=peaks_row_1,
    x="TIME",
    y="MEAN_TEMPERATURE",
    color="CYCLE_STR",
)

In [None]:
# Row 1 troughs before any renumbering, colored by their current cycle label.
px.scatter(
    data_frame=troughs_row_1,
    x="TIME",
    y="MEAN_TEMPERATURE",
    color="CYCLE_STR",
)

In [None]:
# Row 2 peaks before any renumbering, colored by their current cycle label.
px.scatter(
    data_frame=peaks_row_2,
    x="TIME",
    y="MEAN_TEMPERATURE",
    color="CYCLE_STR",
)

In [None]:
# Row 2 troughs before any renumbering, colored by their current cycle label.
px.scatter(
    data_frame=troughs_row_2,
    x="TIME",
    y="MEAN_TEMPERATURE",
    color="CYCLE_STR",
)

# First round of cycle number fixes

## Peaks

In [5]:
# First-round fix: every peak at positions 2 through 8 (both inclusive, all on
# row 1) gets its cycle number raised by one.
# NOTE: the increment is applied to all_data in place, so running this cell a
# second time without reloading the CSV adds another +1.
peaks = all_data.loc[all_data["TYPE"] == "PEAK"]
in_positions_2_to_8 = peaks["POSITION"].between(2, 8)
places_to_add = peaks.loc[in_positions_2_to_8].index
all_data.loc[places_to_add, "CYCLE"] += 1

# Refresh the string copies so they match the updated CYCLE values.
for text_column, source_column in (("CYCLE_STR", "CYCLE"), ("POSITION_STR", "POSITION")):
    all_data[text_column] = all_data[source_column].astype(str)

In [6]:
# Rebuild the per-row subsets from all_data so they carry its current CYCLE /
# CYCLE_STR values (the subsets are separate frames made by boolean indexing).
is_peak = all_data["TYPE"] == "PEAK"
is_trough = all_data["TYPE"] == "TROUGH"
in_row_1 = all_data["POSITION"] <= 12  # row 1: positions up to and including 12
in_row_2 = all_data["POSITION"] > 12   # row 2: positions above 12

peaks_row_1 = all_data.loc[is_peak & in_row_1]
troughs_row_1 = all_data.loc[is_trough & in_row_1]
peaks_row_2 = all_data.loc[is_peak & in_row_2]
troughs_row_2 = all_data.loc[is_trough & in_row_2]

### Debug peaks per position

In [None]:
# Row 1 peaks, colored by cycle label, for spotting mislabelled points.
px.scatter(
    data_frame=peaks_row_1,
    x="TIME",
    y="MEAN_TEMPERATURE",
    color="CYCLE_STR",
)

In [None]:
# Row 1 troughs, colored by cycle label, shown next to the peaks for comparison.
px.scatter(
    data_frame=troughs_row_1,
    x="TIME",
    y="MEAN_TEMPERATURE",
    color="CYCLE_STR",
)

In [None]:
# Inspect the peaks of one position at a time. Edit `position` by hand; for
# positions above 12 switch the source frame to peaks_row_2.
position = 0
df = peaks_row_1.loc[peaks_row_1["POSITION"] == position]
px.scatter(
    data_frame=df,
    x="TIME",
    y="MEAN_TEMPERATURE",
    color="CYCLE_STR",
    hover_data=["POSITION_STR", "TRACK_ID"],  # hovering shows which track a point belongs to
)

### Second round of cycle number fixes

In [7]:
# Cycle-1 peaks of each row, reduced to the columns the time-based selection needs.
selection_columns = ["TIME", "TRACK_ID", "POSITION"]
row_1_selection = peaks_row_1.loc[peaks_row_1["CYCLE"] == 1, selection_columns]
row_2_selection = peaks_row_2.loc[peaks_row_2["CYCLE"] == 1, selection_columns]

# Cycle-1 peaks whose TIME is larger than 50.
late_row_1 = row_1_selection.loc[row_1_selection["TIME"] > 50]
late_row_2 = row_2_selection.loc[row_2_selection["TIME"] > 50]

# Track ids, keyed by 'Pos<position>', whose peaks need +1 on their cycle number.
# Explicit lists are hand-picked track ids; the computed entries take every
# track id whose cycle-1 peak time is larger than 50 at that position.
plus_one_track_ids = {
    'Pos1': [140, 82],
    'Pos3': [125],
    'Pos4': [188],
    'Pos5': [274],
    'Pos9': late_row_1.loc[late_row_1["POSITION"] == 9, "TRACK_ID"].tolist(),
    'Pos10': [36, 4, 41, 42],
    'Pos13': [161],
    'Pos15': [277, 90, 122, 22, 158],
    **{
        f"Pos{row_2_position}": late_row_2.loc[
            late_row_2["POSITION"] == row_2_position, "TRACK_ID"
        ].tolist()
        for row_2_position in (16, 17, 18, 19, 21)
    },
}

In [8]:
# Collect the all_data row labels of every peak that belongs to one of the
# (position, track id) pairs listed in plus_one_track_ids.
peaks = all_data.loc[all_data["TYPE"] == "PEAK"]
indices_to_add = []
for pos_key, track_ids in plus_one_track_ids.items():
    # Keys look like 'Pos16'; the number after the 3-character prefix is the position.
    at_position = peaks["POSITION"] == int(pos_key[3:])
    for track_id in track_ids:
        matching_peaks = peaks.loc[at_position & (peaks["TRACK_ID"] == track_id)]
        indices_to_add += matching_peaks.index.tolist()

# Raise the cycle number of those peaks by one (in place on all_data, so
# re-running this cell would add one more).
all_data.loc[indices_to_add, "CYCLE"] += 1

# Refresh the string copies used by the plots.
all_data["CYCLE_STR"] = all_data["CYCLE"].astype(str)
all_data["POSITION_STR"] = all_data["POSITION"].astype(str)

In [9]:
# Rebuild the per-row subsets from all_data so they carry its current CYCLE /
# CYCLE_STR values (the subsets are separate frames made by boolean indexing).
is_peak = all_data["TYPE"] == "PEAK"
is_trough = all_data["TYPE"] == "TROUGH"
in_row_1 = all_data["POSITION"] <= 12  # row 1: positions up to and including 12
in_row_2 = all_data["POSITION"] > 12   # row 2: positions above 12

peaks_row_1 = all_data.loc[is_peak & in_row_1]
troughs_row_1 = all_data.loc[is_trough & in_row_1]
peaks_row_2 = all_data.loc[is_peak & in_row_2]
troughs_row_2 = all_data.loc[is_trough & in_row_2]

### Check result

In [None]:
# Row 1 peaks with their current cycle labels; hovering shows the position
# and track id of each point.
px.scatter(
    data_frame=peaks_row_1,
    x="TIME",
    y="MEAN_TEMPERATURE",
    color="CYCLE_STR",
    hover_data=["POSITION_STR", "TRACK_ID"],
)

In [None]:
# Spot-check the peaks of a single row 2 position; edit `position` by hand.
position = 16
df = peaks_row_2.loc[peaks_row_2["POSITION"] == position]
px.scatter(
    data_frame=df,
    x="TIME",
    y="MEAN_TEMPERATURE",
    color="CYCLE_STR",
    hover_data=["POSITION_STR", "TRACK_ID"],
)

## Troughs

### Plot

In [None]:
# Inspect the troughs of one position at a time. Edit `position` by hand; for
# positions up to 12 switch the source frame to troughs_row_1.
position = 21
df = troughs_row_2.loc[troughs_row_2["POSITION"] == position]
px.scatter(
    data_frame=df,
    x="TIME",
    y="MEAN_TEMPERATURE",
    color="CYCLE_STR",
    hover_data=["POSITION_STR", "TRACK_ID"],  # hovering shows which track a point belongs to
)

### Renumber

In [10]:
# Cycle-1 troughs of each row, reduced to the columns the time-based selection needs.
selection_columns = ["TIME", "TRACK_ID", "POSITION"]
row_1_selection = troughs_row_1.loc[troughs_row_1["CYCLE"] == 1, selection_columns]
row_2_selection = troughs_row_2.loc[troughs_row_2["CYCLE"] == 1, selection_columns]

# Per-position TIME cut-offs: at these positions every track id whose cycle-1
# trough time is larger than the cut-off is selected.
row_1_time_cutoffs = {1: 100, 2: 70, 3: 50, 4: 50, 5: 50, 6: 50, 7: 50, 8: 50, 9: 60}
row_2_time_cutoffs = {16: 80, 17: 80, 18: 80, 21: 80}

# Track ids, keyed by 'Pos<position>', whose troughs need +1 on their cycle number.
# Explicit lists are hand-picked track ids; the rest come from the cut-offs above.
plus_one_track_ids = {
    'Pos0': [397],
    **{
        f"Pos{row_1_position}": row_1_selection.loc[
            (row_1_selection["POSITION"] == row_1_position)
            & (row_1_selection["TIME"] > min_time),
            "TRACK_ID",
        ].tolist()
        for row_1_position, min_time in row_1_time_cutoffs.items()
    },
    'Pos10': [4, 36],
    'Pos15': [22, 27],
    **{
        f"Pos{row_2_position}": row_2_selection.loc[
            (row_2_selection["POSITION"] == row_2_position)
            & (row_2_selection["TIME"] > min_time),
            "TRACK_ID",
        ].tolist()
        for row_2_position, min_time in row_2_time_cutoffs.items()
    },
}

In [11]:
# Collect the all_data row labels of every trough that belongs to one of the
# (position, track id) pairs listed in plus_one_track_ids.
troughs = all_data.loc[all_data["TYPE"] == "TROUGH"]
indices_to_add = []
for pos_key, track_ids in plus_one_track_ids.items():
    # Keys look like 'Pos16'; the number after the 3-character prefix is the position.
    at_position = troughs["POSITION"] == int(pos_key[3:])
    for track_id in track_ids:
        matching_troughs = troughs.loc[at_position & (troughs["TRACK_ID"] == track_id)]
        indices_to_add += matching_troughs.index.tolist()

# Raise the cycle number of those troughs by one (in place on all_data, so
# re-running this cell would add one more).
all_data.loc[indices_to_add, "CYCLE"] += 1

# Refresh the string copies used by the plots.
all_data["CYCLE_STR"] = all_data["CYCLE"].astype(str)
all_data["POSITION_STR"] = all_data["POSITION"].astype(str)

In [12]:
# Rebuild the per-row subsets from all_data so they carry its current CYCLE /
# CYCLE_STR values (the subsets are separate frames made by boolean indexing).
is_peak = all_data["TYPE"] == "PEAK"
is_trough = all_data["TYPE"] == "TROUGH"
in_row_1 = all_data["POSITION"] <= 12  # row 1: positions up to and including 12
in_row_2 = all_data["POSITION"] > 12   # row 2: positions above 12

peaks_row_1 = all_data.loc[is_peak & in_row_1]
troughs_row_1 = all_data.loc[is_trough & in_row_1]
peaks_row_2 = all_data.loc[is_peak & in_row_2]
troughs_row_2 = all_data.loc[is_trough & in_row_2]

### Check result

In [None]:
# Row 1 troughs with their current cycle labels; hovering shows the position
# and track id of each point.
px.scatter(
    data_frame=troughs_row_1,
    x="TIME",
    y="MEAN_TEMPERATURE",
    color="CYCLE_STR",
    hover_data=["POSITION_STR", "TRACK_ID"],
)

# Save result

In [13]:
# Write the renumbered table next to the input file, without the row index.
# The CYCLE_STR / POSITION_STR helper columns are still part of all_data and
# are therefore written out as well.
renumbered_csv_path = data_location + rf"\{date}_all_features_combined_renumbered.csv"
all_data.to_csv(renumbered_csv_path, index=False)