In [1]:
import numpy as np
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt

In [2]:
# Date tag of the experiment; it appears in both the folder name and the file names.
date = "03-29-24"
# Folder that holds this experiment's analysis files (absolute Windows path).
data_location = "\\".join([r"E:\Project 6 - Temperature\Experiments\data_analysis", date])

# Load all data combined

In [3]:
# Read the combined feature table for this experiment date.
all_data = pd.read_csv(rf"{data_location}\{date}_all_features_combined.csv")
# Keep string copies of CYCLE and POSITION; the scatter plots in this notebook
# use them as color and hover fields.
all_data = all_data.assign(
    CYCLE_STR=lambda frame: frame["CYCLE"].astype(str),
    POSITION_STR=lambda frame: frame["POSITION"].astype(str),
)

In [4]:
# Boolean masks shared by the four subsets below.
is_peak = all_data["TYPE"].eq("PEAK")
is_trough = all_data["TYPE"].eq("TROUGH")
in_row_1 = all_data["POSITION"].le(13)
in_row_2 = all_data["POSITION"].gt(13)

# Row 1: positions up to and including 13
peaks_row_1 = all_data.loc[is_peak & in_row_1]
troughs_row_1 = all_data.loc[is_trough & in_row_1]
# Row 2: positions above 13
peaks_row_2 = all_data.loc[is_peak & in_row_2]
troughs_row_2 = all_data.loc[is_trough & in_row_2]

# Visualize data before renumbering

In [6]:
# Row-1 peaks before renumbering: mean temperature against time, colored by
# cycle number, with the position shown on hover.
px.scatter(
    peaks_row_1,
    x="TIME",
    y="MEAN_TEMPERATURE",
    color="CYCLE_STR",
    hover_data=["POSITION_STR"],
)

In [7]:
# Row-1 troughs before renumbering: mean temperature against time, colored by
# cycle number.
px.scatter(
    troughs_row_1,
    x="TIME",
    y="MEAN_TEMPERATURE",
    color="CYCLE_STR",
)

In [9]:
# Row-2 peaks before renumbering: mean temperature against time, colored by
# cycle number, with the position shown on hover.
px.scatter(
    peaks_row_2,
    x="TIME",
    y="MEAN_TEMPERATURE",
    color="CYCLE_STR",
    hover_data=["POSITION_STR"],
)

In [None]:
# Row-2 troughs before renumbering: mean temperature against time, colored by
# cycle number.
px.scatter(
    troughs_row_2,
    x="TIME",
    y="MEAN_TEMPERATURE",
    color="CYCLE_STR",
)

# Cycle number fixes

## Peaks

### First round of cycle number fixes

In [6]:
# Cycle-1 peaks of each row; only the columns needed for the cut-off test are kept.
row_1_selection = peaks_row_1.loc[peaks_row_1["CYCLE"] == 1, ["TIME", "TRACK_ID", "POSITION"]]
row_2_selection = peaks_row_2.loc[peaks_row_2["CYCLE"] == 1, ["TIME", "TRACK_ID", "POSITION"]]

# Per-position TIME cut-offs: a cycle-1 peak later than the cut-off marks its
# track as needing +1 on the cycle number.
row_1_time_cutoffs = {1: 75, 2: 160, 3: 50, 4: 25, 6: 50}
row_2_time_cutoffs = {21: 50, 22: 30, 23: 30, 25: 30, 26: 30}

# Track ids per position that need +1 on their cycle number
plus_one_track_ids = {}
for selection, time_cutoffs in (
    (row_1_selection, row_1_time_cutoffs),
    (row_2_selection, row_2_time_cutoffs),
):
    for pos_number, time_cutoff in time_cutoffs.items():
        is_late = (selection["POSITION"] == pos_number) & (selection["TIME"] > time_cutoff)
        plus_one_track_ids[f"Pos{pos_number}"] = selection.loc[is_late, "TRACK_ID"].tolist()
# Position 27 is listed by hand rather than by a cut-off.
plus_one_track_ids["Pos27"] = [240]

In [7]:
# Collect the row labels of every peak that belongs to a listed
# (position, track id) pair. The position number is parsed from the "Pos<n>" key.
peaks = all_data.loc[all_data["TYPE"] == "PEAK"]
indices_to_add = [
    row_label
    for pos, track_ids in plus_one_track_ids.items()
    for track_id in track_ids
    for row_label in peaks.loc[
        (peaks["POSITION"] == int(pos[3:])) & (peaks["TRACK_ID"] == track_id)
    ].index.tolist()
]

# Add 1 to the cycle number on those rows, then refresh the string copies
# that the plots use.
all_data.loc[indices_to_add, "CYCLE"] = all_data.loc[indices_to_add, "CYCLE"] + 1
all_data = all_data.assign(
    CYCLE_STR=lambda frame: frame["CYCLE"].astype(str),
    POSITION_STR=lambda frame: frame["POSITION"].astype(str),
)

### Second round of fixes

In [8]:
# Add two to the cycle number of every row at position 2 and one to every row
# at position 5. There is no TYPE filter here, so peaks and troughs at these
# positions are both shifted.
position_2 = all_data.loc[all_data["POSITION"].eq(2)].index.tolist()
all_data.loc[position_2, "CYCLE"] += 2
position_5 = all_data.loc[all_data["POSITION"].eq(5)].index.tolist()
all_data.loc[position_5, "CYCLE"] += 1

In [9]:
# Add one to the cycle number of every row at position 24 (no TYPE filter, so
# peaks and troughs are both shifted).
position_24 = all_data.loc[all_data["POSITION"].eq(24)].index.tolist()
all_data.loc[position_24, "CYCLE"] += 1

In [10]:
# Refresh the string copies so the plots reflect the updated cycle numbers.
all_data = all_data.assign(
    CYCLE_STR=lambda frame: frame["CYCLE"].astype(str),
    POSITION_STR=lambda frame: frame["POSITION"].astype(str),
)

### Third round of fixes

In [11]:
# Hand-picked track ids per position that still need +1 on their cycle number.
# Keys are "Pos<position number>"; the cell that applies the fix parses the
# number out of the key.
# (The cycle-1 row selections that used to be recomputed here were never read
# by this dictionary, so they are no longer rebuilt.)
plus_one_track_ids = {
    # Row 1
    'Pos3': [73],
    'Pos4': [149],
    'Pos6': [154, 155, 156],
    # Row 2
    'Pos21': [0, 172],
    'Pos23': [203, 227, 229, 250],
    'Pos26': [169],
}

In [12]:
# Gather the row labels of all peaks whose (position, track id) is listed in
# plus_one_track_ids. The position number is the part of the key after "Pos".
peaks = all_data.loc[all_data["TYPE"] == "PEAK"]
indices_to_add = [
    row_label
    for pos, track_ids in plus_one_track_ids.items()
    for track_id in track_ids
    for row_label in peaks.loc[
        (peaks["POSITION"] == int(pos[3:])) & (peaks["TRACK_ID"] == track_id)
    ].index.tolist()
]

# Bump the cycle number on those rows and rebuild the string copies for plotting.
all_data.loc[indices_to_add, "CYCLE"] = all_data.loc[indices_to_add, "CYCLE"] + 1
all_data = all_data.assign(
    CYCLE_STR=lambda frame: frame["CYCLE"].astype(str),
    POSITION_STR=lambda frame: frame["POSITION"].astype(str),
)

### To remove

In [13]:
# (position, track id) pairs whose rows are dropped entirely. TYPE is not part
# of the filter, so both peaks and troughs of the track are removed.
to_remove = [(8, 163)]
for pos, track_id in to_remove:
    is_match = all_data["POSITION"].eq(pos) & all_data["TRACK_ID"].eq(track_id)
    index = all_data.loc[is_match].index.tolist()
    all_data = all_data.drop(index=index)

### Check result

In [14]:
# Rebuild the four subsets from the updated table.
is_peak = all_data["TYPE"].eq("PEAK")
is_trough = all_data["TYPE"].eq("TROUGH")
in_row_1 = all_data["POSITION"].le(13)
in_row_2 = all_data["POSITION"].gt(13)

# Row 1: positions up to and including 13
peaks_row_1 = all_data.loc[is_peak & in_row_1]
troughs_row_1 = all_data.loc[is_trough & in_row_1]
# Row 2: positions above 13
peaks_row_2 = all_data.loc[is_peak & in_row_2]
troughs_row_2 = all_data.loc[is_trough & in_row_2]

In [15]:
# Row-1 peaks after the fixes; position and track id are shown on hover so
# remaining misnumbered tracks can be identified.
px.scatter(
    peaks_row_1,
    x="TIME",
    y="MEAN_TEMPERATURE",
    color="CYCLE_STR",
    hover_data=["POSITION_STR", "TRACK_ID"],
)

In [None]:
# Look at the peaks of a single row-2 position on its own.
position = 16
df = peaks_row_2.loc[peaks_row_2["POSITION"].eq(position)]
px.scatter(
    df,
    x="TIME",
    y="MEAN_TEMPERATURE",
    color="CYCLE_STR",
    hover_data=["POSITION_STR", "TRACK_ID"],
)

## Troughs

In [16]:
# Position 16 is treated as an outlier: drop all of its rows (peaks and troughs).
all_data = all_data.loc[all_data["POSITION"].ne(16)]

In [17]:
# Rebuild the four subsets now that position 16 has been removed.
is_peak = all_data["TYPE"].eq("PEAK")
is_trough = all_data["TYPE"].eq("TROUGH")
in_row_1 = all_data["POSITION"].le(13)
in_row_2 = all_data["POSITION"].gt(13)

# Row 1: positions up to and including 13
peaks_row_1 = all_data.loc[is_peak & in_row_1]
troughs_row_1 = all_data.loc[is_trough & in_row_1]
# Row 2: positions above 13
peaks_row_2 = all_data.loc[is_peak & in_row_2]
troughs_row_2 = all_data.loc[is_trough & in_row_2]

### Renumber

In [18]:
# Cycle-1 troughs of each row; only the columns needed for the cut-off test are kept.
row_1_selection = troughs_row_1.loc[troughs_row_1["CYCLE"] == 1, ["TIME", "TRACK_ID", "POSITION"]]
row_2_selection = troughs_row_2.loc[troughs_row_2["CYCLE"] == 1, ["TIME", "TRACK_ID", "POSITION"]]

# Per-position TIME cut-offs: a cycle-1 trough later than the cut-off marks its
# track as needing +1 on the cycle number.
row_1_time_cutoffs = {1: 100, 3: 60, 4: 60, 5: 60, 6: 40}
row_2_time_cutoffs = {21: 60, 22: 50, 23: 40, 25: 50, 26: 70}

# Track ids per position that need +1 on their cycle number
plus_one_track_ids = {}
for selection, time_cutoffs in (
    (row_1_selection, row_1_time_cutoffs),
    (row_2_selection, row_2_time_cutoffs),
):
    for pos_number, time_cutoff in time_cutoffs.items():
        is_late = (selection["POSITION"] == pos_number) & (selection["TIME"] > time_cutoff)
        plus_one_track_ids[f"Pos{pos_number}"] = selection.loc[is_late, "TRACK_ID"].tolist()
# Position 27 is listed by hand rather than by a cut-off.
plus_one_track_ids["Pos27"] = [240]

In [19]:
# Collect the row labels of every trough that belongs to a listed
# (position, track id) pair. The position number is parsed from the "Pos<n>" key.
troughs = all_data.loc[all_data["TYPE"] == "TROUGH"]
indices_to_add = [
    row_label
    for pos, track_ids in plus_one_track_ids.items()
    for track_id in track_ids
    for row_label in troughs.loc[
        (troughs["POSITION"] == int(pos[3:])) & (troughs["TRACK_ID"] == track_id)
    ].index.tolist()
]

# Add 1 to the cycle number on those rows, then refresh the string copies
# that the plots use.
all_data.loc[indices_to_add, "CYCLE"] = all_data.loc[indices_to_add, "CYCLE"] + 1
all_data = all_data.assign(
    CYCLE_STR=lambda frame: frame["CYCLE"].astype(str),
    POSITION_STR=lambda frame: frame["POSITION"].astype(str),
)

### Second round

In [20]:
# Subtract one from the cycle number of every trough at position 6.
position_6 = all_data.loc[
    all_data["POSITION"].eq(6) & all_data["TYPE"].eq("TROUGH")
].index.tolist()
all_data.loc[position_6, "CYCLE"] -= 1

In [21]:
# Remove the single row labelled 1556.
# NOTE(review): this is a hard-coded row label, so it only targets the intended
# row while the input CSV keeps the same row order — confirm before re-using
# this notebook on another file.
index_to_rm = 1556
# Drop only when the label is still present, so re-running this cell is a
# no-op instead of raising KeyError.
if index_to_rm in all_data.index:
    all_data = all_data.drop(index=index_to_rm)

In [22]:
# Refresh the string copies so the plots reflect the updated cycle numbers.
all_data = all_data.assign(
    CYCLE_STR=lambda frame: frame["CYCLE"].astype(str),
    POSITION_STR=lambda frame: frame["POSITION"].astype(str),
)

### Third round

In [23]:
# Add two to the cycle number of the troughs of these (position, track id) pairs.
to_add_two = [(6, 154), (6, 155), (6, 156)]
for pos, track_id in to_add_two:
    is_match = (
        all_data["POSITION"].eq(pos)
        & all_data["TRACK_ID"].eq(track_id)
        & all_data["TYPE"].eq("TROUGH")
    )
    index = all_data.loc[is_match].index.tolist()
    all_data.loc[index, "CYCLE"] += 2

In [24]:
# Track ids at position 5 that need +1 on their cycle number: troughs with
# cycle number 2 that occur after TIME 75.
# NOTE(review): troughs_row_1 is not recomputed in this cell, so the CYCLE
# values tested here are those from the last time the row subsets were built,
# not the current values in all_data — confirm this is intended.
row_1_selection = troughs_row_1.loc[troughs_row_1["CYCLE"] == 2, ["TIME", "TRACK_ID", "POSITION"]]
is_late_pos_5 = (row_1_selection["POSITION"] == 5) & (row_1_selection["TIME"] > 75)
plus_one_track_ids = {
    # Row 1
    'Pos5': row_1_selection.loc[is_late_pos_5, "TRACK_ID"].tolist(),
}

In [25]:
# Gather the row labels of all troughs whose (position, track id) is listed in
# plus_one_track_ids. The position number is the part of the key after "Pos".
troughs = all_data.loc[all_data["TYPE"] == "TROUGH"]
indices_to_add = [
    row_label
    for pos, track_ids in plus_one_track_ids.items()
    for track_id in track_ids
    for row_label in troughs.loc[
        (troughs["POSITION"] == int(pos[3:])) & (troughs["TRACK_ID"] == track_id)
    ].index.tolist()
]

# Bump the cycle number on those rows.
all_data.loc[indices_to_add, "CYCLE"] = all_data.loc[indices_to_add, "CYCLE"] + 1

In [26]:
# Subtract one from the cycle number of every trough at position 5.
position_5 = all_data.loc[
    all_data["POSITION"].eq(5) & all_data["TYPE"].eq("TROUGH")
].index.tolist()
all_data.loc[position_5, "CYCLE"] -= 1

In [27]:
# Add one to the cycle number of the troughs of track 149 at position 4.
is_match = (
    all_data["POSITION"].eq(4)
    & all_data["TRACK_ID"].eq(149)
    & all_data["TYPE"].eq("TROUGH")
)
index = all_data.loc[is_match].index.tolist()
all_data.loc[index, "CYCLE"] += 1

In [28]:
# Add one to the cycle number of the troughs of these (position, track id) pairs.
to_add_one = [(23, 229), (23, 250), (23, 227)]
for pos, track_id in to_add_one:
    is_match = (
        all_data["POSITION"].eq(pos)
        & all_data["TRACK_ID"].eq(track_id)
        & all_data["TYPE"].eq("TROUGH")
    )
    index = all_data.loc[is_match].index.tolist()
    all_data.loc[index, "CYCLE"] += 1

In [29]:
# Refresh the string copies so the plots reflect the updated cycle numbers.
all_data = all_data.assign(
    CYCLE_STR=lambda frame: frame["CYCLE"].astype(str),
    POSITION_STR=lambda frame: frame["POSITION"].astype(str),
)

### Check result

In [30]:
# Rebuild the four subsets from the renumbered table for the final check plots.
# The row boundary is 13, the same value used by every other split in this
# notebook; using 12 here would move position 13 from row 1 into row 2 for
# the check plots only.
ROW_1_LAST_POSITION = 13

# Row 1
peaks_row_1 = all_data[(all_data["TYPE"] == "PEAK") & (all_data["POSITION"] <= ROW_1_LAST_POSITION)]
troughs_row_1 = all_data[(all_data["TYPE"] == "TROUGH") & (all_data["POSITION"] <= ROW_1_LAST_POSITION)]
# Row 2
peaks_row_2 = all_data[(all_data["TYPE"] == "PEAK") & (all_data["POSITION"] > ROW_1_LAST_POSITION)]
troughs_row_2 = all_data[(all_data["TYPE"] == "TROUGH") & (all_data["POSITION"] > ROW_1_LAST_POSITION)]

In [34]:
# Row-1 peaks after all fixes; position and track id are shown on hover.
px.scatter(
    peaks_row_1,
    x="TIME",
    y="MEAN_TEMPERATURE",
    color="CYCLE_STR",
    hover_data=["POSITION_STR", "TRACK_ID"],
)

# Save result

In [35]:
# Write the renumbered table next to the input file, without the row-label column.
all_data.to_csv(
    rf"{data_location}\{date}_all_features_combined_renumbered.csv",
    index=False,
)