In [56]:
import pandas as pd
from datetime import datetime

# Function to compare times
def time_in_range(time_to_check, start_time, end_time):
    """Return whether ``time_to_check`` lies within ``[start_time, end_time]``.

    Both bounds are inclusive. Nothing is parsed or converted here, so the
    three arguments must already be mutually comparable (for example, all
    ``datetime`` objects).
    """
    not_before_start = start_time <= time_to_check
    return not_before_start and time_to_check <= end_time

# Load data from CSV files
file1_path = './data/LD.csv'  # Path to the first CSV file
file2_path = './data/watch-sleep-data.csv'  # Path to the second CSV file

# Read the CSV files into pandas DataFrames, asking pandas to parse the time columns
df1 = pd.read_csv(file1_path, parse_dates=['Time'])
df2 = pd.read_csv(file2_path, parse_dates=['start_time', 'end_time'])

# Store the converted column back on df1: the conversion result was previously
# discarded, so it had no effect on the data used by the later cells.
df1['Time'] = pd.to_datetime(df1['Time'], format='%Y-%m-%d %H:%M:%S')

# Keep the converted column as the cell's last expression so it is still displayed
df1['Time']

0        2024-12-12 23:02:37
1        2024-12-12 23:02:42
2        2024-12-12 23:02:47
3        2024-12-12 23:02:52
4        2024-12-12 23:02:57
                 ...        
100273   2024-12-26 10:35:09
100274   2024-12-26 10:35:14
100275   2024-12-26 10:35:19
100276   2024-12-26 10:35:24
100277   2024-12-26 10:35:29
Name: Time, Length: 100278, dtype: datetime64[ns]

In [57]:
# Ensure the necessary columns are present before labelling.
# df1 is loaded from LD.csv and df2 from watch-sleep-data.csv.
missing_df1 = [col for col in ('Time',) if col not in df1.columns]
missing_df2 = [col for col in ('start_time', 'end_time', 'LABEL') if col not in df2.columns]
if missing_df1 or missing_df2:
    # Name the columns exactly as they are spelled in the data ('Time', not 'TIME')
    # and report which ones are absent so the failure is actionable.
    raise ValueError(
        "Input files must contain 'Time' (in LD.csv), 'start_time', 'end_time', "
        "and 'LABEL' (in watch-sleep-data.csv) columns; "
        "missing from LD.csv: {}, missing from watch-sleep-data.csv: {}".format(
            missing_df1, missing_df2
        )
    )

# Create an empty 'LABEL' column in df1
df1['LABEL'] = None

In [61]:
# Give every df1 row the LABEL of the first df2 range (in df2 row order) whose
# [start_time, end_time] interval contains the row's Time; both bounds are inclusive.
#
# The ranges are walked once and all df1 rows are tested per range with a
# vectorized mask, instead of comparing every df1 row against every df2 row in
# nested Python loops.
unlabelled = pd.Series(True, index=df1.index)  # rows that no earlier range has claimed
for start, end, label in zip(df2['start_time'], df2['end_time'], df2['LABEL']):
    # Restricting to still-unlabelled rows keeps the "first matching range wins" rule
    hit = unlabelled & df1['Time'].between(start, end)
    if hit.any():
        df1.loc[hit, 'LABEL'] = label
        unlabelled &= ~hit
    if not unlabelled.any():
        break  # every row already has its label; remaining ranges cannot change anything


In [62]:
# Display df1 to inspect the LABEL column filled in by the matching step
df1

Unnamed: 0,Time,visible_light,IR_light,UV_light,PM1.0 concentration(CF=1; Standard particulate matter; unit:ug/m3),PM2.5 concentration(CF=1; Standard particulate matter; unit:ug/m3),PM10 concentration(CF=1; Standard particulate matter; unit:ug/m3),PM1.0 concentration(Atmospheric environment; unit:ug/m3),PM2.5 concentration(Atmospheric environment;unit:ug/m3),PM10 concentration(Atmospheric environment;unit:ug/m3),LABEL
0,2024-12-12 23:02:37,262,257,0.03,42,58,67,32,47,55,
1,2024-12-12 23:02:42,261,258,0.02,42,58,67,32,47,55,
2,2024-12-12 23:02:47,262,258,0.03,42,58,67,32,47,55,
3,2024-12-12 23:02:52,262,259,0.03,42,58,67,32,47,55,
4,2024-12-12 23:02:57,264,258,0.04,42,58,67,32,47,55,
...,...,...,...,...,...,...,...,...,...,...,...
100273,2024-12-26 10:35:09,263,297,0.03,18,26,30,18,26,30,
100274,2024-12-26 10:35:14,263,290,0.03,19,27,31,19,27,31,
100275,2024-12-26 10:35:19,262,285,0.03,18,26,30,18,26,30,
100276,2024-12-26 10:35:24,264,286,0.04,18,26,30,18,26,30,


In [63]:
# Save the updated df1 to a new CSV (the index is not written)
output_path = 'updated_LD.csv'
df1.to_csv(output_path, index=False)

# Build the message from the same path that was written, so the reported file
# name cannot drift from the real one.
print("Process completed. The updated file has been saved as '{}'.".format(output_path))

Process completed. The updated file has been saved as 'updated_file1.csv'.


In [16]:
import pandas as pd

# Input CSV; it must provide a 'Label' column
csv_file_path = 'data/THS_with_labels.csv'

# Read it without using any column as the index, skipping spaces after delimiters
data = pd.read_csv(csv_file_path, index_col=False, skipinitialspace=True)
df = pd.DataFrame(data)

# Keep only the rows whose Label differs from 'No Match'
df = df.loc[df['Label'].ne('No Match')]

# Write the remaining rows to a new CSV, leaving the index out
df.to_csv('FINAL_THS.csv', index=False)

# Print the filtered DataFrame
print(df)


                      Time  Column1  Column2  Column3       Label
519    2024-12-12 23:48:25    27.47   127.69     45.0       InBed
520    2024-12-12 23:48:30    27.50   127.81     45.0       InBed
521    2024-12-12 23:48:35    27.47   127.81     45.0       InBed
522    2024-12-12 23:48:41    27.47   127.75     46.0       InBed
523    2024-12-12 23:48:46    27.47   127.81     45.0       InBed
...                    ...      ...      ...      ...         ...
98789  2024-12-26 10:21:46    30.12    93.31     11.0  AsleepCore
98790  2024-12-26 10:21:51    30.09    93.31     12.0  AsleepCore
98791  2024-12-26 10:21:57    30.09    93.44     11.0  AsleepCore
98792  2024-12-26 10:22:01    30.12    93.50     11.0  AsleepCore
98793  2024-12-26 10:22:07    30.09    93.50     12.0  AsleepCore

[74226 rows x 5 columns]


In [None]:
# CSV file to load in this cell
csv_file_path = 'FINAL_LD.csv'

# Read it without using any column as the index, skipping spaces after delimiters
data = pd.read_csv(csv_file_path, index_col=False, skipinitialspace=True)
df = pd.DataFrame(data)
