# This notebook reads the time-offset files generated by phenocamTemperature, reformats the CSV data, and generates corrected dates for a cleaner workflow

In [1]:
import seaborn as sb
import pandas as pd
import numpy as np
from pathlib import Path
import re
import os

# Read image filenames

In [2]:
def extractFileData(row):
    """Split an image filename stem into camera serial, timestamp text and band.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must provide ``row["filename"]``, the file stem laid out as
        ``<serial>_<timestamp>[_IR|_NIR|_RGB]``.

    Returns
    -------
    tuple
        ``(serial, datetime, whatType)``. ``whatType`` is ``"NIR"`` for names
        carrying an ``_IR`` or ``_NIR`` marker and ``"RGB"`` otherwise (a name
        without any band marker counts as RGB). The marker is removed from the
        returned timestamp text. ``(None, None, None)`` is returned for
        "liveview" images and for names that contain no underscore, so one
        unexpected file cannot abort a whole ``DataFrame.apply``.
    """
    filename = row["filename"]
    if "liveview" in filename.lower():
        return None, None, None

    serial, separator, datetime = filename.partition("_")
    if not separator:
        # No "<serial>_<timestamp>" structure to split on.
        return None, None, None

    whatType = "RGB"
    # Same precedence as before: _IR, then _RGB, then _NIR.
    for marker, band in (("_IR", "NIR"), ("_RGB", "RGB"), ("_NIR", "NIR")):
        if marker.lower() in datetime.lower():
            whatType = band
            # Detection ignores case, so the removal must ignore case as well;
            # otherwise a lowercase marker would stay in the timestamp text.
            datetime = re.sub(re.escape(marker), "", datetime, flags=re.IGNORECASE)
            break
    return serial, datetime, whatType
    
        

# Root folder that is searched recursively for camera images.
picturePath = Path("/media/dan/ITEX-AON PhenoCam Image MASTER1/Phenocam/")
# picturePath = Path("/mnt/databackup/phenocam2016-2021/2017/2017_Brw_MISP_RC0001/")
pictureFilesnames = list(picturePath.glob("**/*.jpeg"))
pictureFilesnames += list(picturePath.glob("**/*.jpg"))
pathSeries = pd.Series([x.parent for x in pictureFilesnames])
suffixSeries = pd.Series([x.suffix for x in pictureFilesnames])

# One row per image: the stem is parsed by extractFileData, while the parent
# folder and suffix are kept so the source path can be rebuilt later.
files = pd.DataFrame([x.stem for x in pictureFilesnames],columns=["filename"])
files["srcPath"] = pathSeries
files["suffix"] = suffixSeries
files[["serial","datetime","type"]] = files.apply(extractFileData,axis=1,result_type='expand')

# Drop the rows extractFileData rejected (serial is None). The explicit copy
# makes `files` an independent frame, so the column assignments done in later
# cells do not operate on a slice of the unfiltered frame.
files = files[files['serial'].notna()].copy()
files

Unnamed: 0,filename,srcPath,suffix,serial,datetime,type
0,RC01_20160811_010014_IR,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpeg,RC01,20160811_010014,NIR
1,RC01_20160811_020014,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpeg,RC01,20160811_020014,RGB
2,RC01_20160811_020014_IR,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpeg,RC01,20160811_020014,NIR
3,RC01_20160811_030015,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpeg,RC01,20160811_030015,RGB
4,RC01_20160811_030015_IR,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpeg,RC01,20160811_030015,NIR
...,...,...,...,...,...,...
18544,RC0016_20210816_080002-0800_RGB,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpg,RC0016,20210816_080002-0800,RGB
18545,RC0016_20210816_090002-0800_RGB,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpg,RC0016,20210816_090002-0800,RGB
18546,RC0016_20210816_100002-0800_RGB,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpg,RC0016,20210816_100002-0800,RGB
18547,RC0016_20210816_110002-0800_RGB,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpg,RC0016,20210816_110002-0800,RGB


In [3]:
# Convert the timestamp text taken from the filenames into datetime values.
# Any "-" followed by four digits (e.g. the "-0800" in newer filenames) is
# removed before parsing, so that token is discarded rather than applied.
# Text that does not match date_format1 becomes NaT (errors="coerce").
date_format1 = "%Y%m%d_%H%M%S"
date_format2 = "%Y%m%d_%H%M%S%z"
strippedStamps = files['datetime'].str.replace("-[0-9][0-9][0-9][0-9]","",regex=True)
datetime = pd.to_datetime(strippedStamps, format=date_format1, errors="coerce")
files["datetime"] = datetime

In [4]:
# Spot check: display the rows of camera RC0016 to inspect their parsed datetime values.
files[files.serial == "RC0016"]

Unnamed: 0,filename,srcPath,suffix,serial,datetime,type
16211,RC0016_20210705_160021-0800_NIR,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpg,RC0016,2021-07-05 16:00:21,NIR
16212,RC0016_20210705_170007-0800_NIR,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpg,RC0016,2021-07-05 17:00:07,NIR
16213,RC0016_20210705_180020-0800_NIR,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpg,RC0016,2021-07-05 18:00:20,NIR
16214,RC0016_20210705_190021-0800_NIR,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpg,RC0016,2021-07-05 19:00:21,NIR
16215,RC0016_20210705_200021-0800_NIR,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpg,RC0016,2021-07-05 20:00:21,NIR
...,...,...,...,...,...,...
18544,RC0016_20210816_080002-0800_RGB,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpg,RC0016,2021-08-16 08:00:02,RGB
18545,RC0016_20210816_090002-0800_RGB,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpg,RC0016,2021-08-16 09:00:02,RGB
18546,RC0016_20210816_100002-0800_RGB,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpg,RC0016,2021-08-16 10:00:02,RGB
18547,RC0016_20210816_110002-0800_RGB,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpg,RC0016,2021-08-16 11:00:02,RGB




## Read CSV file

In [5]:
# f = Path("time_dets_2019_RC0012_daystep_7day_edited.csv")
# f = Path("/mnt/databackup/phenocam2016-2021/timeoffsets/timeoffsets/time_dets_full_RC0001_daystep_7day_edited.csv")
# Folder that is searched recursively for the time-offset CSV files.
f = Path("/mnt/databackup/phenocam2016-2021/timeoffsets/offsetsFinal/")
# sorted() turns the one-shot glob generator into a list: it can be iterated
# again when a cell is re-run, and the files are read in a stable order
# instead of whatever order the filesystem happens to yield.
offsetPaths = sorted(f.glob("**/*.csv"))
# offsetPaths = [Path("/mnt/databackup/phenocam2016-2021/timeoffsets/offsetsFinal/time_dets_full_RC0001_daystep_7day_final.csv")]

def readOffsetCsv(f):
    """Load one time-offset CSV and tag every row with its camera serial.

    Parameters
    ----------
    f : pathlib.Path
        Path of the CSV file. Its first column is read as the index, and the
        first ``RC<digits>`` token found in the file stem becomes the serial.

    Returns
    -------
    pandas.DataFrame
        The CSV columns without any "Unnamed*" column and without
        ``datapoints`` / ``predictedDates`` (when present), plus a ``serial``
        column. Rows containing any missing value are removed. Because the
        ``serial`` column takes part in that filter, a file whose name has no
        ``RC<digits>`` token yields no rows.
    """
    match = re.search(r'RC\d+', f.stem)
    serial = match.group() if match else None

    df = pd.read_csv(f, index_col=0)
    df["serial"] = serial
    # Remove any unnamed columns
    df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
    # Non-inplace calls: the frame above is a slice, and mutating a slice in
    # place is what triggers pandas' chained-assignment warnings.
    # Rows are filtered while datapoints/predictedDates are still present,
    # exactly as before, so a gap in either column still removes the row.
    df = df.dropna()
    # Drop unnecessary columns; predictedDates is regenerated later. Files that
    # lack one of these columns are accepted instead of raising KeyError.
    df = df.drop(columns=["datapoints", "predictedDates"], errors="ignore")
    return df

# Read every offset CSV and stack the results into one frame with a fresh
# 0..n-1 index; the datetime column is then converted from text.
dfs = [readOffsetCsv(offsetPath) for offsetPath in offsetPaths]

df = pd.concat(dfs, ignore_index=True)
df["datetime"] = pd.to_datetime(df["datetime"])
print(df.head())


             datetime         timeoffset serial
0 2016-08-10 13:00:00  -1 days +15:00:00   RC01
1 2016-08-11 13:00:00  -1 days +15:00:00   RC01
2 2016-06-11 07:00:00  -1 days +15:00:00   RC03
3 2016-06-12 07:00:00  -1 days +15:00:00   RC03
4 2016-06-13 07:00:00  -1 days +15:00:00   RC03


In [6]:
# One-off fix: one specific CSV file stored its datetimes in a different format from the rest, so it was re-parsed and re-saved.
#df = pd.read_csv(Path("/mnt/databackup/phenocam2016-2021/timeoffsets/offsetsFinal/time_dets_full_RC0001_daystep_7day_final.csv"))
#df['datetime'] = pd.to_datetime(df['datetime'])
#df.to_csv("/mnt/databackup/phenocam2016-2021/timeoffsets/offsetsFinal/time_dets_full_RC0001_daystep_7day_final_resaved.csv")

In [7]:
# Split the textual offset (e.g. "-1 days +15:00:00") into a day count and an
# hour count using regular expressions.
offsetText = df['timeoffset']

# Signed integer in front of " days"; anything unparseable becomes NaN.
dayText = offsetText.str.extract(r'(-?\d+) days', expand=False)
df['daysoffset'] = pd.to_numeric(dayText, errors='coerce')

# Clock part "H:M:S"; only its hour component is kept, minutes and seconds
# are discarded.
clockText = offsetText.str.extract(r'(-?\d+:\d+:\d+)', expand=False)
df['hoursoffset'] = pd.to_datetime(clockText, format='%H:%M:%S').dt.hour

df.head()

Unnamed: 0,datetime,timeoffset,serial,daysoffset,hoursoffset
0,2016-08-10 13:00:00,-1 days +15:00:00,RC01,-1,15
1,2016-08-11 13:00:00,-1 days +15:00:00,RC01,-1,15
2,2016-06-11 07:00:00,-1 days +15:00:00,RC03,-1,15
3,2016-06-12 07:00:00,-1 days +15:00:00,RC03,-1,15
4,2016-06-13 07:00:00,-1 days +15:00:00,RC03,-1,15


## Generate new datetimes

In [8]:
#This is just to generate new datetimes for the dataoffset csv.  
def getNewDate(row):
    """Return the row's timestamp with its day/hour offset subtracted.

    Parameters
    ----------
    row : mapping or pandas.Series
        Provides ``datetime``, ``daysoffset`` and ``hoursoffset``.

    Returns
    -------
    The shifted timestamp, or ``None`` when either offset is missing.
    """
    oldDate, days, hours = (row[key] for key in ("datetime", "daysoffset", "hoursoffset"))
    offsetMissing = pd.isna(days) or pd.isna(hours)
    return None if offsetMissing else oldDate - pd.DateOffset(days=days, hours=hours)

In [9]:
# Regenerate the predictedDates column row by row with getNewDate.
df["predictedDates"] = df.apply(getNewDate,axis=1)
# df.index=df["datetime"]

In [10]:
# Preview the first image rows (.loc slices by index label and includes label 4).
files.loc[:4]

Unnamed: 0,filename,srcPath,suffix,serial,datetime,type
0,RC01_20160811_010014_IR,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpeg,RC01,2016-08-11 01:00:14,NIR
1,RC01_20160811_020014,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpeg,RC01,2016-08-11 02:00:14,RGB
2,RC01_20160811_020014_IR,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpeg,RC01,2016-08-11 02:00:14,NIR
3,RC01_20160811_030015,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpeg,RC01,2016-08-11 03:00:15,RGB
4,RC01_20160811_030015_IR,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpeg,RC01,2016-08-11 03:00:15,NIR


In [11]:
#This is to create new datetimes for a dataframe of timestamped images
# i = abs((df["datetime"] - files["datetime"].iloc[0])).idxmin()
# closest_datetime = df.iloc[i]["datetime"]
# print(closest_datetime)
# print(df.iloc[i])

def correctTime(row, df, maxGap=pd.Timedelta('3 days')):
    """Correct one image timestamp using the nearest entry of an offset table.

    Parameters
    ----------
    row : pandas.Series
        Image record providing ``datetime``, ``serial`` and ``type``.
    df : pandas.DataFrame
        Offset table with ``datetime``, ``daysoffset`` and ``hoursoffset``
        columns.
    maxGap : pandas.Timedelta, optional
        Largest allowed distance between the image timestamp and the nearest
        offset entry; images further away are left uncorrected. Defaults to
        3 days.

    Returns
    -------
    tuple
        ``(closest_date, newDate, days, hours, newFileName)``. Five ``None``
        values are returned when no correction can be made: the image
        timestamp is missing, the offset table has no usable entry, the
        nearest entry is further away than ``maxGap``, or its day/hour offset
        is missing.
    """
    noCorrection = (None, None, None, None, None)

    oldDate = row["datetime"]
    # A NaT timestamp (unparseable filename) or an empty table has no nearest
    # entry; bail out before idxmin is asked to pick one.
    if pd.isna(oldDate) or df.empty:
        return noCorrection

    # Find the closest date in the offsets dataframe
    datediffs = (df["datetime"] - oldDate).abs()
    minDiff = datediffs.min()
    # Reject the match when nothing comparable exists or it is too far away.
    if pd.isna(minDiff) or minDiff > maxGap:
        return noCorrection
    i = datediffs.idxmin()

    closest_date, days, hours = df.loc[i, ["datetime", "daysoffset", "hoursoffset"]]
    # The missing-value check has to run before int(): int(NaN) raises
    # ValueError, which previously made this guard unreachable.
    if pd.isna(days) or pd.isna(hours):
        return noCorrection
    days = int(days)
    hours = int(hours)

    newDate = oldDate - pd.DateOffset(days=days, hours=hours)
    newFileName = "_".join([row["serial"], newDate.strftime("%Y%m%d_%H%M%S"), row["type"], "timeFix"])
    return closest_date, newDate, days, hours, newFileName

# Match the images of each camera against that camera's own offset rows, then
# attach the per-image results to the file table column-wise.
grouped = df.groupby('serial')
r = 0
offsetdfs = []
resultColumns = ["closest_date","newDateTime","daysoffset","hoursoffset","new_filename"]
for name, group in grouped:
    mask = files['serial'] == name
    try:
        output = files.loc[mask].apply(correctTime, df=group, axis=1,result_type='expand')
    except KeyError as e:
        print("could not find serial",name,"in the list of files")
        print(e)
        continue
    # Serials without any image produce an empty result; nothing to collect.
    if output.empty:
        continue
    output.columns = resultColumns
    offsetdfs.append(output)

offsetdf = pd.concat(offsetdfs)
# Aligned on the index, so images without a result keep missing values.
newDates = pd.concat([files,offsetdf],axis=1)

newDates

Unnamed: 0,filename,srcPath,suffix,serial,datetime,type,closest_date,newDateTime,daysoffset,hoursoffset,new_filename
0,RC01_20160811_010014_IR,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpeg,RC01,2016-08-11 01:00:14,NIR,2016-08-11 13:00:00,2016-08-11 10:00:14,-1.0,15.0,RC01_20160811_100014_NIR_timeFix
1,RC01_20160811_020014,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpeg,RC01,2016-08-11 02:00:14,RGB,2016-08-11 13:00:00,2016-08-11 11:00:14,-1.0,15.0,RC01_20160811_110014_RGB_timeFix
2,RC01_20160811_020014_IR,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpeg,RC01,2016-08-11 02:00:14,NIR,2016-08-11 13:00:00,2016-08-11 11:00:14,-1.0,15.0,RC01_20160811_110014_NIR_timeFix
3,RC01_20160811_030015,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpeg,RC01,2016-08-11 03:00:15,RGB,2016-08-11 13:00:00,2016-08-11 12:00:15,-1.0,15.0,RC01_20160811_120015_RGB_timeFix
4,RC01_20160811_030015_IR,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpeg,RC01,2016-08-11 03:00:15,NIR,2016-08-11 13:00:00,2016-08-11 12:00:15,-1.0,15.0,RC01_20160811_120015_NIR_timeFix
...,...,...,...,...,...,...,...,...,...,...,...
18544,RC0016_20210816_080002-0800_RGB,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpg,RC0016,2021-08-16 08:00:02,RGB,2021-08-16 01:00:00,2021-08-16 16:00:02,-1.0,16.0,RC0016_20210816_160002_RGB_timeFix
18545,RC0016_20210816_090002-0800_RGB,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpg,RC0016,2021-08-16 09:00:02,RGB,2021-08-16 01:00:00,2021-08-16 17:00:02,-1.0,16.0,RC0016_20210816_170002_RGB_timeFix
18546,RC0016_20210816_100002-0800_RGB,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpg,RC0016,2021-08-16 10:00:02,RGB,2021-08-16 01:00:00,2021-08-16 18:00:02,-1.0,16.0,RC0016_20210816_180002_RGB_timeFix
18547,RC0016_20210816_110002-0800_RGB,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpg,RC0016,2021-08-16 11:00:02,RGB,2021-08-16 01:00:00,2021-08-16 19:00:02,-1.0,16.0,RC0016_20210816_190002_RGB_timeFix


In [17]:
# Write the combined table to a CSV; the path is relative, so it resolves against the current working directory.
newDates.to_csv("Corrected_timestamp_test.csv")

In [12]:
# Display the combined table of original and corrected timestamps.
newDates

Unnamed: 0,filename,srcPath,suffix,serial,datetime,type,closest_date,newDateTime,daysoffset,hoursoffset,new_filename
0,RC01_20160811_010014_IR,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpeg,RC01,2016-08-11 01:00:14,NIR,2016-08-11 13:00:00,2016-08-11 10:00:14,-1.0,15.0,RC01_20160811_100014_NIR_timeFix
1,RC01_20160811_020014,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpeg,RC01,2016-08-11 02:00:14,RGB,2016-08-11 13:00:00,2016-08-11 11:00:14,-1.0,15.0,RC01_20160811_110014_RGB_timeFix
2,RC01_20160811_020014_IR,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpeg,RC01,2016-08-11 02:00:14,NIR,2016-08-11 13:00:00,2016-08-11 11:00:14,-1.0,15.0,RC01_20160811_110014_NIR_timeFix
3,RC01_20160811_030015,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpeg,RC01,2016-08-11 03:00:15,RGB,2016-08-11 13:00:00,2016-08-11 12:00:15,-1.0,15.0,RC01_20160811_120015_RGB_timeFix
4,RC01_20160811_030015_IR,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpeg,RC01,2016-08-11 03:00:15,NIR,2016-08-11 13:00:00,2016-08-11 12:00:15,-1.0,15.0,RC01_20160811_120015_NIR_timeFix
...,...,...,...,...,...,...,...,...,...,...,...
18544,RC0016_20210816_080002-0800_RGB,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpg,RC0016,2021-08-16 08:00:02,RGB,2021-08-16 01:00:00,2021-08-16 16:00:02,-1.0,16.0,RC0016_20210816_160002_RGB_timeFix
18545,RC0016_20210816_090002-0800_RGB,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpg,RC0016,2021-08-16 09:00:02,RGB,2021-08-16 01:00:00,2021-08-16 17:00:02,-1.0,16.0,RC0016_20210816_170002_RGB_timeFix
18546,RC0016_20210816_100002-0800_RGB,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpg,RC0016,2021-08-16 10:00:02,RGB,2021-08-16 01:00:00,2021-08-16 18:00:02,-1.0,16.0,RC0016_20210816_180002_RGB_timeFix
18547,RC0016_20210816_110002-0800_RGB,/media/dan/ITEX-AON PhenoCam Image MASTER1/Phe...,.jpg,RC0016,2021-08-16 11:00:02,RGB,2021-08-16 01:00:00,2021-08-16 19:00:02,-1.0,16.0,RC0016_20210816_190002_RGB_timeFix


In [13]:
# Report how many images ended up without a corrected timestamp, overall and
# with camera RC0003 left out.
print("stats")
uncorrected = newDates.newDateTime.isna()
totalVals = len(newDates)
missingVals = uncorrected.sum()
missingVals_noRC3 = uncorrected[newDates.serial != "RC0003"].sum()
missingRatio = missingVals / totalVals
print("Total values:",totalVals)
print("Missing values:",missingVals)
print("Missing values not including RC0003:",missingVals_noRC3)
print("percent of corrected data:",round((1-missingRatio)*100),"%")

stats
Total values: 18537
Missing values: 5047
Missing values not including RC0003: 4832
percent of corrected data: 73 %


# Saving new files

In [14]:
from tqdm.auto import tqdm
import shutil

# Register tqdm with pandas; this is what provides the progress_apply method used below.
tqdm.pandas()

# Root folder that receives the renamed copies.
savePath = Path("/media/dan/ITEX-AON PhenoCam Image MASTER1/Phenocam_correctedDate2/")

def copyFiles(row,destPath):
    """Copy one image to ``destPath/<year>/<serial>/<type>/<new_filename>.jpg``.

    Parameters
    ----------
    row : pandas.Series or similar
        Record exposing ``filename``, ``srcPath``, ``suffix``, ``serial``,
        ``type``, ``newDateTime`` and ``new_filename`` as attributes. The year
        folder is taken from ``newDateTime``.
    destPath : pathlib.Path
        Root of the destination tree; missing folders are created.

    Returns
    -------
    bool
        True when the destination file exists after the call (copied now, or
        already present and left untouched). False when the row has no
        ``new_filename`` or when anything goes wrong while copying.
    """
    try:
        if pd.isna(row.new_filename):
            return False

        # Append the suffix rather than using Path.with_suffix(): with_suffix
        # replaces everything after the last dot, which corrupts the name when
        # the stem itself contains a dot.
        src = row.srcPath / (row.filename + row.suffix)

        year = row.newDateTime.strftime("%Y")
        dest = destPath / year / row.serial / row.type / (row.new_filename + ".jpg")
        os.makedirs(dest.parent,exist_ok = True)

        # Never overwrite a file that is already in place.
        if not os.path.isfile(dest):
            shutil.copyfile(src, dest)
        return True
    except Exception:
        # Best effort: a failing row is reported as False so the run continues.
        # Unlike a bare "except:", this lets KeyboardInterrupt through, so a
        # long copy run can still be stopped.
        return False
    
    

# Run copyFiles on every row with a progress bar; the result holds one True/False per image.
correctlyTransfered2 = newDates.progress_apply(copyFiles, destPath=savePath, axis=1,result_type='expand')

  0%|          | 0/18537 [00:00<?, ?it/s]

In [25]:
# NOTE(review): `correctlyTransfered` is not assigned in any cell visible in this notebook -
# presumably the result of an earlier copy run kept in kernel memory; this line raises
# NameError on a fresh kernel unless it is defined elsewhere. TODO confirm and define it or drop this cell.
bothTransfered = correctlyTransfered + correctlyTransfered2

In [15]:
# Number of rows for which copyFiles returned True.
(correctlyTransfered2).sum()

13490