In [58]:
import pandas as pd
import glob
import os

In [59]:
# Requires the third-party `itables` package (pip install itables).
import itables.options as opt
from itables import init_notebook_mode, show

# Activate itables for the notebook (all_interactive=True).
# A single frame can also be rendered explicitly with show(df, maxBytes=0).
init_notebook_mode(all_interactive=True)

# Show the index column, and turn off downsampling of the data when it is printed.
opt.showIndex = True
opt.maxBytes = 0

# Importing Data

In [60]:
# Folder that is searched for the compiled (pivoted) .xlsx workbooks.
path_compiled_data = r"D:\PhenoCrop\3_qgis\OAT_FRONTIERS\2. pivoted"
compiled_pattern = path_compiled_data + "/*.xlsx"
all_files_final = glob.glob(compiled_pattern)
all_files_final

['D:\\PhenoCrop\\3_qgis\\OAT_FRONTIERS\\2. pivoted\\Gene2Bread 2024 Vollebekk.xlsx',
 'D:\\PhenoCrop\\3_qgis\\OAT_FRONTIERS\\2. pivoted\\OatFrontiers 2024 Vollebekk.xlsx']

In [61]:
# Folder that is searched for the agronomic .xlsx workbooks.
path_agro_data = r"D:\PhenoCrop\3_qgis\OAT_FRONTIERS\AGRO DATA"
agro_pattern = path_agro_data + "/*.xlsx"
all_files_agro = glob.glob(agro_pattern)
all_files_agro

['D:\\PhenoCrop\\3_qgis\\OAT_FRONTIERS\\AGRO DATA\\GENE2BREAD Registreringer G2B_Vollebekk 2024_1.xlsx']

In [62]:
# Load every workbook listed in `all_files_final`. No camera-based filtering
# happens here: whatever the glob returned is read.
# Each frame is stored in the notebook namespace under its file stem (with '-'
# replaced by '_'); later cells fetch the frames by that name via locals()[...].

list_df_compiled = []
for filepath in all_files_final:
    base_name = os.path.basename(filepath)

    # Excel keeps a "~$<name>.xlsx" lock file next to a workbook that is open;
    # it is not a readable workbook, so skip it.
    if base_name.startswith('~$'):
        continue

    # Strip only the final extension so that stems containing dots stay intact
    # (split(".")[0] would cut "a.b.xlsx" down to "a").
    file_name = base_name.rsplit(".", 1)[0].replace('-', '_')
    list_df_compiled.append(file_name)

    frame = pd.read_excel(filepath)
    locals()[file_name] = frame
    print(file_name, '=====', frame.shape)

Gene2Bread 2024 Vollebekk ===== (14580, 19)
OatFrontiers 2024 Vollebekk ===== (12960, 19)


In [63]:
# Show the names under which the loaded workbooks were stored.
list_df_compiled

['Gene2Bread 2024 Vollebekk', 'OatFrontiers 2024 Vollebekk']

In [64]:
# Fetch the spectral frames by the names the loading loop stored them under.
# The names contain spaces, so they are read through locals() by string key.
df_g2b = locals()['Gene2Bread 2024 Vollebekk']
df_oatf = locals()['OatFrontiers 2024 Vollebekk']

In [65]:
# Load every agronomic workbook listed in `all_files_agro`. No camera-based
# filtering happens here: whatever the glob returned is read.
# Each frame is stored in the notebook namespace under its file stem (with '-'
# replaced by '_'); later cells fetch the frames by that name via locals()[...].

list_df_agro = []
for filepath in all_files_agro:
    base_name = os.path.basename(filepath)

    # Excel keeps a "~$<name>.xlsx" lock file next to a workbook that is open;
    # it is not a readable workbook, so skip it.
    if base_name.startswith('~$'):
        continue

    # Strip only the final extension so that stems containing dots stay intact
    # (split(".")[0] would cut "a.b.xlsx" down to "a").
    file_name = base_name.rsplit(".", 1)[0].replace('-', '_')
    list_df_agro.append(file_name)

    frame = pd.read_excel(filepath)
    locals()[file_name] = frame
    print(file_name, '=====', frame.shape)

GENE2BREAD Registreringer G2B_Vollebekk 2024_1 ===== (100, 20)


In [66]:
# Fetch the agronomic frame by the name the loading loop stored it under.
df_g2b_Agro = locals()['GENE2BREAD Registreringer G2B_Vollebekk 2024_1']

In [71]:
# Preview the first two rows of the agronomic frame.
df_g2b_Agro.head(2)

Unnamed: 0,Rute,REP,Name,Aksskyting,Høyde (cm) 23/7,Unnamed: 5,Høyde (cm),GM,Legde % v høsting,Legde % 1/8,Legde % 14/8,Råvekt (kg),Tørrvekt (kg),vann% v høsting,kg 15% vann,kg/daa 15% vann,Hl (kg/hl),Protein %,TKV(g),GPD
Loading ITables v2.2.3 from the init_notebook_mode cell... (need help?),,,,,,,,,,,,,,,,,,,,


In [69]:
# Preview the first two rows of the Gene2Bread spectral frame.
df_g2b.head(2)

Unnamed: 0,ID,Location,Field,Camera,Flight Height,count,majority,max,mean,median,min,minority,range,stdev,sum,variance,variety,Date,Spectrum
Loading ITables v2.2.3 from the init_notebook_mode cell... (need help?),,,,,,,,,,,,,,,,,,,


# Removing the ` - G2B` text from the IDs in the Gene2Bread spectral df

In [99]:
# Remove the ' - G2B' text from the IDs and cast the column to int so it can
# be matched against the integer plot numbers of the agronomic data.
# Only string entries are edited, so re-running this cell after the column is
# already int still works (the .str accessor raises AttributeError on an
# integer column).
df_g2b['ID'] = (
    df_g2b['ID']
    .map(lambda plot_id: plot_id.replace(' - G2B', '') if isinstance(plot_id, str) else plot_id)
    .astype(int)
)

In [100]:
# Preview the first two rows to check the cleaned ID column.
df_g2b.head(2)

Unnamed: 0,ID,Location,Field,Camera,Flight Height,count,majority,max,mean,median,min,minority,range,stdev,sum,variance,variety,Date,Spectrum
Loading ITables v2.2.3 from the init_notebook_mode cell... (need help?),,,,,,,,,,,,,,,,,,,


# Removing the ` - OatFrontiers` text from the IDs in the OatFrontiers spectral df

In [78]:
# Remove the ' - OatFrontiers' text from the IDs.
# Only string entries are edited and everything else is passed through as is.
# With .str.replace, re-running this cell after the IDs have been converted to
# integers would either raise (all-integer column) or turn the converted
# entries into NaN (mixed column).
df_oatf['ID'] = df_oatf['ID'].map(
    lambda plot_id: plot_id.replace(' - OatFrontiers', '') if isinstance(plot_id, str) else plot_id
)

In [79]:
# Preview the first rows to check the cleaned ID column.
df_oatf.head()

Unnamed: 0,ID,Location,Field,Camera,Flight Height,count,majority,max,mean,median,min,minority,range,stdev,sum,variance,variety,Date,Spectrum
Loading ITables v2.2.3 from the init_notebook_mode cell... (need help?),,,,,,,,,,,,,,,,,,,


# Correcting the datatype of ID Entries

The ID entries in the spectral DataFrames are stored as str; to merge them with the Agro data, they must be int64.

In [103]:
import pandas as pd

def convert_column_to_int(df, column_name):
    """
    Convert the values of one column to int wherever ``int(value)`` succeeds.

    Values that cannot be converted (non-numeric strings, None, NaN,
    infinities, other objects) are left unchanged. The column is replaced on
    ``df`` itself, and the same DataFrame object is returned.

    Note that ``int()`` truncates floats, so 3.7 becomes 3.

    Parameters:
    - df (pd.DataFrame): The DataFrame containing the column.
    - column_name (str): The name of the column to convert.

    Returns:
    - pd.DataFrame: The same DataFrame, with the column updated.
    """
    def safe_convert(value):
        try:
            return int(value)
        except (ValueError, TypeError, OverflowError):
            # ValueError: non-numeric text or NaN.
            # TypeError: None and other objects int() does not accept.
            # OverflowError: positive or negative infinity.
            return value

    # Apply the safe_convert function to the specified column
    df[column_name] = df[column_name].apply(safe_convert)

    return df


In [104]:
# Convert the OatFrontiers IDs to int where possible. The function updates
# df_oatf itself; the returned frame is what this cell displays.
convert_column_to_int(df_oatf, 'ID')

Unnamed: 0,ID,Location,Field,Camera,Flight Height,count,majority,max,mean,median,min,minority,range,stdev,sum,variance,variety,Date,Spectrum
Loading ITables v2.2.3 from the init_notebook_mode cell... (need help?),,,,,,,,,,,,,,,,,,,


In [113]:
# Save the OatFrontiers spectral frame into the pivoted-data folder.
# That folder is the one globbed for input workbooks at the top of the notebook,
# so on a re-run this output file is matched by that glob as well.
# NOTE(review): the file name says "with Agro", but no agronomic merge on df_oatf
# is visible in this notebook — confirm the intended name.
df_oatf.to_excel(os.path.join(path_compiled_data, "Oats 2024 Vollebekk with Agro.xlsx"), index=False)

# Comparing the IDs in both dfs

Border plots are usually not part of the Agro Data, so some IDs in the spectral data are expected to have no match in the Agro Data.

In [160]:
# Identifiers of the trial being processed. They are used for the output file
# names and for the constant columns of the flight log.
project_name = 'Gene2Bread'
year = "2024"
location = "Vollebekk"
field_name = project_name + ' ' + year  # 'Gene2Bread 2024'

In [161]:
# Left-join the agronomic records onto the spectral rows: spectral 'ID' is
# matched against agronomic 'Rute'. how='left' keeps spectral rows that have
# no agronomic record; their agronomic columns are filled with NaN.
# validate='many_to_one' makes pandas raise MergeError if a 'Rute' value occurs
# more than once in the agronomic table, which would otherwise silently
# duplicate spectral rows.
# 'Rute' is dropped afterwards because it repeats 'ID' on matched rows.
merged_df = pd.merge(
    df_g2b,
    df_g2b_Agro,
    left_on='ID',
    right_on='Rute',
    how='left',
    validate='many_to_one',
).drop(columns=['Rute'])

In [163]:
# Output paths for the merged spectral + agronomic table, inside the
# pivoted-data folder.
output_csv = os.path.join(path_compiled_data, field_name+" Vollebekk Spectral+Agro.csv")
output_xlsx = os.path.join(path_compiled_data, field_name+" Vollebekk Spectral+Agro.xlsx")

# The CSV export is disabled; only the Excel workbook is written.
# merged_df.to_csv(output_csv, index=False, encoding='utf-8-sig')

merged_df.to_excel(output_xlsx, index=False)

# Extracting Flight Log from list of paths

In [191]:
# Folder that is globbed for the workbooks whose file names feed the flight log.
separated_data = r'D:\PhenoCrop\3_qgis\OAT_FRONTIERS\1. separated'

In [192]:
# NOTE: this rebinds `all_files_final`, which earlier held the workbook list of
# the pivoted folder; from here on it lists the workbooks in `separated_data`.
all_files_final = glob.glob(separated_data + "/*.xlsx")

In [193]:
# Keep only the paths that contain both "gene" and "pivoted" (case-insensitive).
all_files_proj = [
    candidate
    for candidate, lowered in ((entry, entry.lower()) for entry in all_files_final)
    if "gene" in lowered and "pivoted" in lowered
]

In [194]:
import os
import pandas as pd
        
# Function to extract and split the base name
def extract_data(file_path, site=None, project=None, season=None):
    """
    Build one flight-log record from a workbook file name.

    The base name, with '_pivoted.xlsx' removed, is split on whitespace.
    Token 0 is used as the flight date, token 3 as the flight height, token 4
    as the data type, and tokens 5 and 6 as the side and front overlap. The
    drone model is derived from a 'p4m' or 'm3m' marker anywhere in the base
    name (case-insensitive).

    Parameters:
    - file_path (str): Path of the workbook.
    - site, project, season: Values for the "Location", "Project" and "Year"
      fields. When left as None, the notebook-level variables `location`,
      `project_name` and `year` are used instead.

    Returns:
    - dict: Record with the keys "Flight Date", "Location", "Project", "Year",
      "Drone Model", "Flight Height", "Data Type", "Side Overlap" and
      "Front Overlap". All values taken from the file name are strings.

    Raises:
    - ValueError: If the name has fewer than 7 whitespace-separated tokens, or
      if it contains neither the 'p4m' nor the 'm3m' marker.
    """
    base_name = os.path.basename(file_path)
    parts = base_name.replace('_pivoted.xlsx', '').split()

    # The highest index read below is parts[6]; fail with a message that names
    # the file instead of a bare IndexError.
    if len(parts) < 7:
        raise ValueError(
            "Cannot parse flight info from {!r}: expected at least 7 "
            "whitespace-separated fields, found {}".format(base_name, len(parts))
        )

    lowered_name = base_name.lower()
    if "p4m" in lowered_name:
        camera = "Phantom4M"
    elif "m3m" in lowered_name:
        camera = "Mavic3M"
    else:
        # Without this branch `camera` would be unbound and the return
        # statement would fail with UnboundLocalError.
        raise ValueError(
            "Cannot determine the drone model from {!r}: expected 'p4m' or "
            "'m3m' in the file name".format(base_name)
        )

    return {
        "Flight Date": parts[0],
        # The notebook-level variables are only read when no override is given.
        "Location": location if site is None else site,
        "Project": project_name if project is None else project,
        "Year": year if season is None else season,
        "Drone Model": camera,
        "Flight Height": parts[3],
        "Data Type": parts[4],
        "Side Overlap": parts[5],
        "Front Overlap": parts[6]
    }

In [199]:
# Files selected for the flight log
file_paths = all_files_proj

# One record per file, assembled into a DataFrame
data = list(map(extract_data, file_paths))
df_flightlog = pd.DataFrame(data)

# Parse the 'Flight Date' text with the format '%Y%m%d'
df_flightlog['Flight Date'] = pd.to_datetime(df_flightlog['Flight Date'], format='%Y%m%d')

# Cast these columns to int where possible (the frame is updated in place)
for numeric_column in ('Side Overlap', 'Year', 'Front Overlap'):
    convert_column_to_int(df_flightlog, numeric_column)

# Display the resulting table (alternatively: show(df_flightlog))
df_flightlog


Unnamed: 0,Flight Date,Location,Project,Year,Drone Model,Flight Height,Data Type,Side Overlap,Front Overlap
Loading ITables v2.2.3 from the init_notebook_mode cell... (need help?),,,,,,,,,


In [200]:
# Put the flight-log columns into their final order
flightlog_column_order = [
    'Flight Date',
    'Drone Model',
    'Location',
    'Project',
    'Year',
    'Flight Height',
    'Data Type',
    'Side Overlap',
    'Front Overlap',
]
df_flightlog = df_flightlog[flightlog_column_order]
df_flightlog.head()

Unnamed: 0,Flight Date,Drone Model,Location,Project,Year,Flight Height,Data Type,Side Overlap,Front Overlap
Loading ITables v2.2.3 from the init_notebook_mode cell... (need help?),,,,,,,,,


# Add the flight log as a sheet to the already generated compiled data

In [201]:
# Path of the existing workbook that receives the flight-log sheet. The
# expression is the same one used for `output_xlsx` when the merged table was
# written.
excel_file_path = os.path.join(path_compiled_data, field_name+" Vollebekk Spectral+Agro.xlsx")

In [202]:
# Append the flight log to the existing workbook as the sheet 'flightlog'.
# mode='a' opens the file for appending instead of overwriting it.
# if_sheet_exists='replace' lets this cell be re-run: with the default ('error')
# a second run raises ValueError because the sheet already exists.
with pd.ExcelWriter(
    excel_file_path,
    engine='openpyxl',
    mode='a',
    if_sheet_exists='replace',
) as writer:
    df_flightlog.to_excel(writer, sheet_name='flightlog', index=False)

print("DataFrame added as a new sheet successfully!")

DataFrame added as a new sheet successfully!
