# 2. Renaming and merging

Note: Half datasets, with separate files for east and west subplots have been merged manually in excel

In [116]:
%%time

import os
import math
from datetime import datetime as dt
import numpy as np
import pandas as pd
from copy import copy

# Dictionaries
import json
from pprint import pprint

# Visualisation
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages

# To display df nicely in loops
from IPython.display import display
# display(df1.head())
# display(df2.head())

# Display rows and columns Pandas
pd.options.display.max_columns = 100
pd.set_option('display.max_rows', 100)

# # For displaying max rows in series
# pd.options.display.max_rows = 10

Wall time: 0 ns


In [117]:
# Prints the current working directory
os.getcwd()
# os.listdir()

'C:\\Users\\fahad\\MegaSync\\NMBU\\GitHub\\vPheno'

## Finding Username folder to make general path for multi PC use

In [118]:
username = str(os.getcwd()).split('\\')[2]
username

'fahad'

## Importing Data

In [119]:
main_path = r'./Data/'
path_complete = r'./Data/1. complete_datasets/'

path_yield = r'C:\\Users\\'+username+'\\MegaSync\\NMBU\\Master Thesis\\Data\\Feb2021'

export_path = './Data/2,1. renamed_merged/'

# Create export_path folder if not exists already
os.makedirs(export_path, exist_ok=True)

os.listdir(main_path)

['1. complete_datasets',
 '2,1. renamed_merged',
 '2,2. plots',
 '2,2. renamed_merged',
 '3. merged data',
 '4. results_DM',
 '4. results_GY',
 'base_indices_columns.json',
 'spectral_indices_columns.json',
 'staur_weather_columns.json',
 'vollebekk_weather_columns.json',
 'yield_columns.json',
 'yield_df.json']

## Data Preparation
### Creating list of complete files

In [120]:
# Get the list of all files in directory tree at given path

files_with_address = []
files_list = []

for (dirpath, dirnames, filenames) in os.walk(path_complete):
    files_with_address += [os.path.join(dirpath, file) for file in filenames]
    files_list.extend(filenames)
    
print(len(files_with_address), 'files found in the directory')
# files_with_address
# files_list

74 files found in the directory


## Data Checking/control

### Check for duplicate filenames

In [121]:
print('Total number of files are :', len(files_list))

print('Number of unique file names are:', len(set(files_list)))

print('There is/are', len(files_list) - len(set(files_list)),'duplicate file name/names.')
if len(files_list) - len(set(files_list)) > 0:
    raise NameError

Total number of files are : 74
Number of unique file names are: 74
There is/are 0 duplicate file name/names.


In [122]:
# files_list

In [123]:
len(files_with_address)
# files_with_address

74

## Importing data files to Pandas

In [124]:
%%time

all_df = []
for data in files_with_address:
    file_name = os.path.splitext(os.path.basename(data))[0]

    # Replce all invalid characters in the name
    file_name = file_name.replace(" ", "_")
    file_name = file_name.replace("-", "_")
    file_name = file_name.replace(")", "")
    file_name = file_name.replace("(", "")
    df_name = file_name.replace(".", "")
    # Test: Check if the same date is already present in the current dict key
    if df_name in all_df:
        print(f'A file with the same name {df_name} has already been imported. \n Please check if there is duplication of data.')
        raise NameError
    all_df.append(df_name)

    locals()[df_name] = pd.read_csv(data, index_col=False)
    print(df_name, '=====', locals()[df_name].shape)

Graminor_010720 ===== (400, 6)
Graminor_020719 ===== (600, 6)
Graminor_040720 ===== (800, 6)
Graminor_040820 ===== (793, 6)
Graminor_050719 ===== (597, 6)
Graminor_050819 ===== (600, 6)
Graminor_060619 ===== (600, 6)
Graminor_070720 ===== (800, 6)
Graminor_070819 ===== (600, 6)
Graminor_110619 ===== (600, 6)
Graminor_110719 ===== (300, 6)
Graminor_130720 ===== (800, 6)
Graminor_140820 ===== (800, 6)
Graminor_150719 ===== (600, 6)
Graminor_150819 ===== (600, 6)
Graminor_170720 ===== (400, 6)
Graminor_180620 ===== (400, 6)
Graminor_200720 ===== (400, 6)
Graminor_240620 ===== (757, 6)
Graminor_250719 ===== (600, 6)
Graminor_280619 ===== (600, 6)
Graminor_300720 ===== (787, 6)
Masbasis_010720 ===== (688, 6)
Masbasis_020621 ===== (696, 6)
Masbasis_050719 ===== (528, 6)
Masbasis_060619 ===== (528, 6)
Masbasis_070819 ===== (528, 6)
Masbasis_070820 ===== (688, 6)
Masbasis_080720 ===== (688, 6)
Masbasis_090721 ===== (696, 6)
Masbasis_120820 ===== (688, 6)
Masbasis_130720 ===== (688, 6)
Masbasis

In [125]:
print(f'Total imported {len(all_df)}')

Total imported 74


# Standardizing the names

## Creating a dictionary with all dates for a certain field for a certain year

In [126]:
%%time

elements_to_strip = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_-()."

all_df_std = []

field_year_dict = {}

# A reference dict to keep record of the names of files before they got renamed 
renamed_to_from = {}

for df in all_df:
    
    # Getting date from the df name
    field_name = df.split('_')[0]
    date_ = df.split('_')[1]

    new_df_name = field_name +'_'+date_
    
    # Drop all columns except the std columns 
    locals()[new_df_name] = locals()[df].copy()
    
    all_df_std.append(new_df_name)
    
    # Creating a dict with all dates for a certain field for a certain year
    dict_key = field_name+'_20'+date_[-2:]
    
    # If the Field name is present in dict then add the date to that key
    # Otherwise, create new key for current field and add the date to it
    if dict_key in field_year_dict:
        # Test: Check if the same date is already present in the current dict key
        if date_ in field_year_dict[dict_key]:
            print(f'Duplicate Data file Error: {date_} is already present in {dict_key}\n Current df {df}\n Conflict with {renamed_to_from[new_df_name]}')
            raise NameError
        field_year_dict[dict_key].append(date_)
    else:
        field_year_dict[dict_key] = [date_]
    
    # Adding new and old names to a dict for record
    renamed_to_from[new_df_name] = df
#     print( new_df_name, '*********', df)
#     print(date_)
print('field_year_dict created.')

pprint(field_year_dict)
# all_df_std


field_year_dict created.
{'Graminor_2019': ['020719',
                   '050719',
                   '050819',
                   '060619',
                   '070819',
                   '110619',
                   '110719',
                   '150719',
                   '150819',
                   '250719',
                   '280619'],
 'Graminor_2020': ['010720',
                   '040720',
                   '040820',
                   '070720',
                   '130720',
                   '140820',
                   '170720',
                   '180620',
                   '200720',
                   '240620',
                   '300720'],
 'Masbasis_2019': ['050719',
                   '060619',
                   '070819',
                   '150719',
                   '220719',
                   '260619',
                   '280619',
                   '290719'],
 'Masbasis_2020': ['010720',
                   '070820',
                   '080720',
               

## Test: Check if there are duplicate datasets/names in all_df_std

In [127]:

if len(all_df_std) > len(set(all_df_std)):
    duplicates = len(all_df_std) - len(set(all_df_std))
    if duplicates>1:
        verb, plural='are', 's'
    else:
        verb, plural='is', ''
    print(f'Error:\nThere {verb} {duplicates} duplicate name{plural} in the datasets out of total {len(all_df_std)}.\n \
    Make sure no dataset has been lost because of data being separated in east/west fields on the same date')
    # Printing the names of the duplicate datasets, if any
    find_duplicates=[]
    for i in all_df_std:
        if i not in find_duplicates:
            find_duplicates.append(i)
        else:
            print(f'Duplicate dataset named \'{i}\',\n')
    raise NameError
    
else:
    print('No duplicate dataset found')

No duplicate dataset found


## Arranging the dates in field_year_dict in ascending order

In [128]:
sorted_field_year_dict = {}
for key, dates_list in field_year_dict.items():
    # Converting the dates to a datetime date object and sorting them in list
    sorted_dated = sorted([
        dt.strptime(date, '%d%m%y').date()
        for date in dates_list])

    sorted_field_year_dict[key] = sorted_dated

pprint(sorted_field_year_dict)

{'Graminor_2019': [datetime.date(2019, 6, 6),
                   datetime.date(2019, 6, 11),
                   datetime.date(2019, 6, 28),
                   datetime.date(2019, 7, 2),
                   datetime.date(2019, 7, 5),
                   datetime.date(2019, 7, 11),
                   datetime.date(2019, 7, 15),
                   datetime.date(2019, 7, 25),
                   datetime.date(2019, 8, 5),
                   datetime.date(2019, 8, 7),
                   datetime.date(2019, 8, 15)],
 'Graminor_2020': [datetime.date(2020, 6, 18),
                   datetime.date(2020, 6, 24),
                   datetime.date(2020, 7, 1),
                   datetime.date(2020, 7, 4),
                   datetime.date(2020, 7, 7),
                   datetime.date(2020, 7, 13),
                   datetime.date(2020, 7, 17),
                   datetime.date(2020, 7, 20),
                   datetime.date(2020, 7, 30),
                   datetime.date(2020, 8, 4),
                   da

## Summary of remaining data

In [129]:
for field, dates in sorted_field_year_dict.items():
    rows_df = []
    for date in dates:
        field_name = field.split('_')[0]+'_'+date.strftime('%d%m%y')
        temp_df = locals()[field_name].copy()
        rows_df.append(temp_df.shape[0])
    print(field.split('_')[0], date.year, 'Rows:', rows_df)

Graminor 2020 Rows: [400, 757, 400, 800, 800, 800, 400, 400, 787, 793, 800]
Graminor 2019 Rows: [600, 600, 600, 600, 597, 300, 600, 600, 600, 600, 600]
Masbasis 2020 Rows: [688, 688, 688, 688, 688, 688, 688, 688, 688, 688, 688, 688]
Masbasis 2021 Rows: [696, 696, 696, 696, 696, 696, 696]
Masbasis 2019 Rows: [528, 528, 528, 528, 528, 528, 528, 528]
Robot 2020 Rows: [96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96]
Staur 2019 Rows: [1328, 1328, 1328, 1326, 1328]
Staur 2020 Rows: [1506, 1506, 1506, 1506, 1506, 1506]



# Find grain yield, DH, DT etc values and attach to the dataframes

In [130]:
os.listdir(path_yield)

['2019 Staur Graminor',
 '2019 Vollebekk Graminor',
 '2019 Vollebekk Masbasis',
 '2020 Staur Graminor',
 '2020 Staur Masbasis_bandsNA',
 '2020 Vollebekk Graminor',
 '2020 Vollebekk Masbasis',
 '2020 Vollebekk Robot',
 '2021 Vollebekk Masbasis_yieldNA']

## Data Preparation
### Creating list of all files

In [131]:
# Get the list of all files in directory tree at given path_yield

files_with_address = []
files_list = []

for (dirpath, dirnames, filenames) in os.walk(path_yield):
    files_with_address += [os.path.join(dirpath, file) for file in filenames]
    files_list.extend(filenames)
    
print(len(files_with_address), 'files found in the directory')
# files_with_address
# files_list
files_with_address_bkp = copy(files_with_address)

87 files found in the directory


### Checking if there are multiple sheets in the files

In [132]:
# Print number of sheets in all files
print('The following files have multiple sheets.')

list_multi_sheet = []
for file in files_with_address:
    xl_file = pd.ExcelFile(file,engine='openpyxl')
    number_of_sheets = len(xl_file.sheet_names)
    if number_of_sheets > 1:
        print(number_of_sheets, os.path.basename(file), 'in folder', os.path.basename(os.path.dirname(file))
)
        list_multi_sheet.append(file)

The following files have multiple sheets.
3 19BMLFN3 - MASBASIS yield trial Staur 2019.xlsx in folder 2019 Staur Graminor
4 19TvPhenores.xlsx in folder 2019 Vollebekk Graminor
4 19BMLGI1 - MASBASIS yield trial Vollebekk 2019.xlsx in folder 2019 Vollebekk Masbasis
3 Staur-Graminor-Masbasis_2020.xlsx in folder 2020 Staur Graminor
2 20BMLFN3 - MASBASIS avlingsforsøk Staur 2020 lodging data.xlsx in folder 2020 Staur Masbasis_bandsNA
2 Masbasis_2020_staur.xlsx in folder 2020 Staur Masbasis_bandsNA
2 Staur_maturity_heading_yield_2020.xlsx in folder 2020 Staur Masbasis_bandsNA
3 20BMLGI1_2020_tm.xlsx in folder 2020 Vollebekk Masbasis
3 Masbasis_Mica_2020_all_dates_MEDIAN_DP.xlsx in folder 2020 Vollebekk Masbasis
3 ROBOT_2020.xlsx in folder 2020 Vollebekk Robot


In [133]:
# Files dropped in the initial stage
list_multi_sheet

['C:\\\\Users\\\\fahad\\MegaSync\\NMBU\\Master Thesis\\Data\\Feb2021\\2019 Staur Graminor\\19BMLFN3 - MASBASIS yield trial Staur 2019.xlsx',
 'C:\\\\Users\\\\fahad\\MegaSync\\NMBU\\Master Thesis\\Data\\Feb2021\\2019 Vollebekk Graminor\\19TvPhenores.xlsx',
 'C:\\\\Users\\\\fahad\\MegaSync\\NMBU\\Master Thesis\\Data\\Feb2021\\2019 Vollebekk Masbasis\\19BMLGI1 - MASBASIS yield trial Vollebekk 2019.xlsx',
 'C:\\\\Users\\\\fahad\\MegaSync\\NMBU\\Master Thesis\\Data\\Feb2021\\2020 Staur Graminor\\Staur-Graminor-Masbasis_2020.xlsx',
 'C:\\\\Users\\\\fahad\\MegaSync\\NMBU\\Master Thesis\\Data\\Feb2021\\2020 Staur Masbasis_bandsNA\\20BMLFN3 - MASBASIS avlingsforsøk Staur 2020 lodging data.xlsx',
 'C:\\\\Users\\\\fahad\\MegaSync\\NMBU\\Master Thesis\\Data\\Feb2021\\2020 Staur Masbasis_bandsNA\\Masbasis_2020_staur.xlsx',
 'C:\\\\Users\\\\fahad\\MegaSync\\NMBU\\Master Thesis\\Data\\Feb2021\\2020 Staur Masbasis_bandsNA\\Staur_maturity_heading_yield_2020.xlsx',
 'C:\\\\Users\\\\fahad\\MegaSync\\NMBU

### Identifying files without dates
(with 2019 in name means they dont have date format)

In [134]:
files_w_2019 = []
for file in files_with_address:
    if file not in list_multi_sheet:
        file_name = os.path.basename(file)
        if '2019' in file_name:
            print(file_name, 'in folder', os.path.basename(os.path.dirname(file)))
            files_w_2019.append(file)
        if '2020' in file_name:
            print(file_name, 'in folder', os.path.basename(os.path.dirname(file)))
            files_w_2019.append(file)

Staur_Graminor_2019_median PLT918-2050 240719 070819 150819 210819 300819.xlsx in folder 2019 Staur Graminor
Staur_Masbasis_2019 PLT101-866 240719 070819 150819 300819.xlsx in folder 2019 Staur Graminor
2020TGraminor-Vollebekk-res.xlsx in folder 2020 Vollebekk Graminor


In [135]:
files_w_2019

['C:\\\\Users\\\\fahad\\MegaSync\\NMBU\\Master Thesis\\Data\\Feb2021\\2019 Staur Graminor\\Staur_Graminor_2019_median PLT918-2050 240719 070819 150819 210819 300819.xlsx',
 'C:\\\\Users\\\\fahad\\MegaSync\\NMBU\\Master Thesis\\Data\\Feb2021\\2019 Staur Graminor\\Staur_Masbasis_2019 PLT101-866 240719 070819 150819 300819.xlsx',
 'C:\\\\Users\\\\fahad\\MegaSync\\NMBU\\Master Thesis\\Data\\Feb2021\\2020 Vollebekk Graminor\\2020TGraminor-Vollebekk-res.xlsx']

### Importing all files & their sheets into pandas

In [136]:
%%time

df_yield = []

# A reference dict to keep record of the names of files before they got renamed 
yield_import_to_from = {}

for data in list_multi_sheet+files_w_2019:
    file_name = os.path.splitext(os.path.basename(data))[0]
    # Moving numbers to the end of the string/file_name
    parent_dir = os.path.dirname(data).split('\\')[-1]
    file_name = parent_dir+'_x_'+file_name
        
#     # Parent dir
#     dir1_name = os.path.basename(os.path.dirname(data))
#     # grand parent dir
#     dir2_name = os.path.basename(os.path.dirname(os.path.dirname(data)))

    # Replce all invalid characters in the name
    file_name = file_name.replace("-", "_")
    file_name = file_name.replace(" ", "_")
    file_name = file_name.replace("(", "")
    file_name = file_name.replace(")", "")
    file_name = file_name.replace(".", "")
    file_name = 'df_'+file_name
    xl_file = pd.ExcelFile(data,engine='openpyxl')
    
    for sheet in xl_file.sheet_names:
        if len(xl_file.sheet_names)>1:
            sheet_str = sheet
            sheet_str = sheet_str.replace("-", "_")
            sheet_str = sheet_str.replace(" ", "_")
            sheet_str = sheet_str.replace("(", "")
            sheet_str = sheet_str.replace(")", "")
            sheet_str = sheet_str.replace(".", "")

            df_name = file_name+'_x_'+sheet_str
            
        else:
            df_name = file_name

        # Test: Check if the same date is already present in the current dict key
        if df_name in df_yield:
            print(f'A file with the same name {df_name} has already been imported. \n Please check if there is duplication of data.')
            raise NameError
    
        df_yield.append(df_name)
        print(df_name)
        # Adding new and old names+path to a dict for record
        yield_import_to_from[df_name] = data
        
        locals()[df_name] = pd.read_excel(data, sheet_name=sheet, engine='openpyxl')


df_2019_Staur_Graminor_x_19BMLFN3___MASBASIS_yield_trial_Staur_2019_x_Data


  warn("""Cannot parse header or footer so it will be ignored""")


df_2019_Staur_Graminor_x_19BMLFN3___MASBASIS_yield_trial_Staur_2019_x_Field_map
df_2019_Staur_Graminor_x_19BMLFN3___MASBASIS_yield_trial_Staur_2019_x_lsmeans
df_2019_Vollebekk_Graminor_x_19TvPhenores_x_Staur_kart
df_2019_Vollebekk_Graminor_x_19TvPhenores_x_Staur_res
df_2019_Vollebekk_Graminor_x_19TvPhenores_x_Vollebekk_kart
df_2019_Vollebekk_Graminor_x_19TvPhenores_x_Vollebekk_res
df_2019_Vollebekk_Masbasis_x_19BMLGI1___MASBASIS_yield_trial_Vollebekk_2019_x_data
df_2019_Vollebekk_Masbasis_x_19BMLGI1___MASBASIS_yield_trial_Vollebekk_2019_x_lsmeans
df_2019_Vollebekk_Masbasis_x_19BMLGI1___MASBASIS_yield_trial_Vollebekk_2019_x_Sheet1
df_2019_Vollebekk_Masbasis_x_19BMLGI1___MASBASIS_yield_trial_Vollebekk_2019_x_Sheet2
df_2020_Staur_Graminor_x_Staur_Graminor_Masbasis_2020_x_Map
df_2020_Staur_Graminor_x_Staur_Graminor_Masbasis_2020_x_Res
df_2020_Staur_Graminor_x_Staur_Graminor_Masbasis_2020_x_Sheet1
df_2020_Staur_Masbasis_bandsNA_x_20BMLFN3___MASBASIS_avlingsforsøk_Staur_2020_lodging_data_x_F

## Identifying the sheets with yield data

In [137]:
# Declaring list to grab df for genomics data
df_genomics = []

In [138]:
# Declaring dict to grab df with yield data
yield_src_dict = {}

In [139]:
# Listing datasets for reference
for df in all_df_std:
    print(df, locals()[df].shape, 'Min Plot_ID value:', locals()[df]['Plot_ID'].min(), 'Max Plot_ID value:', locals()[df]['Plot_ID'].max())

Graminor_010720 (400, 6) Min Plot_ID value: 151 Max Plot_ID value: 900
Graminor_020719 (600, 6) Min Plot_ID value: 101 Max Plot_ID value: 875
Graminor_040720 (800, 6) Min Plot_ID value: 101 Max Plot_ID value: 900
Graminor_040820 (793, 6) Min Plot_ID value: 101 Max Plot_ID value: 900
Graminor_050719 (597, 6) Min Plot_ID value: 101 Max Plot_ID value: 875
Graminor_050819 (600, 6) Min Plot_ID value: 101 Max Plot_ID value: 875
Graminor_060619 (600, 6) Min Plot_ID value: 101 Max Plot_ID value: 875
Graminor_070720 (800, 6) Min Plot_ID value: 101 Max Plot_ID value: 900
Graminor_070819 (600, 6) Min Plot_ID value: 101 Max Plot_ID value: 875
Graminor_110619 (600, 6) Min Plot_ID value: 101 Max Plot_ID value: 875
Graminor_110719 (300, 6) Min Plot_ID value: 501 Max Plot_ID value: 875
Graminor_130720 (800, 6) Min Plot_ID value: 101 Max Plot_ID value: 900
Graminor_140820 (800, 6) Min Plot_ID value: 101 Max Plot_ID value: 900
Graminor_150719 (600, 6) Min Plot_ID value: 101 Max Plot_ID value: 875
Gramin

Inspecting the datasets from Graminor, it can be noted that the plot id of all the plots of Graminor correspond to the range from 100 to 900, which are aparently all in Gramminor. Staur Plot IDs are 4 digit. So, we do not any bands data from Staur at this point.
Masbasis 1101 to 1892

### Vollebekk Graminor 2019 
19TvPhenores

In [140]:
# List of sheets with Graminor and 2019 in name
[x for x in df_yield if 'Graminor' in x and '2019' in x]

['df_2019_Staur_Graminor_x_19BMLFN3___MASBASIS_yield_trial_Staur_2019_x_Data',
 'df_2019_Staur_Graminor_x_19BMLFN3___MASBASIS_yield_trial_Staur_2019_x_Field_map',
 'df_2019_Staur_Graminor_x_19BMLFN3___MASBASIS_yield_trial_Staur_2019_x_lsmeans',
 'df_2019_Vollebekk_Graminor_x_19TvPhenores_x_Staur_kart',
 'df_2019_Vollebekk_Graminor_x_19TvPhenores_x_Staur_res',
 'df_2019_Vollebekk_Graminor_x_19TvPhenores_x_Vollebekk_kart',
 'df_2019_Vollebekk_Graminor_x_19TvPhenores_x_Vollebekk_res',
 'df_2019_Staur_Graminor_x_Staur_Graminor_2019_median_PLT918_2050_240719_070819_150819_210819_300819',
 'df_2019_Staur_Graminor_x_Staur_Masbasis_2019_PLT101_866_240719_070819_150819_300819']

**Already droped**  
Checking if the sheets from the file **19TvPhenores** and **19TvPhenores_pedigree** are the same

Turns out that the file **19TvPhenores** in **2019 Vollebekk Graminor** and in **2019 Staur**, and **19TvPhenores_pedigree** file in **2019 Staur** have the same sheets regarding grain yield data. So we will use the **19TvPhenores** in **2019 Vollebekk Graminor** directory.

**19TvPhenores_pedigree** and **19TvPhenores** files in Staur 2019 have been removed.
We will use **19TvPhenores** from **2019 Vollebekk Graminor** folder


In [141]:
# Adding df to be used later for genomics data matching
df_genomics.extend([])
# No data relavant for genomics
# Will be added in section Adding Yield Data to all datasets

# Removing the files that have been inspected
inspected_df_vb_grm19 = [
 'df_2019_Vollebekk_Graminor_x_19TvPhenores_x_Staur_kart',
 'df_2019_Vollebekk_Graminor_x_19TvPhenores_x_Vollebekk_kart',
 'df_2019_Vollebekk_Graminor_x_19TvPhenores_x_Vollebekk_res',
 'df_2019_Staur_Graminor_x_Staur_Graminor_2019_median_PLT918_2050_240719_070819_150819_210819_300819',
 'df_2019_Staur_Graminor_x_Staur_Masbasis_2019_PLT101_866_240719_070819_150819_300819']

df_yield = [x for x in df_yield if x not in inspected_df_vb_grm19]

In [142]:
yield_src_dict['Graminor_2019_Vollebekk'] = 'df_2019_Vollebekk_Graminor_x_19TvPhenores_x_Vollebekk_res'

### Vollebekk Graminor 2020

In [143]:
# List of sheets with Graminor and 2020 in name
[x for x in df_yield if 'Graminor' in x and '2020' in x and 'Vollebekk' in x]

['df_2020_Vollebekk_Graminor_x_2020TGraminor_Vollebekk_res']

In [144]:
# Adding df to be used later for genomics data matching
df_genomics.extend([])
# No data relavant for genomics
# Will be added in section Adding Yield Data to all datasets

# Removing the files that have been inspected
inspected_df_vb_grm20 = ['df_2020_Vollebekk_Graminor_x_2020TGraminor_Vollebekk_res']

df_yield = [x for x in df_yield if x not in inspected_df_vb_grm20]

In [145]:
yield_src_dict['Graminor_2020_Vollebekk'] = 'df_2020_Vollebekk_Graminor_x_2020TGraminor_Vollebekk_res'


### Vollebekk Masbasis 2019

In [146]:
# List of sheets with Masbasis and 2019 and Vollebekk in name
[x for x in df_yield if 'Masbasis' in x and '2019' in x and 'Vollebekk' in x ]

['df_2019_Vollebekk_Masbasis_x_19BMLGI1___MASBASIS_yield_trial_Vollebekk_2019_x_data',
 'df_2019_Vollebekk_Masbasis_x_19BMLGI1___MASBASIS_yield_trial_Vollebekk_2019_x_lsmeans',
 'df_2019_Vollebekk_Masbasis_x_19BMLGI1___MASBASIS_yield_trial_Vollebekk_2019_x_Sheet1',
 'df_2019_Vollebekk_Masbasis_x_19BMLGI1___MASBASIS_yield_trial_Vollebekk_2019_x_Sheet2']

Masbasis_2019_x_Field_data_2019 and Masbasis_2019_x_NEW_Field_data_2019 have same data. So deleting df_2019_Vollebekk_Masbasis_x_Field_data_2019

For df_2019_Vollebekk_Masbasis_x_Masbasis_2019_ForDP_median, the difference is extremely small. So we will use df_2019_Vollebekk_Masbasis_x_Masbasis_2019_ForDP_median. Deleting df_2019_Vollebekk_Masbasis_x_NEW_Field_data_2019

df_2019_Vollebekk_Masbasis_x_19BMLGI1___MASBASIS_yield_trial_Vollebekk_2019_x_data  
has the exact same data as in df_2019_Vollebekk_Masbasis_x_Masbasis_2019_ForDP_median.  
Deleting df_2019_Vollebekk_Masbasis_x_Masbasis_2019_ForDP_median

In [147]:
# Comparing two series to show only the mismatching entreis
pd.set_option("display.precision", 14)
# comparision = df_2019_Vollebekk_Masbasis_x_Masbasis_2019_ForDP_median['GrainYield'].compare(df_2019_Vollebekk_Masbasis_x_19BMLGI1___MASBASIS_yield_trial_Vollebekk_2019_x_data['GrainYield'])
# comparision.iloc[:,0:1].sub(comparision.iloc[:,1:2])
# comparision.head()

In [148]:
# Adding df to be used later for genomics data matching
df_genomics.extend([
    'df_2019_Vollebekk_Masbasis_x_19BMLGI1___MASBASIS_yield_trial_Vollebekk_2019_x_data',
    'df_2019_Vollebekk_Masbasis_x_19BMLGI1___MASBASIS_yield_trial_Vollebekk_2019_x_lsmeans',
    'df_2019_Vollebekk_Masbasis_x_19BMLGI1___MASBASIS_yield_trial_Vollebekk_2019_x_Sheet1',
    'df_2019_Vollebekk_Masbasis_x_19BMLGI1___MASBASIS_yield_trial_Vollebekk_2019_x_Sheet2'
])

# Removing the files that have been inspected
inspected_df_vb_mas19 = [
    'df_2019_Vollebekk_Masbasis_x_19BMLGI1___MASBASIS_yield_trial_Vollebekk_2019_x_data',
    'df_2019_Vollebekk_Masbasis_x_19BMLGI1___MASBASIS_yield_trial_Vollebekk_2019_x_lsmeans',
    'df_2019_Vollebekk_Masbasis_x_19BMLGI1___MASBASIS_yield_trial_Vollebekk_2019_x_Sheet1',
    'df_2019_Vollebekk_Masbasis_x_19BMLGI1___MASBASIS_yield_trial_Vollebekk_2019_x_Sheet2'
]

df_yield = [x for x in df_yield if x not in inspected_df_vb_mas19]

In [149]:
yield_src_dict['Masbasis_2019_Vollebekk'] = 'df_2019_Vollebekk_Masbasis_x_19BMLGI1___MASBASIS_yield_trial_Vollebekk_2019_x_data'

### Vollebekk Masbasis 2020

In [150]:
# List of sheets with Masbasis and 2020 in name
[x for x in df_yield if 'Masbasis' in x and '2020' in x and 'Vollebekk' in x]

['df_2020_Vollebekk_Masbasis_x_20BMLGI1_2020_tm_x_data',
 'df_2020_Vollebekk_Masbasis_x_20BMLGI1_2020_tm_x_lsmeans',
 'df_2020_Vollebekk_Masbasis_x_20BMLGI1_2020_tm_x_lsmeans_lmer',
 'df_2020_Vollebekk_Masbasis_x_Masbasis_Mica_2020_all_dates_MEDIAN_DP_x_Sheet1',
 'df_2020_Vollebekk_Masbasis_x_Masbasis_Mica_2020_all_dates_MEDIAN_DP_x_biomass',
 'df_2020_Vollebekk_Masbasis_x_Masbasis_Mica_2020_all_dates_MEDIAN_DP_x_Without_loadging']

In [151]:
# print(df_2020_Vollebekk_Masbasis_x_20BMLGI1_2020_tm_x_data.equals(df_2020_Vollebekk_Masbasis_x_Masbasis_vollebekk_trial_data_2020_x_data))
# print(df_2020_Vollebekk_Masbasis_x_20BMLGI1_2020_tm_x_lsmeans.equals(df_2020_Vollebekk_Masbasis_x_Masbasis_vollebekk_trial_data_2020_x_lsmeans))
# print(df_2020_Vollebekk_Masbasis_x_20BMLGI1_2020_tm_x_lsmeans_lmer.equals(df_2020_Vollebekk_Masbasis_x_Masbasis_vollebekk_trial_data_2020_x_lsmeans_lmer))


df_2020_Vollebekk_Masbasis_x_20BMLGI1_2020_tm   
is exactly the same as  
df_2020_Vollebekk_Masbasis_x_Masbasis_vollebekk_trial_data_2020.    
So deleting df_2020_Vollebekk_Masbasis_x_Masbasis_vollebekk_trial_data_2020.

In [152]:
# df_2020_Vollebekk_Masbasis_x_20BMLGI1_2020_tm_x_data['GrainYield'].equals(df_2020_Vollebekk_Masbasis_x_Masbasis_Mica_2020_all_dates_MEDIAN_DP_x_Sheet1['GrainYield'])

Moreover, the GrainYield data in  
df_2020_Vollebekk_Masbasis_x_20BMLGI1_2020_tm_x_data  
and 
df_2020_Vollebekk_Masbasis_x_Masbasis_Mica_2020_all_dates_MEDIAN_DP_x_Sheet1  
is exactly the same. So dropping df_2020_Vollebekk_Masbasis_x_Masbasis_Mica_2020_all_dates_MEDIAN_DP_x_Sheet1 from further processing. 

Important to note that in **Masbasis_Mica_2020_all_dates_MEDIAN_DP**, the subplots with lodging more than 10 have been dropped in the sheet named **Without_lodging**. It also dropped some red highlighted rows and some more for no aparent reason. There maybe a filtering strategy to drop unreliable subplots here. ToDo: Need to investigate and discuss with Sahameh

In [153]:
df_2020_Vollebekk_Masbasis_x_20BMLGI1_2020_tm_x_data['GrainYield'].dropna()

4      713.33333333333337
5      677.33333333333337
6      361.33333333333331
7      697.33333333333337
8      664.00000000000000
              ...        
679    646.66666666666663
680    478.66666666666669
681    732.00000000000000
682    600.00000000000000
683    714.66666666666663
Name: GrainYield, Length: 571, dtype: float64

In [154]:
df_2020_Vollebekk_Masbasis_x_20BMLGI1_2020_tm_x_lsmeans_lmer['GrainYield']

0      692.64982695641095
1      677.86196971022900
2      662.28221171870598
3      667.96322901550400
4      654.33320147100403
              ...        
283    517.81248520565703
284    593.26000855945904
285    499.96906032742100
286    539.15045569047300
287    535.09866305721403
Name: GrainYield, Length: 288, dtype: float64

And analysing the grain yields in thee above sheets for Masbasis 2020, **'df_2020_Vollebekk_Masbasis_x_20BMLGI1_2020_tm_x_data'** seems to have more data than in **'df_2020_Vollebekk_Masbasis_x_20BMLGI1_2020_tm_x_lsmeans_lmer'**. Also, the second sheet seems to have the yields for different varieties, not for individual subplots. If a varaiety is sown in two subplots, the secondd file only has one entry for that variety. So, it is decided to use **'df_2020_Vollebekk_Masbasis_x_20BMLGI1_2020_tm_x_data'** to get the grain yield data.

In [155]:
# Adding df to be used later for genomics data matching
df_genomics.extend([])
# No data relavant for genomics
# Will be added in section Adding Yield Data to all datasets

# Removing the files that have been inspected
inspected_df_st_mas20 = ['df_2020_Vollebekk_Masbasis_x_20BMLGI1_2020_tm_x_data',
 'df_2020_Vollebekk_Masbasis_x_20BMLGI1_2020_tm_x_lsmeans',
 'df_2020_Vollebekk_Masbasis_x_20BMLGI1_2020_tm_x_lsmeans_lmer',
 'df_2020_Vollebekk_Masbasis_x_Masbasis_Mica_2020_all_dates_MEDIAN_DP_x_Sheet1',
 'df_2020_Vollebekk_Masbasis_x_Masbasis_Mica_2020_all_dates_MEDIAN_DP_x_biomass',
 'df_2020_Vollebekk_Masbasis_x_Masbasis_Mica_2020_all_dates_MEDIAN_DP_x_Without_loadging']

df_yield = [x for x in df_yield if x not in inspected_df_st_mas20]


In [156]:
yield_src_dict['Masbasis_2020_Vollebekk'] = 'df_2020_Vollebekk_Masbasis_x_20BMLGI1_2020_tm_x_data'

### Staur 2019

In [157]:
# List of sheets with Graminor and 2019 in name
[x for x in df_yield if 'Staur' in x and '2019' in x]
# [x for x in df_yield if 'Masbasis' in x and '2019' in x and not 'Vollebekk' in x ]

['df_2019_Staur_Graminor_x_19BMLFN3___MASBASIS_yield_trial_Staur_2019_x_Data',
 'df_2019_Staur_Graminor_x_19BMLFN3___MASBASIS_yield_trial_Staur_2019_x_Field_map',
 'df_2019_Staur_Graminor_x_19BMLFN3___MASBASIS_yield_trial_Staur_2019_x_lsmeans',
 'df_2019_Vollebekk_Graminor_x_19TvPhenores_x_Staur_res']

#### Graminor 2019

In [158]:
df_2019_Vollebekk_Graminor_x_19TvPhenores_x_Staur_res.head()

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Pl.,Akssk.,Gulm.,Avling,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25,Unnamed: 26,Unnamed: 27,Unnamed: 28,Unnamed: 29,Unnamed: 30,Unnamed: 31,Unnamed: 32,Unnamed: 33,Unnamed: 34
0,r_expt,r_location,plot,bloc,ibloc,entry,,name,pedigree,Legde,høyde,Juli,Aug.,kg/daa,,,,,,,,,,,,,,,,,,,,,
1,19T1A 2,Staur,1301,1,1,14,,GN12760,NK02529/NK01533//NK01568,20,,22,,583.00721062618595,,,,,,,,,,,,,,,,,,,,,
2,19T1A 2,Staur,1302,1,1,2,,Demonstrant,T1005/NK93512,40,,23,,473.86110056925997,,,,,,,,,,,,,,,,,,,,,
3,19T1A 2,Staur,1303,1,1,11,,GN14547,SW45126/NK01533,30,,20,,511.10436432637567,,,,,,,,,,,,,,,,,,,,,
4,19T1A 2,Staur,1304,1,1,15,,GN15549,SW45126/GN03529,30,,21,,512.30360531309304,,,,,,,,,,,,,,,,,,,,,


df_2019_Vollebekk_Graminor_x_19TvPhenores_x_Staur_res  
and  
df_2019_Vollebekk_Graminor_x_Graminor_Staur_Vollebekk_2019_x_Staur_res_new  
are not exactly the same.   

Moreover Plot_ID from 1751 to 1775 are double in the second df.   
And from 1801 to 1850 the values in all other columns are duplicate of the above subplot.  
So all of them will be dropped from the following df.  
df_2019_Vollebekk_Graminor_x_Graminor_Staur_Vollebekk_2019_x_Staur_res_new  


In [159]:
df_2019_Vollebekk_Graminor_x_19TvPhenores_x_Staur_res.shape

(801, 35)

##### Dropping duplicate Plot_ID entries in yield data

In [160]:
staur_grm19_temp = df_2019_Vollebekk_Graminor_x_19TvPhenores_x_Staur_res.copy()

# Dropping entries with missing plot_ID /Nan entries
staur_grm19_temp.dropna(subset=['Unnamed: 2'], inplace=True)
staur_grm19_temp.shape

(801, 35)

In [161]:
# Finding identical entries in Plot_ID column
duplicates_all = staur_grm19_temp[staur_grm19_temp.duplicated(subset='Unnamed: 2')]['Unnamed: 2']

if duplicates_all.size > 0:
    # Dropping one of the two identical duplicates entries
    staur_grm19_temp.drop_duplicates(inplace=True)
    
    duplicates_non_ident = staur_grm19_temp[staur_grm19_temp.duplicated(subset='Unnamed: 2')]['Unnamed: 2']
    identical_dup_dropped = duplicates_all.size - duplicates_non_ident.size
    print(f'{identical_dup_dropped}      identical duplicate entries dropped.')
    
#     print(duplicates_all.size, duplicates_non_ident.size, identical_dup_dropped)

    # Dropping both entries for non-identical duplicate entries
    staur_grm19_temp.drop_duplicates(subset = 'Unnamed: 2', keep=False, inplace=True)
    print(f'{duplicates_non_ident.size*2} non-identical duplicate entries dropped.')

    print(f'Total     {identical_dup_dropped + duplicates_non_ident.size*2}     duplicate entries deleted.')
df_2019_Vollebekk_Graminor_x_19TvPhenores_x_Staur_res = staur_grm19_temp.copy()

#### Masbasis 2019

In [162]:
df_2019_Staur_Graminor_x_19BMLFN3___MASBASIS_yield_trial_Staur_2019_x_Data.head()

Unnamed: 0,Rute,Plot,Design,Rep,Block,PLT,Col,Nursery,Line,MASBASIS 2015,Name,Fra forsøk,Nr.,Rute.1,PH,LD1,LD2,LD3,LD_Mean,HD,MAT,DH,DM,Moisture,Yield,TW
0,101,1,alpha-lattice,1,1,1,1,MASBASIS,1521,1521,GN11527,18BMLGI1,1260,101,90,5.0,60,70,65.0,19,5,45.0,91,32.58823529411764,562.8306730661905,70.23529411764706
1,102,2,alpha-lattice,1,1,2,2,MASBASIS,1622,1622,GN14506,18BMLGI1,1223,102,86,0.0,50,60,55.0,23,18,49.0,104,38.0,494.1176470588236,77.11764705882354
2,103,3,alpha-lattice,1,1,3,3,MASBASIS,1609,1609,GN14529,18BMLGI1,1228,103,86,0.0,5,20,12.5,22,15,48.0,101,27.76470588235294,548.2754771737917,74.23529411764706
3,104,4,alpha-lattice,1,1,4,4,MASBASIS,24,1075,NK93604,18BMLGI1,1236,104,78,2.0,10,40,25.0,20,-2,46.0,84,32.0,480.00000000000006,71.70588235294117
4,105,5,fill,1,1,5,5,MASBASIS,94,1120,DH20097,18BMLGI1,1161,105,94,90.0,99,99,99.0,18,8,44.0,94,48.94117647058824,339.4878892733564,76.52941176470588


In [163]:
# Adding df to be used later for genomics data matching
df_genomics.extend([])

# Removing the files that have been inspected
inspected_df_st_grm19 = ['df_2019_Staur_Graminor_x_19BMLFN3___MASBASIS_yield_trial_Staur_2019_x_Data',
 'df_2019_Staur_Graminor_x_19BMLFN3___MASBASIS_yield_trial_Staur_2019_x_Field_map',
 'df_2019_Staur_Graminor_x_19BMLFN3___MASBASIS_yield_trial_Staur_2019_x_lsmeans',
 'df_2019_Vollebekk_Graminor_x_19TvPhenores_x_Staur_res']

df_yield = [x for x in df_yield if x not in inspected_df_st_grm19]


In [164]:
# # Can only be the names of df from the df_yield list

yield_src_dict['Staur_Graminor_2019'] = 'df_2019_Vollebekk_Graminor_x_19TvPhenores_x_Staur_res'
yield_src_dict['Staur_Masbasis_2019'] = 'df_2019_Staur_Graminor_x_19BMLFN3___MASBASIS_yield_trial_Staur_2019_x_Data'

### Staur 2020

In [165]:
# List of sheets with Graminor and 2020 in name
[x for x in df_yield if 'Staur' in x and '2020' in x]
# [x for x in df_yield if 'Graminor' in x and '2020' in x and not 'Vollebekk' in x]
# [x for x in df_yield if 'Masbasis' in x and '2020' in x and not 'Vollebekk' in x]

['df_2020_Staur_Graminor_x_Staur_Graminor_Masbasis_2020_x_Map',
 'df_2020_Staur_Graminor_x_Staur_Graminor_Masbasis_2020_x_Res',
 'df_2020_Staur_Graminor_x_Staur_Graminor_Masbasis_2020_x_Sheet1',
 'df_2020_Staur_Masbasis_bandsNA_x_20BMLFN3___MASBASIS_avlingsforsøk_Staur_2020_lodging_data_x_Feltbok',
 'df_2020_Staur_Masbasis_bandsNA_x_20BMLFN3___MASBASIS_avlingsforsøk_Staur_2020_lodging_data_x_kart',
 'df_2020_Staur_Masbasis_bandsNA_x_Masbasis_2020_staur_x_Feltbok',
 'df_2020_Staur_Masbasis_bandsNA_x_Masbasis_2020_staur_x_kart',
 'df_2020_Staur_Masbasis_bandsNA_x_Staur_maturity_heading_yield_2020_x_Feltbok',
 'df_2020_Staur_Masbasis_bandsNA_x_Staur_maturity_heading_yield_2020_x_kart']

In [166]:
df_2020_Staur_Graminor_x_Staur_Graminor_Masbasis_2020_x_Res.head()
df_2020_Staur_Graminor_x_Staur_Graminor_Masbasis_2020_x_Res

Unnamed: 0,Staur,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,% Late,% Late.1,Plant,Yellow,%,Yield
0,r_expt,r_location,plot,bloc,ibloc,entry,,name,pedigree,Tillers,Lodging,height,Ripe,Water,kg/daa
1,,,,,,,,,,,,,,,
2,20T7B2,Staur,125,1,1,24,,GN19584,GN06573/SW46375//GN03531,,,55,,16.55971896955504,491.09877798861481
3,20T7B2,Staur,126,1,1,20,,GN19565,SW71142/GN07574,,,68,,17.17962248832961,703.0642277039849
4,20T7B2,Staur,127,1,1,14,,GN19548,GN03509/SW51127,,,58,,16.97054945054945,650.90677798861486
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1047,20T19B2,Staur,2246,2,10,25,,GN20721,GN03509/4/Alt/Ae.squ//2*Seri/3/Avle,,,75,,16.926,547.40341555977238
1048,20T19B2,Staur,2247,2,10,10,,GN20705,Sommerset/Berserk//Berserk,,,70,,16.016,497.83984819734354
1049,20T19B2,Staur,2248,2,10,20,,GN20715,Sommerset/NK01513//BAJASS-5,,,75,,15.106,600.10018975332082
1050,20T19B2,Staur,2249,2,10,24,,GN20720,SW45126/NK01533,,,71,,18.655,496.00759013282743


#### Dropping entries with missing plot_ID /Nan entries

In [167]:
staur20_temp = df_2020_Staur_Graminor_x_Staur_Graminor_Masbasis_2020_x_Res.copy()

# Dropping entries with missing plot_ID 
staur20_temp.dropna(subset=['Unnamed: 2'], inplace=True)

#### Dropping Duplicate entries

In [168]:
# Finding identical entries in Plot_ID column
duplicates_all = staur20_temp[staur20_temp.duplicated(subset='Unnamed: 2')]['Unnamed: 2']

if duplicates_all.size > 0:
    # Dropping one of the two identical duplicates entries
    staur20_temp.drop_duplicates(inplace=True)
    
    duplicates_non_ident = staur20_temp[staur20_temp.duplicated(subset='Unnamed: 2')]['Unnamed: 2']
    identical_dup_dropped = duplicates_all.size - duplicates_non_ident.size
    print(f'{identical_dup_dropped}     identical duplicate entries dropped.')
    
#     print(duplicates_all.size, duplicates_non_ident.size, identical_dup_dropped)

    # Dropping both entries for non-identical duplicate entries
    staur20_temp.drop_duplicates(subset = 'Unnamed: 2', keep=False, inplace=True)
    print(f'{duplicates_non_ident.size*2} non-identical duplicate entries dropped.')

    print(f'Total     {identical_dup_dropped + duplicates_non_ident.size*2}     duplicate entries deleted.')
df_2020_Staur_Graminor_x_Staur_Graminor_Masbasis_2020_x_Res = staur20_temp.copy()

0     identical duplicate entries dropped.
4 non-identical duplicate entries dropped.
Total     4     duplicate entries deleted.


In [169]:
df_2020_Staur_Graminor_x_Staur_Graminor_Masbasis_2020_x_Res[df_2020_Staur_Graminor_x_Staur_Graminor_Masbasis_2020_x_Res.duplicated(subset='Unnamed: 2')]['Unnamed: 2']


Series([], Name: Unnamed: 2, dtype: object)

ToDo: Staur Masbasis data not processed yet

In [170]:
# Adding df to be used later for genomics data matching
df_genomics.extend([])

# Removing the files that have been inspected
inspected_df_staur20 = ['df_2020_Staur_Graminor_x_Staur_Graminor_Masbasis_2020_x_Map',
                        'df_2020_Staur_Graminor_x_Staur_Graminor_Masbasis_2020_x_Res',
                        'df_2020_Staur_Graminor_x_Staur_Graminor_Masbasis_2020_x_Sheet1',
                        'df_2020_Staur_Masbasis_bandsNA_x_20BMLFN3___MASBASIS_avlingsforsøk_Staur_2020_lodging_data_x_kart',
                        'df_2020_Staur_Masbasis_bandsNA_x_Masbasis_2020_staur_x_kart',
                        'df_2020_Staur_Masbasis_bandsNA_x_Staur_maturity_heading_yield_2020_x_kart',

                        ]

df_yield = [x for x in df_yield if x not in inspected_df_staur20]

In [171]:
yield_src_dict['Staur_2020'] = 'df_2020_Staur_Graminor_x_Staur_Graminor_Masbasis_2020_x_Res'

### Robot 2020

In [172]:
# List of sheets with Robot and 2020 in name
[x for x in df_yield if 'Robot' in x and '2020' in x]

['df_2020_Vollebekk_Robot_x_ROBOT_2020_x_raw',
 'df_2020_Vollebekk_Robot_x_ROBOT_2020_x_lsmeans_EntryRobot_x_Fert',
 'df_2020_Vollebekk_Robot_x_ROBOT_2020_x_layout']

In [173]:
df_2020_Vollebekk_Robot_x_ROBOT_2020_x_lsmeans_EntryRobot_x_Fert.shape

(48, 15)

In [174]:
df_2020_Vollebekk_Robot_x_ROBOT_2020_x_raw.shape

(96, 29)

**df_2020_Vollebekk_Robot_x_ROBOT_2020_x_raw** has more data points, and all of them correspond to the number of subplots on the Robot field of 2020. So this is the one used for grain yield data.

In [175]:
# Adding df to be used later for genomics data matching
df_genomics.extend([])
# No data relavant for genomics
# Will be added in section Adding Yield Data to all datasets


# Removing the files that have been inspected
inspected_df_robot20 = ['df_2020_Vollebekk_Robot_x_ROBOT_2020_x_raw',
 'df_2020_Vollebekk_Robot_x_ROBOT_2020_x_lsmeans_EntryRobot_x_Fert',
 'df_2020_Vollebekk_Robot_x_ROBOT_2020_x_layout']

df_yield = [x for x in df_yield if x not in inspected_df_robot20]


In [176]:
yield_src_dict['Robot_2020'] = 'df_2020_Vollebekk_Robot_x_ROBOT_2020_x_raw'

### Vollebekk Masbasis 2021

In [177]:
# List of sheets with Masbasis and 2021 in name
[x for x in df_yield if 'Masbasis' in x and '2021' in x]

[]

ToDo: Vollebekk Masbasis 2021 Yield data is not available

In [178]:
# yield_src_dict['Masbasis_2021'] = 

### Summary

Following datasets have grain yield data  
**OLD**  
Staur 2019: Graminor_2019_x_19TvPhenores_x_Staur_res  
Vollebekk 2019: Graminor_2019_x_19TvPhenores_x_Vollebekk_res  
Masbasis 2020: Masbasis_x_20BMLGI1_2020_tm_x_data  
Robot 2020: Robot_x_ROBOT_2020_x_raw  
Masbasis 2019: Masbasis_2019_x_Field_data_2019  
**OLD**  

In [179]:
yield_src_dict

{'Graminor_2019_Vollebekk': 'df_2019_Vollebekk_Graminor_x_19TvPhenores_x_Vollebekk_res',
 'Graminor_2020_Vollebekk': 'df_2020_Vollebekk_Graminor_x_2020TGraminor_Vollebekk_res',
 'Masbasis_2019_Vollebekk': 'df_2019_Vollebekk_Masbasis_x_19BMLGI1___MASBASIS_yield_trial_Vollebekk_2019_x_data',
 'Masbasis_2020_Vollebekk': 'df_2020_Vollebekk_Masbasis_x_20BMLGI1_2020_tm_x_data',
 'Staur_Graminor_2019': 'df_2019_Vollebekk_Graminor_x_19TvPhenores_x_Staur_res',
 'Staur_Masbasis_2019': 'df_2019_Staur_Graminor_x_19BMLFN3___MASBASIS_yield_trial_Staur_2019_x_Data',
 'Staur_2020': 'df_2020_Staur_Graminor_x_Staur_Graminor_Masbasis_2020_x_Res',
 'Robot_2020': 'df_2020_Vollebekk_Robot_x_ROBOT_2020_x_raw'}

In [180]:
# Yield data is required for the following fields/years
sorted_field_year_dict.keys()

dict_keys(['Graminor_2020', 'Graminor_2019', 'Masbasis_2020', 'Masbasis_2021', 'Masbasis_2019', 'Robot_2020', 'Staur_2019', 'Staur_2020'])

## ToDo: Automate: Exporting the path of datasets with yield data

In [181]:
yield_src_dict

# yield_import_to_from
yield_df_path = {}
for field, df in yield_src_dict.items():
    shrinked_path = yield_import_to_from[df].split(username)[1]
    yield_df_path[field] = [df, shrinked_path]
pprint(yield_df_path)

{'Graminor_2019_Vollebekk': ['df_2019_Vollebekk_Graminor_x_19TvPhenores_x_Vollebekk_res',
                             '\\MegaSync\\NMBU\\Master '
                             'Thesis\\Data\\Feb2021\\2019 Vollebekk '
                             'Graminor\\19TvPhenores.xlsx'],
 'Graminor_2020_Vollebekk': ['df_2020_Vollebekk_Graminor_x_2020TGraminor_Vollebekk_res',
                             '\\MegaSync\\NMBU\\Master '
                             'Thesis\\Data\\Feb2021\\2020 Vollebekk '
                             'Graminor\\2020TGraminor-Vollebekk-res.xlsx'],
 'Masbasis_2019_Vollebekk': ['df_2019_Vollebekk_Masbasis_x_19BMLGI1___MASBASIS_yield_trial_Vollebekk_2019_x_data',
                             '\\MegaSync\\NMBU\\Master '
                             'Thesis\\Data\\Feb2021\\2019 Vollebekk '
                             'Masbasis\\19BMLGI1 - MASBASIS yield trial '
                             'Vollebekk 2019.xlsx'],
 'Masbasis_2020_Vollebekk': ['df_2020_Vollebekk_Masbasis_x_20

In [182]:
# Make sure the folder/dir is there. If not, create one
os.makedirs(main_path, exist_ok=True)
import json
a_file = open(main_path+'yield_df.json', "w")
json.dump(yield_df_path, a_file)
a_file.close()

# a_file = open("Data\std_columns.json", "r")
# output_str = a_file.read()

# # The file is imported as string
# # Converting it to python format
# std_columns = json.loads(output_str)
# a_file.close()
# print(std_columns)

## Adding yield data to all datasets

In [183]:
# Defining expty list to collect names of the final df with yields
df_w_yields = []

### Vollebekk Graminor 2019

In [184]:
yield_src_dict

{'Graminor_2019_Vollebekk': 'df_2019_Vollebekk_Graminor_x_19TvPhenores_x_Vollebekk_res',
 'Graminor_2020_Vollebekk': 'df_2020_Vollebekk_Graminor_x_2020TGraminor_Vollebekk_res',
 'Masbasis_2019_Vollebekk': 'df_2019_Vollebekk_Masbasis_x_19BMLGI1___MASBASIS_yield_trial_Vollebekk_2019_x_data',
 'Masbasis_2020_Vollebekk': 'df_2020_Vollebekk_Masbasis_x_20BMLGI1_2020_tm_x_data',
 'Staur_Graminor_2019': 'df_2019_Vollebekk_Graminor_x_19TvPhenores_x_Staur_res',
 'Staur_Masbasis_2019': 'df_2019_Staur_Graminor_x_19BMLFN3___MASBASIS_yield_trial_Staur_2019_x_Data',
 'Staur_2020': 'df_2020_Staur_Graminor_x_Staur_Graminor_Masbasis_2020_x_Res',
 'Robot_2020': 'df_2020_Vollebekk_Robot_x_ROBOT_2020_x_raw'}

In [185]:
yield_src_dict['Graminor_2019_Vollebekk']

'df_2019_Vollebekk_Graminor_x_19TvPhenores_x_Vollebekk_res'

In [186]:
# Dropping first row/ unnamed column headings and replacing them with the second row.
Graminor_2019_Vollebekk_temp = df_2019_Vollebekk_Graminor_x_19TvPhenores_x_Vollebekk_res.copy()
Graminor_2019_Vollebekk_temp.columns = df_2019_Vollebekk_Graminor_x_19TvPhenores_x_Vollebekk_res.iloc[0]
Graminor_2019_Vollebekk_yield = Graminor_2019_Vollebekk_temp[1:]
Graminor_2019_Vollebekk_yield.head()

Unnamed: 0,r_expt,r_location,plot,bloc,ibloc,entry,NaN,name,pedigree,Legde,høyde,Juli,Aug.,kg/daa
1,19T1A 4,Vollebekk,101,1,1,1,,Zebra,Ralle/Dragon,,107.5,0,9,499.624440227704
2,19T1A 4,Vollebekk,102,1,1,11,,GN14547,SW45126/NK01533,,88.5,0,8,515.53275142315
3,19T1A 4,Vollebekk,103,1,1,13,,Tarrafal,,,87.0,0,10,529.5010246679317
4,19T1A 4,Vollebekk,104,1,1,14,,GN12760,NK02529/NK01533//NK01568,,95.0,0,7,544.5039848197345
5,19T1A 4,Vollebekk,105,1,1,4,,Bjarne,SvB87293/Bastian,,79.5,-1,8,529.5010246679317


In [187]:
field = 'Graminor_2019'
field_plot_id = 'Plot_ID'
ref_df_yield = Graminor_2019_Vollebekk_yield
red_df_id = 'plot'

# Rep (Replicate)
# Block
# (optional) Column and row
cols2add_dict = {'GrainYield':'kg/daa',
#                  'Replicates':'Rep',
                 'Block':'bloc',
                 'iBlock':'ibloc',
                 'Entry':'entry',
                 'Name':'name',
#                  'CodeName':'CodeName',
                 'Pedigree':'pedigree',
#                  'Line':'Line',
#                  'Heading_Date':'Hd_date_jd',
#                  'Maturity_Date':'maturity_dates',
#                  'Days2Heading':'DH',
#                  'Days2Maturity':'DM',
#                  'Lodging':'Lodging_images'
                }


for date in field_year_dict[field]:
    temp_df = locals()[field[:-4]+date].copy()
    temp_ref_df = ref_df_yield.copy()
    
    temp_df.set_index(field_plot_id, inplace=True)
    temp_ref_df.set_index(red_df_id, inplace=True)
    for field_df_col, ref_df_col in cols2add_dict.items():
        temp_df[field_df_col]=temp_ref_df[ref_df_col]
    new_df_name = field[:-4]+date+'_yield'
    locals()[new_df_name] = temp_df.reset_index()
    df_w_yields.append(new_df_name)
    print(new_df_name)
    display(locals()[new_df_name].head())

Graminor_020719_yield


Unnamed: 0,Plot_ID,Blue,Green,Red,RedEdge,NIR,GrainYield,Block,iBlock,Entry,Name,Pedigree
0,101,0.02268595807254,0.05619393475354,0.03246086649597,0.1366275921464,,499.624440227704,1,1,1,Zebra,Ralle/Dragon
1,102,0.02273678872734,0.06620932370424,0.02995380945504,0.16409155726433,,515.53275142315,1,1,11,GN14547,SW45126/NK01533
2,103,0.02160934358835,0.06065329909325,0.02854954078794,0.15512469410896,,529.5010246679317,1,1,13,Tarrafal,
3,104,0.02216789964587,0.06358679383993,0.027274729684,0.16070991009474,,544.5039848197345,1,1,14,GN12760,NK02529/NK01533//NK01568
4,105,0.02250274270773,0.06373646855354,0.0290665756911,0.15679793059826,,529.5010246679317,1,1,4,Bjarne,SvB87293/Bastian


Graminor_050719_yield


Unnamed: 0,Plot_ID,Blue,Green,Red,RedEdge,NIR,GrainYield,Block,iBlock,Entry,Name,Pedigree
0,101,0.01326411683112,0.03383284434676,0.01599823310971,0.09811945259571,0.28271350264549,499.624440227704,1,1,1,Zebra,Ralle/Dragon
1,102,0.01420369604602,0.04217976331711,0.01656366977841,0.12021178379655,0.33125692605972,515.53275142315,1,1,11,GN14547,SW45126/NK01533
2,103,0.01373060233891,0.03978736698627,0.0164987668395,0.11927407979965,0.34100607037544,529.5010246679317,1,1,13,Tarrafal,
3,104,0.01291039399803,0.03613347560167,0.01438284106553,0.10658553987741,0.34218749403954,544.5039848197345,1,1,14,GN12760,NK02529/NK01533//NK01568
4,105,0.01373865036294,0.03935918025672,0.01562700700015,0.11355617642403,0.35877726972103,529.5010246679317,1,1,4,Bjarne,SvB87293/Bastian


Graminor_050819_yield


Unnamed: 0,Plot_ID,Blue,Green,Red,RedEdge,NIR,GrainYield,Block,iBlock,Entry,Name,Pedigree
0,101,0.03803116083145,0.0895079523325,0.11108756065369,0.20793855190277,0.33830684423447,499.624440227704,1,1,1,Zebra,Ralle/Dragon
1,102,0.03603747859597,0.08863708749414,0.09795098751783,0.20319047570229,0.33689895272255,515.53275142315,1,1,11,GN14547,SW45126/NK01533
2,103,0.033297451213,0.08074614033103,0.10181091353297,0.1858371719718,0.31033453345299,529.5010246679317,1,1,13,Tarrafal,
3,104,0.0336047783494,0.07923398911953,0.09677043929696,0.18409106135368,0.31025590002537,544.5039848197345,1,1,14,GN12760,NK02529/NK01533//NK01568
4,105,0.03652532957494,0.08715170621872,0.10119498148561,0.19789596647024,0.3243872076273,529.5010246679317,1,1,4,Bjarne,SvB87293/Bastian


Graminor_060619_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Block,iBlock,Entry,Name,Pedigree
0,101,0.06242692098022,0.12928310036659,0.40793709456921,0.12927827239037,0.18556605279446,499.624440227704,1,1,1,Zebra,Ralle/Dragon
1,102,0.0617047753185,0.12984679639339,0.40499365329742,0.12785806506872,0.18499064445496,515.53275142315,1,1,11,GN14547,SW45126/NK01533
2,103,0.06372924894095,0.13242745399475,0.43018206954002,0.13231310993433,0.19441443681717,529.5010246679317,1,1,13,Tarrafal,
3,104,0.06252481415868,0.1279901266098,0.3974148184061,0.13086164742708,0.18619595468044,544.5039848197345,1,1,14,GN12760,NK02529/NK01533//NK01568
4,105,0.0567969083786,0.12781752645969,0.46616086363792,0.12999281287193,0.1910372376442,529.5010246679317,1,1,4,Bjarne,SvB87293/Bastian


Graminor_070819_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Block,iBlock,Entry,Name,Pedigree
0,101,0.02807604242116,0.02491444628686,0.2610018402338,0.0425901375711,0.15500625967979,499.624440227704,1,1,1,Zebra,Ralle/Dragon
1,102,0.02548846974969,0.02321178279817,0.25131633877754,0.04270110651851,0.1452676653862,515.53275142315,1,1,11,GN14547,SW45126/NK01533
2,103,0.02272112946957,0.02281192876399,0.24728409200907,0.03516270592809,0.14132040739059,529.5010246679317,1,1,13,Tarrafal,
3,104,0.02087229304016,0.01935291476548,0.22758482396603,0.03850228339434,0.12739233672619,544.5039848197345,1,1,14,GN12760,NK02529/NK01533//NK01568
4,105,0.01983925234526,0.0182256475091,0.21779330074787,0.03714719600976,0.1203685477376,529.5010246679317,1,1,4,Bjarne,SvB87293/Bastian


Graminor_110619_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Block,iBlock,Entry,Name,Pedigree
0,101,0.07959732785821,0.15751262754202,0.40495406091213,0.16877496242523,0.22030594944954,499.624440227704,1,1,1,Zebra,Ralle/Dragon
1,102,0.07585316151381,0.15240409970283,0.40546563267708,0.16335503757,0.22033466398716,515.53275142315,1,1,11,GN14547,SW45126/NK01533
2,103,0.0751468539238,0.14990012347698,0.41259691119194,0.16410303115845,0.22153432667255,529.5010246679317,1,1,13,Tarrafal,
3,104,0.07334865629673,0.15178900957107,0.41691379249096,0.15911489725113,0.22440274059772,544.5039848197345,1,1,14,GN12760,NK02529/NK01533//NK01568
4,105,0.067687317729,0.14149172604084,0.42218396067619,0.1530072838068,0.22442173957825,529.5010246679317,1,1,4,Bjarne,SvB87293/Bastian


Graminor_110719_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Block,iBlock,Entry,Name,Pedigree
0,502,0.05823946371675,0.06730763614178,0.38215750455856,0.07207664102316,0.18410573899746,545.7722960151804,1,1,15,GN18518,QUARNA/Demonstrant//GN03509
1,503,0.06236682832241,0.06964331865311,0.37162837386131,0.07932868972421,0.18285194784403,557.7130929791272,1,1,2,Demonstrant,T1005/NK93512
2,504,0.05678546242416,0.06665751710534,0.37647946178913,0.06977969035506,0.18485528230667,627.4110056925996,1,1,20,GN18560,SW71142/GN07574
3,505,0.06271997466683,0.07211204618216,0.38343925774097,0.07763006910682,0.19532621651888,579.7776091081594,1,1,19,GN18550,GN06578/Zebra
4,506,0.05941797792911,0.07065228372812,0.39400914311409,0.07343448698521,0.19358253479004,706.5836812144213,1,2,9,SW160177,


Graminor_150719_yield


Unnamed: 0,Plot_ID,Blue,Green,Red,RedEdge,NIR,GrainYield,Block,iBlock,Entry,Name,Pedigree
0,101,0.03142616711557,0.09458422288299,0.04957745783031,0.20826117694378,0.69684690237045,499.624440227704,1,1,1,Zebra,Ralle/Dragon
1,102,0.02997508272529,0.10813122242689,0.04500363394618,0.23338027298451,0.7598734498024,515.53275142315,1,1,11,GN14547,SW45126/NK01533
2,103,0.02916492521763,0.10273046791554,0.04784424975514,0.22700147330761,0.74380332231522,529.5010246679317,1,1,13,Tarrafal,
3,104,0.02609614282847,0.0912002697587,0.03861665911972,0.20593857765198,0.76346364617348,544.5039848197345,1,1,14,GN12760,NK02529/NK01533//NK01568
4,105,0.02858125045896,0.10258588567376,0.04754335433245,0.22074384242296,0.76520222425461,529.5010246679317,1,1,4,Bjarne,SvB87293/Bastian


Graminor_150819_yield


Unnamed: 0,Plot_ID,Blue,Green,Red,RedEdge,NIR,GrainYield,Block,iBlock,Entry,Name,Pedigree
0,101,0.02878511790186,0.05844127014279,0.117516040802,0.14003725349903,0.21981045603752,499.624440227704,1,1,1,Zebra,Ralle/Dragon
1,102,0.02570940647274,0.05182461813092,0.10239926353097,0.12284883856773,0.2010186240077,515.53275142315,1,1,11,GN14547,SW45126/NK01533
2,103,0.02409862633794,0.04782996885479,0.08490489423275,0.11317810043693,0.18477119505405,529.5010246679317,1,1,13,Tarrafal,
3,104,0.02250163350254,0.04489898495376,0.08538405969739,0.10882332921028,0.1826681420207,544.5039848197345,1,1,14,GN12760,NK02529/NK01533//NK01568
4,105,0.02201525401324,0.04317241534591,0.08270018175244,0.10348724573851,0.17202357202768,529.5010246679317,1,1,4,Bjarne,SvB87293/Bastian


Graminor_250719_yield


Unnamed: 0,Plot_ID,Blue,Green,Red,RedEdge,NIR,GrainYield,Block,iBlock,Entry,Name,Pedigree
0,101,0.02938761748374,0.06441292911768,0.04037766903639,0.16436159610748,0.38127216696739,499.624440227704,1,1,1,Zebra,Ralle/Dragon
1,102,0.02212803903967,0.04959484189749,0.03076799027622,0.12989164143801,0.2988944798708,515.53275142315,1,1,11,GN14547,SW45126/NK01533
2,103,0.02017462998629,0.05218221619725,0.03150171786547,0.13889007270336,0.31015455722809,529.5010246679317,1,1,13,Tarrafal,
3,104,0.02134160138667,0.0539444219321,0.03278078697622,0.13747247308493,0.31615297496319,544.5039848197345,1,1,14,GN12760,NK02529/NK01533//NK01568
4,105,0.01971418969333,0.05442130565643,0.03142018243671,0.14259034395218,0.31628808379173,529.5010246679317,1,1,4,Bjarne,SvB87293/Bastian


Graminor_280619_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Block,iBlock,Entry,Name,Pedigree
0,101,0.02247066237032,0.07446896284819,0.57778042554855,0.03233716636896,0.17017534375191,499.624440227704,1,1,1,Zebra,Ralle/Dragon
1,102,0.02490571513772,0.10096704959869,0.69695979356766,0.03293595835566,0.22298657894134,515.53275142315,1,1,11,GN14547,SW45126/NK01533
2,103,0.02364889904857,0.09232769906521,0.72896891832352,0.03157468140125,0.21824322640896,529.5010246679317,1,1,13,Tarrafal,
3,104,0.02109001390636,0.08284860849381,0.70666056871414,0.02639321796596,0.19336797297001,544.5039848197345,1,1,14,GN12760,NK02529/NK01533//NK01568
4,105,0.02288624271751,0.09066342562437,0.75146371126175,0.02943593077362,0.21014194190502,529.5010246679317,1,1,4,Bjarne,SvB87293/Bastian


### Vollebekk Graminor_2020

In [188]:
yield_src_dict['Graminor_2020_Vollebekk']

'df_2020_Vollebekk_Graminor_x_2020TGraminor_Vollebekk_res'

In [189]:
df_2020_Vollebekk_Graminor_x_2020TGraminor_Vollebekk_res.head()

Unnamed: 0,20T1A,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,%,Avl.,Hl.-,%.1
0,r_expt,r_location,loccode,plot,bloc,ibloc,entry,,name,pedigree,Vann,kg/daa,vekt,Prot
1,20T1A_4,Vollebekk,04,101,1,1,1,,Zebra,Ralle/Dragon,15.9280701754386,654.70815939278953,78.8,11.5
2,20T1A_4,Vollebekk,04,102,1,1,19,,GN18666,GN04528/GN03509,15.7754954954955,709.59544592030363,78.3,10.7
3,20T1A_4,Vollebekk,04,103,1,1,11,,GN15590,Demonstrant/SW51114,16.22257194244604,707.10056925996207,80.09999999999999,10.6
4,20T1A_4,Vollebekk,04,104,1,1,5,,Mirakel,SW38337/NK98533//NK98535,15.32508865248227,724.95863377609112,78.09999999999999,10.8


In [190]:
# Dropping first row/ unnamed column headings and replacing them with the second row.
Graminor_2020_Vollebekk_temp = df_2020_Vollebekk_Graminor_x_2020TGraminor_Vollebekk_res.copy()
Graminor_2020_Vollebekk_temp.columns = df_2020_Vollebekk_Graminor_x_2020TGraminor_Vollebekk_res.iloc[0]
Graminor_2020_Vollebekk_yield = Graminor_2020_Vollebekk_temp[1:]
Graminor_2020_Vollebekk_yield.head()

Unnamed: 0,r_expt,r_location,loccode,plot,bloc,ibloc,entry,NaN,name,pedigree,Vann,kg/daa,vekt,Prot
1,20T1A_4,Vollebekk,4,101,1,1,1,,Zebra,Ralle/Dragon,15.9280701754386,654.7081593927895,78.8,11.5
2,20T1A_4,Vollebekk,4,102,1,1,19,,GN18666,GN04528/GN03509,15.7754954954955,709.5954459203036,78.3,10.7
3,20T1A_4,Vollebekk,4,103,1,1,11,,GN15590,Demonstrant/SW51114,16.22257194244604,707.1005692599621,80.1,10.6
4,20T1A_4,Vollebekk,4,104,1,1,5,,Mirakel,SW38337/NK98533//NK98535,15.32508865248227,724.958633776091,78.1,10.8
5,20T1A_4,Vollebekk,4,105,1,1,24,,GN18751,QUARNA/GN03531,16.84505962521295,740.978368121442,77.7,10.6


In [191]:
field = 'Graminor_2020'
field_plot_id = 'Plot_ID'
ref_df_yield = Graminor_2020_Vollebekk_yield
red_df_id = 'plot'

cols2add_dict = {'GrainYield':'kg/daa',
#                  'Replicates':'Rep',
                 'Block':'bloc',
                 'iBlock':'ibloc',
                 'Entry':'entry',
                 'Name':'name',
#                  'CodeName':'CodeName',
                 'Pedigree':'pedigree',
#                  'Line':'Line',
#                  'Heading_Date':'Hd_date_jd',
#                  'Maturity_Date':'maturity_dates',
#                  'Days2Heading':'DH',
#                  'Days2Maturity':'DM',
#                  'Lodging':'Lodging_images'
                }


for date in field_year_dict[field]:
    temp_df = locals()[field[:-4]+date].copy()
    temp_ref_df = ref_df_yield.copy()
    
    temp_df.set_index(field_plot_id, inplace=True)
    temp_ref_df.set_index(red_df_id, inplace=True)
    for field_df_col, ref_df_col in cols2add_dict.items():
        temp_df[field_df_col]=temp_ref_df[ref_df_col]
    new_df_name = field[:-4]+date+'_yield'
    locals()[new_df_name] = temp_df.reset_index()
    df_w_yields.append(new_df_name)
    print(new_df_name)
    display(locals()[new_df_name].head())

Graminor_010720_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Block,iBlock,Entry,Name,Pedigree
0,151,0.05252161063254,0.14502110332251,1.43700313568115,0.06502427533269,0.35323092341423,708.4544971537003,1,1,23,GN20540,SW71139/GN07501
1,152,0.05525885522366,0.14930035918951,1.46168011426926,0.06863539665937,0.3612810075283,666.2278557874763,1,1,19,GN20536,SW51069/GN03509
2,153,0.054916029796,0.1483895033598,1.4244921207428,0.07078363746405,0.3630309253931,654.7114990512335,1,1,17,GN20534,GN04526/GN08581
3,154,0.05004250630736,0.1472183316946,1.43727421760559,0.06284533441067,0.35865554213524,621.4861480075901,1,1,12,GN20529,Krabat/GN08536
4,155,0.04993262328207,0.14262174814939,1.33971440792084,0.06338872760534,0.34286560118198,674.96440227704,1,1,2,Demonstrant,T1005/NK93512


Graminor_040720_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Block,iBlock,Entry,Name,Pedigree
0,101,0.01731912605465,0.04214972630143,0.4836061000824,0.0213649943471,0.11029557138681,654.7081593927895,1,1,1,Zebra,Ralle/Dragon
1,102,0.01709566265345,0.04406672716141,0.50077459216118,0.02111094258726,0.11248160898685,709.5954459203036,1,1,19,GN18666,GN04528/GN03509
2,103,0.01724500674754,0.04677460156381,0.50010347366333,0.02177555020899,0.11889328435063,707.1005692599621,1,1,11,GN15590,Demonstrant/SW51114
3,104,0.01585669536144,0.04220090247691,0.50624313950539,0.01899677887559,0.1156235858798,724.958633776091,1,1,5,Mirakel,SW38337/NK98533//NK98535
4,105,0.01848565228283,0.04919448308647,0.56282949447632,0.02177054807544,0.12781217694283,740.978368121442,1,1,24,GN18751,QUARNA/GN03531


Graminor_040820_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Block,iBlock,Entry,Name,Pedigree
0,101,0.02888692356646,0.0612443909049,0.30749645829201,0.08660537749529,0.16973912715912,654.7081593927895,1,1,1,Zebra,Ralle/Dragon
1,102,0.02778669912368,0.05822783522308,0.28869172930718,0.08270449191332,0.16041847318411,709.5954459203036,1,1,19,GN18666,GN04528/GN03509
2,103,0.02882610075176,0.05806675925851,0.27746567130089,0.08406268805265,0.1553757339716,707.1005692599621,1,1,11,GN15590,Demonstrant/SW51114
3,104,0.03581303358078,0.07255017384887,0.31686189770698,0.09702980145812,0.18787904083729,724.958633776091,1,1,5,Mirakel,SW38337/NK98533//NK98535
4,105,0.03172615915537,0.07085644453764,0.34768979251385,0.08739325404167,0.18942515552044,740.978368121442,1,1,24,GN18751,QUARNA/GN03531


Graminor_070720_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Block,iBlock,Entry,Name,Pedigree
0,101,0.03208063915372,0.07948572933674,0.29145963490009,0.0455450154841,0.12723475694656,654.7081593927895,1,1,1,Zebra,Ralle/Dragon
1,102,0.03192751482129,0.07145847380161,0.26160323619843,0.03559897840023,0.1215837597847,709.5954459203036,1,1,19,GN18666,GN04528/GN03509
2,103,0.0299376565963,0.06507416069508,0.28924092650414,0.03257780894637,0.11390871554613,707.1005692599621,1,1,11,GN15590,Demonstrant/SW51114
3,104,0.03123905044049,0.07095941528678,0.33665755391121,0.03552260808647,0.1193008236587,724.958633776091,1,1,5,Mirakel,SW38337/NK98533//NK98535
4,105,0.03136086650193,0.07083886489272,0.29493974149227,0.03595094382763,0.11881457641721,740.978368121442,1,1,24,GN18751,QUARNA/GN03531


Graminor_130720_yield


Unnamed: 0,Plot_ID,Blue,Green,Red,RedEdge,NIR,GrainYield,Block,iBlock,Entry,Name,Pedigree
0,101,0.01943485252559,0.05616546049714,0.03275330737233,0.13417160511017,0.42078544199467,654.7081593927895,1,1,1,Zebra,Ralle/Dragon
1,102,0.01763908099383,0.04844906181097,0.02694509271532,0.12227071076632,0.42546781897545,709.5954459203036,1,1,19,GN18666,GN04528/GN03509
2,103,0.01708567328751,0.05166833475232,0.02986929006875,0.12535209953785,0.42028772830963,707.1005692599621,1,1,11,GN15590,Demonstrant/SW51114
3,104,0.01794050540775,0.05460357666016,0.02966742496938,0.13090482354164,0.42073749005794,724.958633776091,1,1,5,Mirakel,SW38337/NK98533//NK98535
4,105,0.01588864158839,0.04833815060556,0.02423826511949,0.12712229043245,0.43655633926392,740.978368121442,1,1,24,GN18751,QUARNA/GN03531


Graminor_140820_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Block,iBlock,Entry,Name,Pedigree
0,101,0.05545852705836,0.10966078937054,0.32879169285297,0.16714072972536,0.20293719321489,654.7081593927895,1,1,1,Zebra,Ralle/Dragon
1,102,0.04171454161406,0.08938934281468,0.3082974255085,0.13603235781193,0.18076609820128,709.5954459203036,1,1,19,GN18666,GN04528/GN03509
2,103,0.04265864379704,0.08891965448856,0.28528659045696,0.12782566994429,0.16898018121719,707.1005692599621,1,1,11,GN15590,Demonstrant/SW51114
3,104,0.04533242620528,0.093895111233,0.30263090133667,0.13479708880186,0.17875427007675,724.958633776091,1,1,5,Mirakel,SW38337/NK98533//NK98535
4,105,0.03555231168866,0.07677070051432,0.27819603681564,0.11304578930139,0.15748612582684,740.978368121442,1,1,24,GN18751,QUARNA/GN03531


Graminor_170720_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Block,iBlock,Entry,Name,Pedigree
0,151,0.02228823490441,0.06793008744717,0.49244546890259,0.03432929515839,0.15943202376366,708.4544971537003,1,1,23,GN20540,SW71139/GN07501
1,152,0.02431623544544,0.07310711592436,0.47009554505348,0.04503541067243,0.16368761658668,666.2278557874763,1,1,19,GN20536,SW51069/GN03509
2,153,0.02390163391828,0.07059001177549,0.4781693816185,0.0388567969203,0.1628008633852,654.7114990512335,1,1,17,GN20534,GN04526/GN08581
3,154,0.02424799464643,0.07628740742803,0.4903377443552,0.04275009967387,0.17022757232189,621.4861480075901,1,1,12,GN20529,Krabat/GN08536
4,155,0.02308773249388,0.06872000545263,0.44838109612465,0.0383533872664,0.15390978753567,674.96440227704,1,1,2,Demonstrant,T1005/NK93512


Graminor_180620_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Block,iBlock,Entry,Name,Pedigree
0,151,0.02500713057816,0.06103284284473,0.49470546841621,0.02360297739506,0.1478226184845,708.4544971537003,1,1,23,GN20540,SW71139/GN07501
1,152,0.02698950842023,0.06481597572565,0.49543772637844,0.0261138305068,0.14868681132793,666.2278557874763,1,1,19,GN20536,SW51069/GN03509
2,153,0.02630499657243,0.06199061125517,0.48579984903336,0.02458008751273,0.13743014633656,654.7114990512335,1,1,17,GN20534,GN04526/GN08581
3,154,0.02678878419101,0.0621184296906,0.46459522843361,0.02538747154176,0.14423263072967,621.4861480075901,1,1,12,GN20529,Krabat/GN08536
4,155,0.02933445945382,0.06688608974218,0.53210029006004,0.02741109300405,0.1524418592453,674.96440227704,1,1,2,Demonstrant,T1005/NK93512


Graminor_200720_yield


Unnamed: 0,Plot_ID,Blue,Green,Red,RedEdge,NIR,GrainYield,Block,iBlock,Entry,Name,Pedigree
0,151,0.02052104379982,0.06073131039739,0.03193806298077,0.14240511506796,0.38521130383015,708.4544971537003,1,1,23,GN20540,SW71139/GN07501
1,152,0.02352127619088,0.0667671635747,0.04440850950778,0.15232443064451,0.37749837338924,666.2278557874763,1,1,19,GN20536,SW51069/GN03509
2,153,0.02119263634086,0.06027925387025,0.03469301760197,0.14231227338314,0.3712010383606,654.7114990512335,1,1,17,GN20534,GN04526/GN08581
3,154,0.02113559655845,0.06225795857608,0.03737545385957,0.14357797801495,0.37263041734695,621.4861480075901,1,1,12,GN20529,Krabat/GN08536
4,155,0.01975152548402,0.05702294781804,0.03292134776712,0.13056176155806,0.33940501511097,674.96440227704,1,1,2,Demonstrant,T1005/NK93512


Graminor_240620_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Block,iBlock,Entry,Name,Pedigree
0,144,0.0251169288531,0.05883832648397,0.39436191320419,0.02690711151808,0.11709716543555,668.1017077798862,1,4,11,GN19529,GN05567/Bjarne
1,145,0.02271206118166,0.04885277338326,0.36530631780624,0.0254563651979,0.10728264600039,752.1396584440228,1,4,2,Demonstrant,T1005/NK93512
2,146,0.0216562282294,0.04802346602082,0.32287809252739,0.02287205122411,0.10473021864891,651.1628083491462,1,5,10,GN19524,GN08595/Berserk
3,147,0.02048542350531,0.04710882157087,0.31554086506367,0.02204723563045,0.10316145047545,693.3130929791272,1,5,7,GN19604,Sommerset/GN08596
4,148,0.02118466049433,0.04753368720412,0.38023635745049,0.02270166389644,0.10553495585918,716.2922201138521,1,5,8,GN19606,NK01513/Sommerset


Graminor_300720_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Block,iBlock,Entry,Name,Pedigree
0,101,0.02325643971562,0.06081332825124,0.27885347604752,0.05478893965483,0.13804723322391,654.7081593927895,1,1,1,Zebra,Ralle/Dragon
1,102,0.02383422106504,0.06415249407291,0.27774173021317,0.05711416527629,0.14228685200214,709.5954459203036,1,1,19,GN18666,GN04528/GN03509
2,103,0.02948808670044,0.0838436409831,0.32662737369537,0.06626740098,0.18030296266079,707.1005692599621,1,1,11,GN15590,Demonstrant/SW51114
3,104,0.02294030319899,0.06694783270359,0.31512682139873,0.05006634443998,0.15148229151964,724.958633776091,1,1,5,Mirakel,SW38337/NK98533//NK98535
4,105,0.02065573260188,0.0561890900135,0.25203633308411,0.05744943767786,0.13080714643002,740.978368121442,1,1,24,GN18751,QUARNA/GN03531


### Vollebekk Masbasis_2019

In [192]:
yield_src_dict['Masbasis_2019_Vollebekk']

'df_2019_Vollebekk_Masbasis_x_19BMLGI1___MASBASIS_yield_trial_Vollebekk_2019_x_data'

In [193]:
df_2019_Vollebekk_Masbasis_x_19BMLGI1___MASBASIS_yield_trial_Vollebekk_2019_x_data.head()

Unnamed: 0,Rute,Plot,Design,Rep,Block,PLT,Col,Nursery,Line,MASBASIS 2015,Name,Fra forsøk,Nr.,Høste aks,Luket,Kommentar,HD,MAT,DH,DM,PH_1,PH_2,PH,Rute.1,Vekt i gram,Vekt inn kg,Vekt ut kg,Unnamed: 27,GrainYield,Protein,TW,TKW
0,1101,1,alpha-lattice,1,1,1,1,MASBASIS,1574,1574,GN12687,18BMLGI1,1256,,1.0,,28,8,66,107,92,92,92.0,1101,4390,4.39,3.92,1.11989795918367,522.6666666666665,11.1,79.0,34.89795918367347
1,1102,2,alpha-lattice,1,1,2,2,MASBASIS,28,1073,Avocet YrA,18BMLGI1,1330,,1.0,,31,11,69,110,86,86,86.0,1102,3350,3.55,2.91,1.21993127147766,388.0,11.5,82.0,41.02826086956522
2,1103,3,alpha-lattice,1,1,3,3,MASBASIS,1313,1313,GN08557,18BMLGI1,1401,,1.0,,32,9,70,108,87,87,87.0,1103,4550,4.55,4.06,1.12068965517241,541.3333333333334,11.4,77.0,33.89835164835165
3,1104,4,fill,1,1,4,4,MASBASIS,1311,1311,GN08541,18BMLGI1,1109,,1.0,,31,10,69,109,96,96,96.0,1104,4800,4.8,4.29,1.11888111888112,572.0,10.9,76.0,34.68047337278106
4,1105,5,alpha-lattice,1,1,5,5,MASBASIS,1324,1324,SW44431,18BMLGI1,1459,,1.0,,29,7,67,106,98,98,98.0,1105,4530,4.53,4.07,1.11302211302211,542.6666666666667,11.1,79.2,31.94091903719912


In [194]:
 Masbasis_2019_Vollebekk_yield = df_2019_Vollebekk_Masbasis_x_19BMLGI1___MASBASIS_yield_trial_Vollebekk_2019_x_data.copy()

In [195]:
field = 'Masbasis_2019'
field_plot_id = 'Plot_ID'
ref_df_yield = Masbasis_2019_Vollebekk_yield
red_df_id = 'Rute'

cols2add_dict = {'GrainYield':'GrainYield',
                 'Replicates':'Rep',
                 'Block':'Block',
#                  'iBlock':'ibloc',
#                  'Entry':'entry',
                 'Name':'Name',
#                  'CodeName':'CodeName',
#                  'Pedigree':'pedigree',
                 'Line':'Line',
#                  'Heading_Date':'Hd_date_jd',
#                  'Maturity_Date':'maturity_dates',
                 'Days2Heading':'DH',
                 'Days2Maturity':'DM',
#                  'Lodging':'Lodging_images'
                }


for date in field_year_dict[field]:
    temp_df = locals()[field[:-4]+date].copy()
    temp_ref_df = ref_df_yield.copy()
    
    temp_df.set_index(field_plot_id, inplace=True)
    temp_ref_df.set_index(red_df_id, inplace=True)
    for field_df_col, ref_df_col in cols2add_dict.items():
        temp_df[field_df_col]=temp_ref_df[ref_df_col]
    new_df_name = field[:-4]+date+'_yield'
    locals()[new_df_name] = temp_df.reset_index()
    df_w_yields.append(new_df_name)
    print(new_df_name)
    display(locals()[new_df_name].head())

Masbasis_050719_yield


Unnamed: 0,Plot_ID,Blue,Green,Red,RedEdge,NIR,GrainYield,Replicates,Block,Name,Line,Days2Heading,Days2Maturity
0,1101,0.01724164374173,0.04960502684116,0.01648067310453,0.14002199470997,0.51743096113205,522.6666666666665,1,1,GN12687,1574,66,107
1,1102,0.0182522367686,0.0589012503624,0.01840719208121,0.15489521622658,0.48081737756729,388.0,1,1,Avocet YrA,28,69,110
2,1103,0.01846960186958,0.05806533619762,0.01639430783689,0.15941980481148,0.54950857162476,541.3333333333334,1,1,GN08557,1313,70,108
3,1104,0.01832564175129,0.05711439438164,0.01757482159883,0.15400582551956,0.5361188352108,572.0,1,1,GN08541,1311,69,109
4,1105,0.0174199026078,0.05207008123398,0.01628712192178,0.14683654159307,0.54055520892143,542.6666666666667,1,1,SW44431,1324,67,106


Masbasis_060619_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Replicates,Block,Name,Line,Days2Heading,Days2Maturity
0,1101,0.03428133949637,0.09747382625937,0.64849689602852,0.04930241033435,0.2093452885747,522.6666666666665,1,1,GN12687,1574,66,107
1,1102,0.0377445332706,0.10293141752481,0.5740105509758,0.05805788189173,0.20425572991371,388.0,1,1,Avocet YrA,28,69,110
2,1103,0.03256499394774,0.10697019100189,0.78791910409927,0.0395550429821,0.23605041205883,541.3333333333334,1,1,GN08557,1313,70,108
3,1104,0.0302531439811,0.09503712505102,0.70661836862564,0.03893245011568,0.20328848063946,572.0,1,1,GN08541,1311,69,109
4,1105,0.03078365232796,0.09397916123271,0.67165452241898,0.04058351926506,0.18996452540159,542.6666666666667,1,1,SW44431,1324,67,106


Masbasis_070819_yield


Unnamed: 0,Plot_ID,Blue,Green,Red,RedEdge,NIR,GrainYield,Replicates,Block,Name,Line,Days2Heading,Days2Maturity
0,1101,0.02745081204921,0.06774421036243,0.10468378663063,0.17041145265102,0.29423217475414,522.6666666666665,1,1,GN12687,1574,66,107
1,1102,0.02254099585116,0.06282230466604,0.07340057194233,0.1519963145256,0.28168618679047,388.0,1,1,Avocet YrA,28,69,110
2,1103,0.02302111312747,0.05578975751996,0.08889077603817,0.1499330252409,0.27658405900001,541.3333333333334,1,1,GN08557,1313,70,108
3,1104,0.02876975573599,0.06988473236561,0.09660045802593,0.17536590993404,0.31517273187637,572.0,1,1,GN08541,1311,69,109
4,1105,0.03374515287578,0.07753943651915,0.12545649707317,0.19125919044018,0.31620867550373,542.6666666666667,1,1,SW44431,1324,67,106


Masbasis_150719_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Replicates,Block,Name,Line,Days2Heading,Days2Maturity
0,1101,0.0286360019818,0.09165789186954,0.79030305147171,0.03383398987353,0.21497885137796,522.6666666666665,1,1,GN12687,1574,66,107
1,1102,0.03227930143475,0.10205971822143,0.72386369109154,0.04081311076879,0.22214722633362,388.0,1,1,Avocet YrA,28,69,110
2,1103,0.02853017859161,0.10857990011573,0.88674500584602,0.03252696059644,0.24907822906971,541.3333333333334,1,1,GN08557,1313,70,108
3,1104,0.02475075609982,0.09109020978212,0.85229158401489,0.02792157977819,0.21030612289906,572.0,1,1,GN08541,1311,69,109
4,1105,0.02649083547294,0.09176889061928,0.75997465848923,0.03209586814046,0.20007827877998,542.6666666666667,1,1,SW44431,1324,67,106


Masbasis_220719_yield


Unnamed: 0,Plot_ID,Blue,Green,Red,RedEdge,NIR,GrainYield,Replicates,Block,Name,Line,Days2Heading,Days2Maturity
0,1101,0.0157086243853,0.05447825603187,0.02310157939792,0.12039437144995,0.42018637061119,522.6666666666665,1,1,GN12687,1574,66,107
1,1102,0.01484294328839,0.05363885313272,0.02193004637957,0.11383569985628,0.37882936000824,388.0,1,1,Avocet YrA,28,69,110
2,1103,0.01537581160665,0.05218951404095,0.02080256305635,0.11503638327122,0.43057475984096,541.3333333333334,1,1,GN08557,1313,70,108
3,1104,0.01851161196828,0.06167223677039,0.02580936439335,0.12668533623219,0.44818541407585,572.0,1,1,GN08541,1311,69,109
4,1105,0.01871474646032,0.063277810812,0.02789462357759,0.13115163147449,0.44348394870758,542.6666666666667,1,1,SW44431,1324,67,106


Masbasis_260619_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Replicates,Block,Name,Line,Days2Heading,Days2Maturity
0,1101,0.03095577098429,0.12798850238323,0.9775162935257,0.03774085268378,0.26575493812561,522.6666666666665,1,1,GN12687,1574,66,107
1,1102,0.03348666243255,0.14784204214811,0.89456272125244,0.04386007040739,0.2866637557745,388.0,1,1,Avocet YrA,28,69,110
2,1103,0.03492675721645,0.15189448744059,1.03863137960434,0.03934961184859,0.2989099919796,541.3333333333334,1,1,GN08557,1313,70,108
3,1104,0.03162729181349,0.13321483880281,0.99349030852318,0.03911580704153,0.26759114861488,572.0,1,1,GN08541,1311,69,109
4,1105,0.03033981844783,0.12747666239739,0.96157884597778,0.0368860065937,0.25736233592033,542.6666666666667,1,1,SW44431,1324,67,106


Masbasis_280619_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Replicates,Block,Name,Line,Days2Heading,Days2Maturity
0,1101,0.02367428131401,0.08945537358522,0.76167321205139,0.02980425395071,0.20838125050068,522.6666666666665,1,1,GN12687,1574,66,107
1,1102,0.02565237414092,0.10292289406061,0.70238134264946,0.03413953073323,0.22466614842415,388.0,1,1,Avocet YrA,28,69,110
2,1103,0.02580847032368,0.10208440572023,0.80882775783539,0.03056399896741,0.22935511171818,541.3333333333334,1,1,GN08557,1313,70,108
3,1104,0.02333709970117,0.089539937675,0.75378638505936,0.02965806797147,0.20410184562206,572.0,1,1,GN08541,1311,69,109
4,1105,0.02328722178936,0.09000187367201,0.76559513807297,0.02922498993576,0.20702770352364,542.6666666666667,1,1,SW44431,1324,67,106


Masbasis_290719_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Replicates,Block,Name,Line,Days2Heading,Days2Maturity
0,1101,0.02076234668493,0.05774863436818,0.31896787881851,0.04862806946039,0.14668506383896,522.6666666666665,1,1,GN12687,1574,66,107
1,1102,0.01801653578877,0.05083964206278,0.29662472009659,0.03489565104246,0.12536652386188,388.0,1,1,Avocet YrA,28,69,110
2,1103,0.01910516247153,0.05246903374791,0.31209948658943,0.03918173536658,0.13249918818474,541.3333333333334,1,1,GN08557,1313,70,108
3,1104,0.02229550667107,0.05808311887085,0.33125439286232,0.04216115549207,0.13887896388769,572.0,1,1,GN08541,1311,69,109
4,1105,0.02452539280057,0.06545928120613,0.31769314408302,0.05690658837557,0.15659973025322,542.6666666666667,1,1,SW44431,1324,67,106


### Vollebekk Masbasis 2020

In [196]:
yield_src_dict['Masbasis_2020_Vollebekk']

'df_2020_Vollebekk_Masbasis_x_20BMLGI1_2020_tm_x_data'

In [197]:
df_2020_Vollebekk_Masbasis_x_20BMLGI1_2020_tm_x_data.columns

Index(['Rute', 'Type', 'Rep', 'Block ', 'Col', 'Row', 'Nursery', 'Line',
       'MASBASIS 2015', 'Name',
       ...
       'Unnamed: 1009', 'Unnamed: 1010', 'Unnamed: 1011', 'Unnamed: 1012',
       'Unnamed: 1013', 'Unnamed: 1014', 'Unnamed: 1015', 'Unnamed: 1016',
       'Unnamed: 1017', 'Unnamed: 1018'],
      dtype='object', length=1019)

In [198]:
Masbasis_2020_Vollebekk_yield = df_2020_Vollebekk_Masbasis_x_20BMLGI1_2020_tm_x_data.copy()

In [199]:
field = 'Masbasis_2020'
field_plot_id = 'Plot_ID'
ref_df_yield = Masbasis_2020_Vollebekk_yield
red_df_id = 'Rute'

cols2add_dict = {'GrainYield':'GrainYield',
                 'Replicates':'Rep',
                 'Block':'Block ',
#                  'iBlock':'ibloc',
#                  'Entry':'entry',
                 'Name':'Name',
#                  'CodeName':'CodeName',
#                  'Pedigree':'pedigree',
                 'Line':'Line',
#                  'Heading_Date':'Hd_date_jd',
                 'Maturity_Date':'maturity_dates',
                 'Days2Heading':'DH',
                 'Days2Maturity':'DM',
                 'Lodging':'Lodging_images'
                }



for date in field_year_dict[field]:
    temp_df = locals()[field[:-4]+date].copy()
    temp_ref_df = ref_df_yield.copy()
    
    temp_df.set_index(field_plot_id, inplace=True)
    temp_ref_df.set_index(red_df_id, inplace=True)
    for field_df_col, ref_df_col in cols2add_dict.items():
        temp_df[field_df_col]=temp_ref_df[ref_df_col]
    new_df_name = field[:-4]+date+'_yield'
    locals()[new_df_name] = temp_df.reset_index()
    df_w_yields.append(new_df_name)
    print(new_df_name)
    display(locals()[new_df_name].head())

Masbasis_010720_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Replicates,Block,Name,Line,Maturity_Date,Days2Heading,Days2Maturity,Lodging
0,1378,0.01591940969229,0.04004546999931,0.45509791374206,0.01758708804846,0.10015922039747,740.0,1.0,36.0,GN04528,53.0,2020-08-12,67,89.0,0.0
1,1379,0.01459743734449,0.03886682912707,0.47424706816673,0.01632172986865,0.10415577888489,729.3333333333333,1.0,36.0,GN12625,1530.0,2020-08-16,68,93.0,0.0
2,1380,0.01679942943156,0.04042305797339,0.42398262023926,0.0210672095418,0.1004450917244,534.6666666666665,1.0,36.0,DH20070,93.0,2020-08-10,65,87.0,80.0
3,1381,0.01616109348834,0.0423381049186,0.47935003042221,0.01779858022928,0.10866297408938,745.3333333333333,1.0,36.0,GONDO,30.0,2020-08-18,72,95.0,0.0
4,1382,0.01539558358491,0.04040565900505,0.46358992159367,0.01758266147226,0.10410747304559,661.3333333333334,1.0,36.0,Pfau/Milan,17.0,2020-08-16,68,93.0,0.0


Masbasis_070820_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Replicates,Block,Name,Line,Maturity_Date,Days2Heading,Days2Maturity,Lodging
0,1378,0.04086912795901,0.09096296876669,0.31892809271812,0.10894840955734,0.18498109281063,740.0,1.0,36.0,GN04528,53.0,2020-08-12,67,89.0,0.0
1,1379,0.05080563575029,0.1227527409792,0.41564214229584,0.12473668903112,0.24275913834572,729.3333333333333,1.0,36.0,GN12625,1530.0,2020-08-16,68,93.0,0.0
2,1380,0.06554494425654,0.13410246372223,0.35386978089809,0.17264156043529,0.23937165737152,534.6666666666665,1.0,36.0,DH20070,93.0,2020-08-10,65,87.0,80.0
3,1381,0.0309294834733,0.08369535952807,0.33665832877159,0.07409231364727,0.17520970106125,745.3333333333333,1.0,36.0,GONDO,30.0,2020-08-18,72,95.0,0.0
4,1382,0.03470029123128,0.08698018267751,0.32181885838509,0.09760199487209,0.18198852986097,661.3333333333334,1.0,36.0,Pfau/Milan,17.0,2020-08-16,68,93.0,0.0


Masbasis_080720_yield


Unnamed: 0,Plot_ID,Blue,Green,Red,RedEdge,NIR,GrainYield,Replicates,Block,Name,Line,Maturity_Date,Days2Heading,Days2Maturity,Lodging
0,1378,0.01402835361659,0.03620908036828,0.01667431369424,0.0976048335433,0.46333000063896,740.0,1.0,36.0,GN04528,53.0,2020-08-12,67,89.0,0.0
1,1379,0.01307204086334,0.03570323809981,0.01592568680644,0.10280881822109,0.49403023719788,729.3333333333333,1.0,36.0,GN12625,1530.0,2020-08-16,68,93.0,0.0
2,1380,0.0294326171279,0.06715109199286,0.03764823637903,0.15841406583786,0.51798620820046,534.6666666666665,1.0,36.0,DH20070,93.0,2020-08-10,65,87.0,80.0
3,1381,0.01491118315607,0.03973998501897,0.01794897671789,0.10917155444622,0.48810102045536,745.3333333333333,1.0,36.0,GONDO,30.0,2020-08-18,72,95.0,0.0
4,1382,0.01484246226028,0.03857311606407,0.01838943362236,0.10612838342786,0.47808191180229,661.3333333333334,1.0,36.0,Pfau/Milan,17.0,2020-08-16,68,93.0,0.0


Masbasis_120820_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Replicates,Block,Name,Line,Maturity_Date,Days2Heading,Days2Maturity,Lodging
0,1378,0.04214221425355,0.08147255703807,0.28164222836494,0.13377510756254,0.17055214941502,740.0,1.0,36.0,GN04528,53.0,2020-08-12,67,89.0,0.0
1,1379,0.04688263311982,0.09263426810503,0.32047480344772,0.14142593741417,0.19662092626095,729.3333333333333,1.0,36.0,GN12625,1530.0,2020-08-16,68,93.0,0.0
2,1380,0.0552837960422,0.10597278177738,0.34046250581741,0.17595182359219,0.21433568000793,534.6666666666665,1.0,36.0,DH20070,93.0,2020-08-10,65,87.0,80.0
3,1381,0.03823359310627,0.08165599033236,0.30164931714535,0.10967472940683,0.17836789041758,745.3333333333333,1.0,36.0,GONDO,30.0,2020-08-18,72,95.0,0.0
4,1382,0.03904603235424,0.07847819849849,0.28058490157127,0.12050274014473,0.16950686275959,661.3333333333334,1.0,36.0,Pfau/Milan,17.0,2020-08-16,68,93.0,0.0


Masbasis_130720_yield


Unnamed: 0,Plot_ID,Blue,Green,Red,RedEdge,NIR,GrainYield,Replicates,Block,Name,Line,Maturity_Date,Days2Heading,Days2Maturity,Lodging
0,1378,0.0030966483755,0.00743585056625,0.00340393697843,0.01917712856084,0.09398687258363,740.0,1.0,36.0,GN04528,53.0,2020-08-12,67,89.0,0.0
1,1379,0.00282198062632,0.00714045623317,0.00308203196619,0.01979616191238,0.10167077928781,729.3333333333333,1.0,36.0,GN12625,1530.0,2020-08-16,68,93.0,0.0
2,1380,0.00475424760953,0.01047174679115,0.00552832544781,0.02479296177626,0.08975387737155,534.6666666666665,1.0,36.0,DH20070,93.0,2020-08-10,65,87.0,80.0
3,1381,0.00310867279768,0.00760369934142,0.00330586987548,0.02020131796598,0.09762462973595,745.3333333333333,1.0,36.0,GONDO,30.0,2020-08-18,72,95.0,0.0
4,1382,0.00311224989127,0.00748728215694,0.0034374173265,0.01995011791587,0.09622761234641,661.3333333333334,1.0,36.0,Pfau/Milan,17.0,2020-08-16,68,93.0,0.0


Masbasis_140820_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Replicates,Block,Name,Line,Maturity_Date,Days2Heading,Days2Maturity,Lodging
0,1378,0.04457513615489,0.08410543948412,0.26709419488907,0.13423332571983,0.16420239210129,740.0,1.0,36.0,GN04528,53.0,2020-08-12,67,89.0,0.0
1,1379,0.04044235125184,0.07832991331816,0.27064278721809,0.12140583246946,0.16154104471207,729.3333333333333,1.0,36.0,GN12625,1530.0,2020-08-16,68,93.0,0.0
2,1380,0.03908966295421,0.07075476273894,0.22659781575203,0.10667786002159,0.13415043801069,534.6666666666665,1.0,36.0,DH20070,93.0,2020-08-10,65,87.0,80.0
3,1381,0.03776586428285,0.07734780013561,0.27263230085373,0.1080602183938,0.1641256660223,745.3333333333333,1.0,36.0,GONDO,30.0,2020-08-18,72,95.0,0.0
4,1382,0.03717124834657,0.07275756075978,0.2524222433567,0.11053347215056,0.15119008719921,661.3333333333334,1.0,36.0,Pfau/Milan,17.0,2020-08-16,68,93.0,0.0


Masbasis_170720_yield


Unnamed: 0,Plot_ID,Blue,Green,Red,RedEdge,NIR,GrainYield,Replicates,Block,Name,Line,Maturity_Date,Days2Heading,Days2Maturity,Lodging
0,1378,0.01524927746504,0.04235019907355,0.02025532815605,0.09704588353634,0.38951104879379,740.0,1.0,36.0,GN04528,53.0,2020-08-12,67,89.0,0.0
1,1379,0.01433407794684,0.04232615604997,0.01836482435465,0.1038099899888,0.43227532505989,729.3333333333333,1.0,36.0,GN12625,1530.0,2020-08-16,68,93.0,0.0
2,1380,0.03183255717158,0.07925609499216,0.04394743591547,0.16375847905874,0.44937327504158,534.6666666666665,1.0,36.0,DH20070,93.0,2020-08-10,65,87.0,80.0
3,1381,0.015720365569,0.04540125094354,0.01951180025935,0.10668126121163,0.43081989884376,745.3333333333333,1.0,36.0,GONDO,30.0,2020-08-18,72,95.0,0.0
4,1382,0.01562240626663,0.04381185770035,0.0199053697288,0.10283801704645,0.41685166954994,661.3333333333334,1.0,36.0,Pfau/Milan,17.0,2020-08-16,68,93.0,0.0


Masbasis_180620_yield


Unnamed: 0,Plot_ID,Blue,Green,Red,RedEdge,NIR,GrainYield,Replicates,Block,Name,Line,Maturity_Date,Days2Heading,Days2Maturity,Lodging
0,1101,0.01375245722011,0.0391886588186,0.01362431282178,0.11802165210247,0.44460704922676,,1.0,1.0,MS 273-150,26.0,NaT,67,,
1,1102,0.01190277514979,0.03491909988225,0.0122546562925,0.10733427107334,0.42154078185558,,1.0,1.0,Sabin,1322.0,NaT,65,,
2,1103,0.0183435510844,0.04444529116154,0.01833361946046,0.12285278737545,0.4506431221962,,1.0,1.0,T2038,25.0,NaT,65,,
3,1104,0.01546917203814,0.04063344746828,0.01583779789507,0.1147592663765,0.42881932854652,,,,Bastian,,NaT,65,,
4,1105,0.01513662189245,0.03752589970827,0.01491108024493,0.10963937267661,0.43176732957363,713.3333333333334,1.0,1.0,T9040,6.0,2020-08-11,66,88.0,0.0


Masbasis_220720_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Replicates,Block,Name,Line,Maturity_Date,Days2Heading,Days2Maturity,Lodging
0,1378,0.01983306277543,0.05393978022039,0.4122980684042,0.02703440468758,0.12720087915659,740.0,1.0,36.0,GN04528,53.0,2020-08-12,67,89.0,0.0
1,1379,0.01720278151333,0.05074172094464,0.39697626233101,0.03082827664912,0.12266058474779,729.3333333333333,1.0,36.0,GN12625,1530.0,2020-08-16,68,93.0,0.0
2,1380,0.02053224667907,0.05466695316136,0.4327172935009,0.02849092241377,0.12945248186588,534.6666666666665,1.0,36.0,DH20070,93.0,2020-08-10,65,87.0,80.0
3,1381,0.01933290623128,0.05337260663509,0.41434514522553,0.02805849537253,0.12147465348244,745.3333333333333,1.0,36.0,GONDO,30.0,2020-08-18,72,95.0,0.0
4,1382,0.02068077027798,0.05325965583324,0.40077504515648,0.03047803603113,0.12492926046252,661.3333333333334,1.0,36.0,Pfau/Milan,17.0,2020-08-16,68,93.0,0.0


Masbasis_240620_yield


Unnamed: 0,Plot_ID,Blue,Green,Red,RedEdge,NIR,GrainYield,Replicates,Block,Name,Line,Maturity_Date,Days2Heading,Days2Maturity,Lodging
0,1378,0.01521220896393,0.03487180918455,0.01443475484848,0.09333180636167,0.40641325712204,740.0,1.0,36.0,GN04528,53.0,2020-08-12,67,89.0,0.0
1,1379,0.0128291156143,0.03250118345022,0.01261149765924,0.09257266670465,0.4008042961359,729.3333333333333,1.0,36.0,GN12625,1530.0,2020-08-16,68,93.0,0.0
2,1380,0.01308786636218,0.02784275263548,0.01342038996518,0.07375760003924,0.34286007285118,534.6666666666665,1.0,36.0,DH20070,93.0,2020-08-10,65,87.0,80.0
3,1381,0.01478810794652,0.03473927825689,0.01417340431362,0.09768238663673,0.41346660256386,745.3333333333333,1.0,36.0,GONDO,30.0,2020-08-18,72,95.0,0.0
4,1382,0.01381318224594,0.03199907578528,0.0135073820129,0.08787251636386,0.38723370432854,661.3333333333334,1.0,36.0,Pfau/Milan,17.0,2020-08-16,68,93.0,0.0


Masbasis_260620_yield


Unnamed: 0,Plot_ID,Blue,Green,Red,RedEdge,NIR,GrainYield,Replicates,Block,Name,Line,Maturity_Date,Days2Heading,Days2Maturity,Lodging
0,1378,0.01110563054681,0.0254636509344,0.01226920820773,0.07541254907846,0.43113344907761,740.0,1.0,36.0,GN04528,53.0,2020-08-12,67,89.0,0.0
1,1379,0.0118602802977,0.02762216236442,0.01390849240124,0.08364863321185,0.48069626092911,729.3333333333333,1.0,36.0,GN12625,1530.0,2020-08-16,68,93.0,0.0
2,1380,0.01466838736087,0.03315686434507,0.01731318607926,0.09402611851692,0.49784618616104,534.6666666666665,1.0,36.0,DH20070,93.0,2020-08-10,65,87.0,80.0
3,1381,0.01227626856416,0.02862842939794,0.01400806568563,0.08514220267534,0.46704971790314,745.3333333333333,1.0,36.0,GONDO,30.0,2020-08-18,72,95.0,0.0
4,1382,0.01383561547846,0.03266096115112,0.01590844430029,0.09346697479487,0.50319483876228,661.3333333333334,1.0,36.0,Pfau/Milan,17.0,2020-08-16,68,93.0,0.0


Masbasis_300720_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Replicates,Block,Name,Line,Maturity_Date,Days2Heading,Days2Maturity,Lodging
0,1378,0.02410079725087,0.06607007235289,0.34780168533325,0.0513224080205,0.15426203608513,740.0,1.0,36.0,GN04528,53.0,2020-08-12,67,89.0,0.0
1,1379,0.03252369910479,0.09256774932146,0.45134079456329,0.06403701007366,0.20749270915985,729.3333333333333,1.0,36.0,GN12625,1530.0,2020-08-16,68,93.0,0.0
2,1380,0.04003740102053,0.10463583469391,0.34994295239448,0.08449927344918,0.21405462175608,534.6666666666665,1.0,36.0,DH20070,93.0,2020-08-10,65,87.0,80.0
3,1381,0.0189337618649,0.05961094796658,0.36552077531815,0.03767672367394,0.14549421519041,745.3333333333333,1.0,36.0,GONDO,30.0,2020-08-18,72,95.0,0.0
4,1382,0.02116639912128,0.06466833129525,0.36169295012951,0.04673925973475,0.15684277564287,661.3333333333334,1.0,36.0,Pfau/Milan,17.0,2020-08-16,68,93.0,0.0


### Staur 2019

Since here we have to take the yield data from two separate files, for graminor and Masbasis, the yield columns that are present in both yield datasets have to be named differently and later merged together. Also, while doing the process on the second field,i.e. Masbasis, remember to take the processed datasets from the first setp, instead to taking the unprocessed datasets; as done for the first field.

In [200]:
yield_src_dict
yield_src_dict['Staur_Graminor_2019']
yield_src_dict['Staur_Masbasis_2019']

'df_2019_Staur_Graminor_x_19BMLFN3___MASBASIS_yield_trial_Staur_2019_x_Data'

#### Graminor

In [201]:
df_2019_Vollebekk_Graminor_x_19TvPhenores_x_Staur_res
# df_2019_Staur_Graminor_x_19BMLFN3___MASBASIS_yield_trial_Staur_2019_x_Data

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8,Unnamed: 9,Pl.,Akssk.,Gulm.,Avling,Unnamed: 14,Unnamed: 15,Unnamed: 16,Unnamed: 17,Unnamed: 18,Unnamed: 19,Unnamed: 20,Unnamed: 21,Unnamed: 22,Unnamed: 23,Unnamed: 24,Unnamed: 25,Unnamed: 26,Unnamed: 27,Unnamed: 28,Unnamed: 29,Unnamed: 30,Unnamed: 31,Unnamed: 32,Unnamed: 33,Unnamed: 34
0,r_expt,r_location,plot,bloc,ibloc,entry,,name,pedigree,Legde,høyde,Juli,Aug.,kg/daa,,,,,,,,,,,,,,,,,,,,,
1,19T1A 2,Staur,1301,1,1,14,,GN12760,NK02529/NK01533//NK01568,20,,22,,583.00721062618595,,,,,,,,,,,,,,,,,,,,,
2,19T1A 2,Staur,1302,1,1,2,,Demonstrant,T1005/NK93512,40,,23,,473.86110056925997,,,,,,,,,,,,,,,,,,,,,
3,19T1A 2,Staur,1303,1,1,11,,GN14547,SW45126/NK01533,30,,20,,511.10436432637567,,,,,,,,,,,,,,,,,,,,,
4,19T1A 2,Staur,1304,1,1,15,,GN15549,SW45126/GN03529,30,,21,,512.30360531309304,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
796,19T17UB2,Staur,1471,2,,5,,Happy,Amaretto/SW45530,80,108,26,,510.12296015180272,,,,,,,,,,,,,,,,,,,,,
797,19T17UB2,Staur,1472,2,,20,,Mamut,Triticale,40,105,22,,444.03415559772304,,,,,,,,,,,,,,,,,,,,,
798,19T17UB2,Staur,1473,2,,25,,SEC542-09-9,Triticale,30,110,25,,481.89753320683116,,,,,,,,,,,,,,,,,,,,,
799,19T17UB2,Staur,1474,2,,23,,DC11136,,60,103,23,,410.30132827324485,,,,,,,,,,,,,,,,,,,,,


In [202]:
# Dropping first row/ unnamed column headings and replacing them with the second row.
Graminor_2019_Staur_temp = df_2019_Vollebekk_Graminor_x_19TvPhenores_x_Staur_res.copy()
Graminor_2019_Staur_temp.columns = df_2019_Vollebekk_Graminor_x_19TvPhenores_x_Staur_res.iloc[0]
Graminor_2019_Staur_yield = Graminor_2019_Staur_temp[1:]
Graminor_2019_Staur_yield.head()

Unnamed: 0,r_expt,r_location,plot,bloc,ibloc,entry,NaN,name,pedigree,Legde,høyde,Juli,Aug.,kg/daa,NaN.1,NaN.2,NaN.3,NaN.4,NaN.5,NaN.6,NaN.7,NaN.8,NaN.9,NaN.10,NaN.11,NaN.12,NaN.13,NaN.14,NaN.15,NaN.16,NaN.17,NaN.18,NaN.19,NaN.20,NaN.21
1,19T1A 2,Staur,1301,1,1,14,,GN12760,NK02529/NK01533//NK01568,20,,22,,583.007210626186,,,,,,,,,,,,,,,,,,,,,
2,19T1A 2,Staur,1302,1,1,2,,Demonstrant,T1005/NK93512,40,,23,,473.86110056926,,,,,,,,,,,,,,,,,,,,,
3,19T1A 2,Staur,1303,1,1,11,,GN14547,SW45126/NK01533,30,,20,,511.1043643263757,,,,,,,,,,,,,,,,,,,,,
4,19T1A 2,Staur,1304,1,1,15,,GN15549,SW45126/GN03529,30,,21,,512.3036053130929,,,,,,,,,,,,,,,,,,,,,
5,19T1A 2,Staur,1305,1,1,23,,SG-U784-16,,80,,20,,488.59203036053134,,,,,,,,,,,,,,,,,,,,,


#### Dropping entries with missing plot_ID /Nan entries

In [203]:
staur20_temp = Graminor_2019_Staur_yield.copy()

# Dropping entries with missing plot_ID 
staur20_temp.dropna(subset=['plot'], inplace=True)
Graminor_2019_Staur_yield.shape, staur20_temp.shape

((800, 35), (800, 35))

#### Dropping Duplicate entries

In [204]:
# Finding identical entries in Plot_ID column
duplicates_all = staur20_temp[staur20_temp.duplicated(subset='plot')]['plot']

if duplicates_all.size > 0:
    # Dropping one of the two identical duplicates entries
    staur20_temp.drop_duplicates(inplace=True)
    
    duplicates_non_ident = staur20_temp[staur20_temp.duplicated(subset='plot')]['plot']
    identical_dup_dropped = duplicates_all.size - duplicates_non_ident.size
    print(f'{identical_dup_dropped}     identical duplicate entries dropped.')
    
#     print(duplicates_all.size, duplicates_non_ident.size, identical_dup_dropped)

    # Dropping both entries for non-identical duplicate entries
    staur20_temp.drop_duplicates(subset = 'plot', keep=False, inplace=True)
    print(f'{duplicates_non_ident.size*2} non-identical duplicate entries dropped.')

    print(f'Total     {identical_dup_dropped + duplicates_non_ident.size*2}     duplicate entries deleted.')
# df_2020_Staur_Graminor_x_Staur_Graminor_Masbasis_2020_x_Res = staur20_temp.copy()

In [205]:
%%time

field = 'Staur_2019'
field_plot_id = 'Plot_ID'
ref_df_yield = Graminor_2019_Staur_yield
red_df_id = 'plot'

cols2add_dict = {'GrainYield':'kg/daa',
#                  'Replicates':'Rep',
                 'Block_G':'bloc',
                 'iBlock':'ibloc',
                 'Entry':'entry',
                 'Name_G':'name',
#                  'CodeName':'CodeName',
                 'Pedigree':'pedigree',
#                  'Line':'Line',
#                  'Heading_Date':'Hd_date_jd',
#                  'Maturity_Date':'Mat_date_jd',
#                  'Days2Heading':'Hd_dto_day',
#                  'Days2Maturity':'Mat_dto_day',
#                  'Lodging':'Lodging'
                }



for date in field_year_dict[field]:
    temp_df = locals()[field[:-4]+date].copy()
    temp_ref_df = ref_df_yield.copy()
    temp_df.set_index(field_plot_id, inplace=True)
    temp_ref_df.set_index(red_df_id, inplace=True)

    for field_df_col, ref_df_col in cols2add_dict.items():
        temp_df[field_df_col]=temp_ref_df[ref_df_col]

    new_df_name = field[:-4]+date+'_yield'
    locals()[new_df_name] = temp_df.reset_index()
    # The new_df_name will be appended in the next step, while adding data for Masbasis
    print(new_df_name)
    display(locals()[new_df_name].head())

Staur_070819_yield


Unnamed: 0,Plot_ID,Blue,Green,Red,RedEdge,NIR,GrainYield,Block_G,iBlock,Entry,Name_G,Pedigree
0,101,0.02037221100181,0.05380867421627,0.02753451187164,0.10544827952981,0.50401940941811,,,,,,
1,102,0.01680015865713,0.04350608587265,0.02150516957045,0.08688291162252,0.48089578747749,,,,,,
2,103,0.02003859914839,0.04953648895025,0.02503940276802,0.096275344491,0.5303213596344,,,,,,
3,104,0.01971719227731,0.05685907788575,0.02864102274179,0.11291671171784,0.511001765728,,,,,,
4,105,0.02818777412176,0.06502467393875,0.03368915617466,0.12276341766119,0.56638416647911,,,,,,


Staur_150819_yield


Unnamed: 0,Plot_ID,Blue,Green,Red,RedEdge,NIR,GrainYield,Block_G,iBlock,Entry,Name_G,Pedigree
0,101,0.02139185369015,0.06199352443218,0.03193024545908,0.12122006714344,0.48637029528618,,,,,,
1,102,0.0172342993319,0.04688457027078,0.02339296601713,0.0930627733469,0.44429874420166,,,,,,
2,103,0.01945897936821,0.05133783072233,0.02714726515114,0.09860330075026,0.45776697993278,,,,,,
3,104,0.01875521987677,0.05719138123095,0.03205752372742,0.11198773235083,0.42308157682419,,,,,,
4,105,0.03271885588765,0.08598766475916,0.04148830473423,0.15698732435703,0.55752408504486,,,,,,


Staur_210819_yield


Unnamed: 0,Plot_ID,Blue,Green,Red,RedEdge,NIR,GrainYield,Block_G,iBlock,Entry,Name_G,Pedigree
0,101,,,,,,,,,,,
1,102,,,,,,,,,,,
2,103,,,,,,,,,,,
3,104,,,,,,,,,,,
4,105,,,,,,,,,,,


Staur_240719_yield


Unnamed: 0,Plot_ID,Blue,Green,Red,RedEdge,NIR,GrainYield,Block_G,iBlock,Entry,Name_G,Pedigree
0,101,0.02107470855117,0.05494951084256,0.02659220248461,0.11439549177885,0.55891990661621,,,,,,
1,102,0.01831944938749,0.04948900081217,0.02264953404665,0.10494910553098,0.5592942237854,,,,,,
2,103,0.0217623077333,0.05476612597704,0.02646759711206,0.11683951318264,0.61783242225647,,,,,,
3,104,0.02083589509129,0.05831184983253,0.02751668170095,0.12521986663342,0.5971736907959,,,,,,
4,105,0.0212326515466,0.05318084731698,0.02654945664108,0.10921031981707,0.56063085794449,,,,,,


Staur_300819_yield


Unnamed: 0,Plot_ID,Blue,Green,Red,RedEdge,NIR,GrainYield,Block_G,iBlock,Entry,Name_G,Pedigree
0,101,0.03873255103826,0.11396461725235,0.10482091456652,0.24639148224143,0.53428444266319,,,,,,
1,102,0.02752235345542,0.08799333125353,0.06158534064889,0.19667421166011,0.51942938566208,,,,,,
2,103,0.02946643717587,0.08506578952074,0.07173351943493,0.1878371295055,0.505131483078,,,,,,
3,104,0.02927374653518,0.08424741774797,0.09479934722185,0.1977918342992,0.42404174804688,,,,,,
4,105,0.04285388812423,0.12670333683491,0.09668766707182,0.25213951607769,0.5477414727211,,,,,,


Wall time: 88.8 ms


#### Masbasis

In [206]:
df_2019_Staur_Graminor_x_19BMLFN3___MASBASIS_yield_trial_Staur_2019_x_Data.head()

Unnamed: 0,Rute,Plot,Design,Rep,Block,PLT,Col,Nursery,Line,MASBASIS 2015,Name,Fra forsøk,Nr.,Rute.1,PH,LD1,LD2,LD3,LD_Mean,HD,MAT,DH,DM,Moisture,Yield,TW
0,101,1,alpha-lattice,1,1,1,1,MASBASIS,1521,1521,GN11527,18BMLGI1,1260,101,90,5.0,60,70,65.0,19,5,45.0,91,32.58823529411764,562.8306730661905,70.23529411764706
1,102,2,alpha-lattice,1,1,2,2,MASBASIS,1622,1622,GN14506,18BMLGI1,1223,102,86,0.0,50,60,55.0,23,18,49.0,104,38.0,494.1176470588236,77.11764705882354
2,103,3,alpha-lattice,1,1,3,3,MASBASIS,1609,1609,GN14529,18BMLGI1,1228,103,86,0.0,5,20,12.5,22,15,48.0,101,27.76470588235294,548.2754771737917,74.23529411764706
3,104,4,alpha-lattice,1,1,4,4,MASBASIS,24,1075,NK93604,18BMLGI1,1236,104,78,2.0,10,40,25.0,20,-2,46.0,84,32.0,480.00000000000006,71.70588235294117
4,105,5,fill,1,1,5,5,MASBASIS,94,1120,DH20097,18BMLGI1,1161,105,94,90.0,99,99,99.0,18,8,44.0,94,48.94117647058824,339.4878892733564,76.52941176470588


In [207]:
Masbasis_2019_Staur_yield = df_2019_Staur_Graminor_x_19BMLFN3___MASBASIS_yield_trial_Staur_2019_x_Data.copy()

In [208]:
%%time

field = 'Staur_2019'
field_plot_id = 'Plot_ID'
ref_df_yield = Masbasis_2019_Staur_yield
red_df_id = 'Rute'

cols2add_dict = {'GrainYield_M':'Yield',
                 'Replicates':'Rep',
                 'Block_M':'Block',
#                  'iBlock':'ibloc',
#                  'Entry':'entry',
                 'Name_M':'Name',
#                  'CodeName':'CodeName',
#                  'Pedigree':'pedigree',
                 'Line':'Line',
#                  'Heading_Date':'Hd_date_jd',
#                  'Maturity_Date':'Mat_date_jd',
                 'Days2Heading':'DH',
                 'Days2Maturity':'DM',
#                  'Lodging':'Lodging'
                }



for date in field_year_dict[field]:
    temp_df = locals()[field[:-4]+date+'_yield'].copy()
    temp_ref_df = ref_df_yield.copy()
    temp_df.set_index(field_plot_id, inplace=True)
    temp_ref_df.set_index(red_df_id, inplace=True)

    for field_df_col, ref_df_col in cols2add_dict.items():
        temp_df[field_df_col]=temp_ref_df[ref_df_col]

    temp_df['GrainYield'] = temp_df['GrainYield_M'].fillna(0)+temp_df['GrainYield'].fillna(0)
    temp_df.drop(['GrainYield_M'], axis=1, inplace=True)
    temp_df['Name'] = temp_df['Name_M'].fillna('')+temp_df['Name_G'].fillna('')
    temp_df['Block'] = temp_df['Block_M'].fillna(0)+temp_df['Block_G'].fillna(0)
    temp_df.drop(['Name_M'], axis=1, inplace=True)
    temp_df.drop(['Name_G'], axis=1, inplace=True)
    temp_df.drop(['Block_M'], axis=1, inplace=True)
    temp_df.drop(['Block_G'], axis=1, inplace=True)
    
    new_df_name = field[:-4]+date+'_yield'
    locals()[new_df_name] = temp_df.reset_index()
    df_w_yields.append(new_df_name)
    print(new_df_name)
    display(locals()[new_df_name].head())

Staur_070819_yield


Unnamed: 0,Plot_ID,Blue,Green,Red,RedEdge,NIR,GrainYield,iBlock,Entry,Pedigree,Replicates,Line,Days2Heading,Days2Maturity,Name,Block
0,101,0.02037221100181,0.05380867421627,0.02753451187164,0.10544827952981,0.50401940941811,562.8306730661905,,,,1.0,1521.0,45.0,91.0,GN11527,1.0
1,102,0.01680015865713,0.04350608587265,0.02150516957045,0.08688291162252,0.48089578747749,494.1176470588236,,,,1.0,1622.0,49.0,104.0,GN14506,1.0
2,103,0.02003859914839,0.04953648895025,0.02503940276802,0.096275344491,0.5303213596344,548.2754771737917,,,,1.0,1609.0,48.0,101.0,GN14529,1.0
3,104,0.01971719227731,0.05685907788575,0.02864102274179,0.11291671171784,0.511001765728,480.00000000000006,,,,1.0,24.0,46.0,84.0,NK93604,1.0
4,105,0.02818777412176,0.06502467393875,0.03368915617466,0.12276341766119,0.56638416647911,339.4878892733564,,,,1.0,94.0,44.0,94.0,DH20097,1.0


Staur_150819_yield


Unnamed: 0,Plot_ID,Blue,Green,Red,RedEdge,NIR,GrainYield,iBlock,Entry,Pedigree,Replicates,Line,Days2Heading,Days2Maturity,Name,Block
0,101,0.02139185369015,0.06199352443218,0.03193024545908,0.12122006714344,0.48637029528618,562.8306730661905,,,,1.0,1521.0,45.0,91.0,GN11527,1.0
1,102,0.0172342993319,0.04688457027078,0.02339296601713,0.0930627733469,0.44429874420166,494.1176470588236,,,,1.0,1622.0,49.0,104.0,GN14506,1.0
2,103,0.01945897936821,0.05133783072233,0.02714726515114,0.09860330075026,0.45776697993278,548.2754771737917,,,,1.0,1609.0,48.0,101.0,GN14529,1.0
3,104,0.01875521987677,0.05719138123095,0.03205752372742,0.11198773235083,0.42308157682419,480.00000000000006,,,,1.0,24.0,46.0,84.0,NK93604,1.0
4,105,0.03271885588765,0.08598766475916,0.04148830473423,0.15698732435703,0.55752408504486,339.4878892733564,,,,1.0,94.0,44.0,94.0,DH20097,1.0


Staur_210819_yield


Unnamed: 0,Plot_ID,Blue,Green,Red,RedEdge,NIR,GrainYield,iBlock,Entry,Pedigree,Replicates,Line,Days2Heading,Days2Maturity,Name,Block
0,101,,,,,,562.8306730661905,,,,1.0,1521.0,45.0,91.0,GN11527,1.0
1,102,,,,,,494.1176470588236,,,,1.0,1622.0,49.0,104.0,GN14506,1.0
2,103,,,,,,548.2754771737917,,,,1.0,1609.0,48.0,101.0,GN14529,1.0
3,104,,,,,,480.00000000000006,,,,1.0,24.0,46.0,84.0,NK93604,1.0
4,105,,,,,,339.4878892733564,,,,1.0,94.0,44.0,94.0,DH20097,1.0


Staur_240719_yield


Unnamed: 0,Plot_ID,Blue,Green,Red,RedEdge,NIR,GrainYield,iBlock,Entry,Pedigree,Replicates,Line,Days2Heading,Days2Maturity,Name,Block
0,101,0.02107470855117,0.05494951084256,0.02659220248461,0.11439549177885,0.55891990661621,562.8306730661905,,,,1.0,1521.0,45.0,91.0,GN11527,1.0
1,102,0.01831944938749,0.04948900081217,0.02264953404665,0.10494910553098,0.5592942237854,494.1176470588236,,,,1.0,1622.0,49.0,104.0,GN14506,1.0
2,103,0.0217623077333,0.05476612597704,0.02646759711206,0.11683951318264,0.61783242225647,548.2754771737917,,,,1.0,1609.0,48.0,101.0,GN14529,1.0
3,104,0.02083589509129,0.05831184983253,0.02751668170095,0.12521986663342,0.5971736907959,480.00000000000006,,,,1.0,24.0,46.0,84.0,NK93604,1.0
4,105,0.0212326515466,0.05318084731698,0.02654945664108,0.10921031981707,0.56063085794449,339.4878892733564,,,,1.0,94.0,44.0,94.0,DH20097,1.0


Staur_300819_yield


Unnamed: 0,Plot_ID,Blue,Green,Red,RedEdge,NIR,GrainYield,iBlock,Entry,Pedigree,Replicates,Line,Days2Heading,Days2Maturity,Name,Block
0,101,0.03873255103826,0.11396461725235,0.10482091456652,0.24639148224143,0.53428444266319,562.8306730661905,,,,1.0,1521.0,45.0,91.0,GN11527,1.0
1,102,0.02752235345542,0.08799333125353,0.06158534064889,0.19667421166011,0.51942938566208,494.1176470588236,,,,1.0,1622.0,49.0,104.0,GN14506,1.0
2,103,0.02946643717587,0.08506578952074,0.07173351943493,0.1878371295055,0.505131483078,548.2754771737917,,,,1.0,1609.0,48.0,101.0,GN14529,1.0
3,104,0.02927374653518,0.08424741774797,0.09479934722185,0.1977918342992,0.42404174804688,480.00000000000006,,,,1.0,24.0,46.0,84.0,NK93604,1.0
4,105,0.04285388812423,0.12670333683491,0.09668766707182,0.25213951607769,0.5477414727211,339.4878892733564,,,,1.0,94.0,44.0,94.0,DH20097,1.0


Wall time: 180 ms


### Staur 2020

In [209]:
yield_src_dict['Staur_2020']

'df_2020_Staur_Graminor_x_Staur_Graminor_Masbasis_2020_x_Res'

In [210]:
# Dropping first row/ unnamed column headings and replacing them with the second row.
Staur_2020_temp = df_2020_Staur_Graminor_x_Staur_Graminor_Masbasis_2020_x_Res.copy()
Staur_2020_temp.columns = df_2020_Staur_Graminor_x_Staur_Graminor_Masbasis_2020_x_Res.iloc[0]
Staur_2020_yield = Staur_2020_temp[1:]
Staur_2020_yield.head()

Unnamed: 0,r_expt,r_location,plot,bloc,ibloc,entry,NaN,name,pedigree,Tillers,Lodging,height,Ripe,Water,kg/daa
2,20T7B2,Staur,125,1,1,24,,GN19584,GN06573/SW46375//GN03531,,,55,,16.55971896955504,491.0987779886148
3,20T7B2,Staur,126,1,1,20,,GN19565,SW71142/GN07574,,,68,,17.17962248832961,703.0642277039849
4,20T7B2,Staur,127,1,1,14,,GN19548,GN03509/SW51127,,,58,,16.97054945054945,650.9067779886149
5,20T7B2,Staur,128,1,1,12,,GN19527,GN08595/GN06578,,,66,,16.63372767339364,619.2180948766603
6,20T7B2,Staur,129,1,1,25,,GN19599,Bombona/NK01513//Berserk,,,71,,17.28227194492255,662.4299354838711


In [211]:
%%time

field = 'Staur_2020'
field_plot_id = 'Plot_ID'
ref_df_yield = Staur_2020_yield
red_df_id = 'plot'

cols2add_dict = {'GrainYield':'kg/daa',
#                  'Replicates':'Rep',
                 'Block':'bloc',
                 'iBlock':'ibloc',
                 'Entry':'entry',
                 'Name':'name',
#                  'CodeName':'CodeName',
                 'Pedigree':'pedigree',
#                  'Line':'Line',
#                  'Heading_Date':'Hd_date_jd',
#                  'Maturity_Date':'Mat_date_jd',
#                  'Days2Heading':'Hd_dto_day',
#                  'Days2Maturity':'Mat_dto_day',
                 'Lodging':'Lodging'
                }



for date in field_year_dict[field]:
    temp_df = locals()[field[:-4]+date].copy()
    temp_ref_df = ref_df_yield.copy()
    temp_df.set_index(field_plot_id, inplace=True)
    temp_ref_df.set_index(red_df_id, inplace=True)

    for field_df_col, ref_df_col in cols2add_dict.items():
        temp_df[field_df_col]=temp_ref_df[ref_df_col]

    new_df_name = field[:-4]+date+'_yield'
    locals()[new_df_name] = temp_df.reset_index()
    df_w_yields.append(new_df_name)
    print(new_df_name)
    display(locals()[new_df_name].head())

Staur_090720_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Block,iBlock,Entry,Name,Pedigree,Lodging
0,2250,0.00328363873996,0.00708982744254,0.06844691932201,0.00370102468878,0.01603136956692,512.8531309297913,2,10,13,GN20708,Berserk-4/CJ9306//Berserk-4,
1,2249,0.00348009588197,0.00740708131343,0.07340282201767,0.00376560306177,0.01729202084243,496.0075901328274,2,10,24,GN20720,SW45126/NK01533,
2,2248,0.00332108337898,0.0065160295926,0.07684666663408,0.00334950676188,0.01503179594874,600.1001897533208,2,10,20,GN20715,Sommerset/NK01513//BAJASS-5,
3,2247,0.00292720342986,0.00613083387725,0.07564453035593,0.00305201567244,0.01463360060006,497.83984819734354,2,10,10,GN20705,Sommerset/Berserk//Berserk,
4,2246,0.00357203674503,0.00758802285418,0.06355807185173,0.00446782726794,0.01699494943023,547.4034155597724,2,10,25,GN20721,GN03509/4/Alt/Ae.squ//2*Seri/3/Avle,


Staur_160720_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Block,iBlock,Entry,Name,Pedigree,Lodging
0,2250,0.01625120267272,0.04194076731801,0.27575758099556,0.02324975654483,0.08286775648594,512.8531309297913,2,10,13,GN20708,Berserk-4/CJ9306//Berserk-4,
1,2249,0.01719762757421,0.04406964406371,0.29594004154205,0.02291006036103,0.08832266181707,496.0075901328274,2,10,24,GN20720,SW45126/NK01533,
2,2248,0.01661528740078,0.03865727968514,0.30324314534664,0.02038839366287,0.07672333717346,600.1001897533208,2,10,20,GN20715,Sommerset/NK01513//BAJASS-5,
3,2247,0.01447629369795,0.03713286668062,0.30816721916199,0.01865244843066,0.07587007433176,497.83984819734354,2,10,10,GN20705,Sommerset/Berserk//Berserk,
4,2246,0.01659228093922,0.04567619599402,0.24673715233803,0.02990038972348,0.08791254088283,547.4034155597724,2,10,25,GN20721,GN03509/4/Alt/Ae.squ//2*Seri/3/Avle,


Staur_200620_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Block,iBlock,Entry,Name,Pedigree,Lodging
0,2250,0.00145507103298,0.00332760228775,0.05949476733804,0.00976499635726,0.00729796895757,512.8531309297913,2,10,13,GN20708,Berserk-4/CJ9306//Berserk-4,
1,2249,0.00165975652635,0.00360054406337,0.06422206014395,0.0105953970924,0.00806607771665,496.0075901328274,2,10,24,GN20720,SW45126/NK01533,
2,2248,0.00147166301031,0.00301819387823,0.06681277602911,0.00890355370939,0.00683826394379,600.1001897533208,2,10,20,GN20715,Sommerset/NK01513//BAJASS-5,
3,2247,0.00134751887526,0.00303950835951,0.06720031797886,0.00839543808252,0.00702378945425,497.83984819734354,2,10,10,GN20705,Sommerset/Berserk//Berserk,
4,2246,0.00173605798045,0.00352621322963,0.06344465538859,0.01128523796797,0.00780593184754,547.4034155597724,2,10,25,GN20721,GN03509/4/Alt/Ae.squ//2*Seri/3/Avle,


Staur_240720_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Block,iBlock,Entry,Name,Pedigree,Lodging
0,2250,0.0238294955343,0.05603430792689,0.32428166270256,0.04514024034142,0.11788533627987,512.8531309297913,2,10,13,GN20708,Berserk-4/CJ9306//Berserk-4,
1,2249,0.02380136307329,0.05834151990712,0.35907800495625,0.04414116963744,0.12500891089439,496.0075901328274,2,10,24,GN20720,SW45126/NK01533,
2,2248,0.02226984128356,0.05058756098151,0.34962192177773,0.03779564797878,0.10924083739519,600.1001897533208,2,10,20,GN20715,Sommerset/NK01513//BAJASS-5,
3,2247,0.02039171103388,0.05223521031439,0.38080188632011,0.03340482525527,0.11387558653951,497.83984819734354,2,10,10,GN20705,Sommerset/Berserk//Berserk,
4,2246,0.02466913778335,0.05685159191489,0.29124157130718,0.04933153837919,0.12051192298532,547.4034155597724,2,10,25,GN20721,GN03509/4/Alt/Ae.squ//2*Seri/3/Avle,


Staur_250620_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Block,iBlock,Entry,Name,Pedigree,Lodging
0,2250,0.01996915414929,0.04685629159212,0.47510567307472,0.02437139488757,0.10043120384216,512.8531309297913,2,10,13,GN20708,Berserk-4/CJ9306//Berserk-4,
1,2249,0.02187171112746,0.04996774718165,0.51809719204903,0.02572513651103,0.11019213497639,496.0075901328274,2,10,24,GN20720,SW45126/NK01533,
2,2248,0.02087892778218,0.04489376395941,0.53844881057739,0.02337522432208,0.09857240319252,600.1001897533208,2,10,20,GN20715,Sommerset/NK01513//BAJASS-5,
3,2247,0.01995605323464,0.04596696421504,0.54273834824562,0.02309393975884,0.10201548412442,497.83984819734354,2,10,10,GN20705,Sommerset/Berserk//Berserk,
4,2246,0.02449166215956,0.05085353180766,0.48327389359474,0.03120299987495,0.10842384397984,547.4034155597724,2,10,25,GN20721,GN03509/4/Alt/Ae.squ//2*Seri/3/Avle,


Staur_310720_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Block,iBlock,Entry,Name,Pedigree,Lodging
0,2250,0.02219050377607,0.04655896127224,0.24544082581997,0.05090947076678,0.10461443662643,512.8531309297913,2,10,13,GN20708,Berserk-4/CJ9306//Berserk-4,
1,2249,0.02231805585325,0.04842682555318,0.26523873209953,0.04978608712554,0.10869041085243,496.0075901328274,2,10,24,GN20720,SW45126/NK01533,
2,2248,0.02193842362612,0.04491526447237,0.25111445784569,0.04846501164138,0.09986081346869,600.1001897533208,2,10,20,GN20715,Sommerset/NK01513//BAJASS-5,
3,2247,0.02006046567112,0.04747053980827,0.27833522856235,0.04874909296632,0.11196607351303,497.83984819734354,2,10,10,GN20705,Sommerset/Berserk//Berserk,
4,2246,0.02360354922712,0.04892925545573,0.22566667199135,0.05956685170531,0.11178030818701,547.4034155597724,2,10,25,GN20721,GN03509/4/Alt/Ae.squ//2*Seri/3/Avle,


Wall time: 116 ms


### Robot 2020

In [212]:
yield_src_dict['Robot_2020']

'df_2020_Vollebekk_Robot_x_ROBOT_2020_x_raw'

In [213]:
Robot_2020_yield = df_2020_Vollebekk_Robot_x_ROBOT_2020_x_raw.copy()
Robot_2020_yield

Unnamed: 0,Name,CodeName,EntryRobot,Masbasis2015ID,YearOfRelease,Location,Season,class,Fertilization,SpatialRute,SpatialRep,SpatialBlock,SpatialCol,SpatialRow,PH_M_cm,Hd_date_jd,Mat_date_jd,Hd_dto_day,Mat_dto_day,GY_Calc_gm2,GW_M_g1000grn,TestWt_M_gl,GProt_NIRS_pct,BiomassAnthesisDry_Calc_gm2,BiomassMaturityDry_Calc_gm2,HarvestIndex_Calc,SpikesPerSquareMeter_M,BiomassAnthesisFresh_Calc_kgm2,BiomassMaturityFresh_Calc_kgm2
0,Avle,,20,1009,1996.0,vollebekk,2020,1,80_kgN_ha,1101,1,1,1,1,71.67,2020-06-21,2020-08-07,62,109,453.6585366,35.01259446,79.12,10.4,570,1130,0.401467731,993,2.22,1.44
1,,GN10637,14,1588,,vollebekk,2020,1,80_kgN_ha,1102,1,1,1,2,71.67,2020-06-21,2020-08-11,62,113,439.0243902,36.60477454,81.84,10.4,580,1170,0.375234522,924,2.11,1.5
2,Runar,,22,1020,1972.0,vollebekk,2020,1,80_kgN_ha,1103,1,1,1,3,81.67,2020-06-19,2020-08-04,60,106,409.7560976,40.92009685,80.92,10.7,610,1140,0.359435173,751,1.97,1.33
3,Betong,GN13618,10,1592,2019.0,vollebekk,2020,1,80_kgN_ha,1104,1,1,1,4,76.67,2020-06-20,2020-08-08,61,110,474.796748,39.84771574,79.76,10.8,610,1240,0.382900603,988,2.18,1.62
4,Reno,,23,1425,1975.0,vollebekk,2020,1,80_kgN_ha,1105,1,1,1,5,88.33,2020-06-20,2020-08-04,61,106,411.3821138,36.62650602,81.28,10.6,580,1200,0.342818428,648,2.03,1.53
5,Mirakel,GN06600,5,1401,2012.0,vollebekk,2020,1,80_kgN_ha,1106,1,1,1,6,88.33,2020-06-20,2020-08-07,61,109,450.4065041,38.3045977,79.32,10.0,570,1100,0.409460458,727,2.09,1.32
6,Felgen,SW21074,18,1594,2019.0,vollebekk,2020,1,80_kgN_ha,1107,1,2,1,7,75.0,2020-06-21,2020-08-11,62,113,440.6504065,37.32718894,82.4,10.3,460,1200,0.367208672,811,1.89,1.54
7,Polkka,,24,1419,1992.0,vollebekk,2020,1,80_kgN_ha,1108,1,2,1,8,76.67,2020-06-22,2020-08-07,63,109,359.3495935,34.7826087,80.2,11.2,500,1040,0.345528455,758,1.82,1.11
8,Seniorita,GN07574,7,1403,2014.0,vollebekk,2020,1,80_kgN_ha,1109,1,2,1,9,75.0,2020-06-21,2020-08-08,62,110,391.8699187,34.40366972,81.36,11.4,480,1130,0.346787539,998,1.77,1.39
9,Arabella,CHD132/05,11,1414,2014.0,vollebekk,2020,1,80_kgN_ha,1110,1,2,1,10,70.0,2020-06-20,2020-08-12,61,114,430.8943089,38.91752577,80.0,10.7,480,1130,0.381322397,977,1.68,1.49


In [214]:
%%time

field = 'Robot_2020'
field_plot_id = 'Plot_ID'
ref_df_yield = Robot_2020_yield
red_df_id = 'SpatialRute'

cols2add_dict = {'GrainYield':'GY_Calc_gm2',
#                  'Replicates':'Rep',
                 'Block':'SpatialBlock',
#                  'iBlock':'ibloc',
#                  'Entry':'entry',
                 'Name':'Name',
                 'CodeName':'CodeName',
#                  'Pedigree':'pedigree',
#                  'Line':'Line',
                 'Heading_Date':'Hd_date_jd',
                 'Maturity_Date':'Mat_date_jd',
                 'Days2Heading':'Hd_dto_day',
                 'Days2Maturity':'Mat_dto_day',
#                  'Lodging':'Lodging_images'
                }



for date in field_year_dict[field]:
    temp_df = locals()[field[:-4]+date].copy()
    temp_ref_df = ref_df_yield.copy()
    
    temp_df.set_index(field_plot_id, inplace=True)
    temp_ref_df.set_index(red_df_id, inplace=True)
    for field_df_col, ref_df_col in cols2add_dict.items():
        temp_df[field_df_col]=temp_ref_df[ref_df_col]
    new_df_name = field[:-4]+date+'_yield'
    locals()[new_df_name] = temp_df.reset_index()
    df_w_yields.append(new_df_name)
    print(new_df_name)
    display(locals()[new_df_name].head())

Robot_010720_yield


Unnamed: 0,Plot_ID,Blue,Green,Red,RedEdge,NIR,GrainYield,Block,Name,CodeName,Heading_Date,Maturity_Date,Days2Heading,Days2Maturity
0,1101,0.02190509997308,0.05163481459022,0.0281471516937,0.14229789376259,0.46575474739075,453.6585366,1,Avle,,2020-06-21,2020-08-07,62,109
1,1102,0.02810820471495,0.05591040663421,0.03500906378031,0.14514946937561,0.45868730545044,439.0243902,1,,GN10637,2020-06-21,2020-08-11,62,113
2,1103,0.02680813707411,0.05718827433884,0.03440085425973,0.14991150796413,0.46871496737003,409.7560976,1,Runar,,2020-06-19,2020-08-04,60,106
3,1104,0.02474952489138,0.04859760403633,0.03062343224883,0.13165853917599,0.45575988292694,474.796748,1,Betong,GN13618,2020-06-20,2020-08-08,61,110
4,1105,0.02928180620074,0.05957726202905,0.0359843634069,0.15357352048159,0.47326892614365,411.3821138,1,Reno,,2020-06-20,2020-08-04,61,106


Robot_040820_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Block,Name,CodeName,Heading_Date,Maturity_Date,Days2Heading,Days2Maturity
0,1101,0.02095003053546,0.05633494257927,0.29884171485901,0.08009007573128,0.14800894260406,453.6585366,1,Avle,,2020-06-21,2020-08-07,62,109
1,1102,0.0330418869853,0.0780663266778,0.31436029076576,0.10332262516022,0.17417079210281,439.0243902,1,,GN10637,2020-06-21,2020-08-11,62,113
2,1103,0.03303701430559,0.07975212484598,0.31890341639519,0.1154008358717,0.18229921162128,409.7560976,1,Runar,,2020-06-19,2020-08-04,60,106
3,1104,0.03018244355917,0.07412708550692,0.3156450688839,0.10261926800013,0.17597655951977,474.796748,1,Betong,GN13618,2020-06-20,2020-08-08,61,110
4,1105,0.03381837904453,0.08063637465239,0.31418824195862,0.12161112576723,0.18157379329205,411.3821138,1,Reno,,2020-06-20,2020-08-04,61,106


Robot_070720_yield


Unnamed: 0,Plot_ID,Blue,Green,Red,RedEdge,NIR,GrainYield,Block,Name,CodeName,Heading_Date,Maturity_Date,Days2Heading,Days2Maturity
0,1101,0.02694103121758,0.0780430957675,0.05971857532859,0.16312286257744,0.36508783698082,453.6585366,1,Avle,,2020-06-21,2020-08-07,62,109
1,1102,0.02407871279865,0.07884780317545,0.04308954626322,0.16936755180359,0.41077435016632,439.0243902,1,,GN10637,2020-06-21,2020-08-11,62,113
2,1103,0.0305251032114,0.08606274798513,0.05205285176635,0.17342576384544,0.40699383616447,409.7560976,1,Runar,,2020-06-19,2020-08-04,60,106
3,1104,0.03043309692293,0.08817436546087,0.0587149169296,0.17602185159922,0.39995054900646,474.796748,1,Betong,GN13618,2020-06-20,2020-08-08,61,110
4,1105,0.02607087418437,0.07236006110907,0.04106358811259,0.15312333405018,0.40022003650665,411.3821138,1,Reno,,2020-06-20,2020-08-04,61,106


Robot_120820_yield


Unnamed: 0,Plot_ID,Blue,Green,Red,RedEdge,NIR,GrainYield,Block,Name,CodeName,Heading_Date,Maturity_Date,Days2Heading,Days2Maturity
0,1101,0.02305869199336,0.04369389638305,0.09206131845713,0.13448163866997,0.27171096205711,453.6585366,1,Avle,,2020-06-21,2020-08-07,62,109
1,1102,0.0361067391932,0.06329665333033,0.11416774243116,0.16331735253334,0.29318436980247,439.0243902,1,,GN10637,2020-06-21,2020-08-11,62,113
2,1103,0.03992789238691,0.06801043450832,0.13076311349869,0.17331849038601,0.30204558372498,409.7560976,1,Runar,,2020-06-19,2020-08-04,60,106
3,1104,0.03184960782528,0.05535891652107,0.10538828372955,0.14896349608898,0.27307027578354,474.796748,1,Betong,GN13618,2020-06-20,2020-08-08,61,110
4,1105,0.03886594809592,0.0641232021153,0.12584912031889,0.16454335302115,0.2921976596117,411.3821138,1,Reno,,2020-06-20,2020-08-04,61,106


Robot_130720_yield


Unnamed: 0,Plot_ID,Blue,Green,Red,RedEdge,NIR,GrainYield,Block,Name,CodeName,Heading_Date,Maturity_Date,Days2Heading,Days2Maturity
0,1101,0.01833959110081,0.0676969923079,0.02363143395632,0.24615316092968,0.73589861392975,453.6585366,1,Avle,,2020-06-21,2020-08-07,62,109
1,1102,0.0191757613793,0.05139752849936,0.03025839384645,0.13531126827002,0.36123752593994,439.0243902,1,,GN10637,2020-06-21,2020-08-11,62,113
2,1103,0.0181386154145,0.05088067054748,0.03184470720589,0.12898110598326,0.34139826893807,409.7560976,1,Runar,,2020-06-19,2020-08-04,60,106
3,1104,0.01596145890653,0.04180620238185,0.02388698607683,0.11116465181112,0.33471828699112,474.796748,1,Betong,GN13618,2020-06-20,2020-08-08,61,110
4,1105,0.02024797163904,0.05622319877148,0.03514767438173,0.13898698985577,0.34808668494224,411.3821138,1,Reno,,2020-06-20,2020-08-04,61,106


Robot_180620_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Block,Name,CodeName,Heading_Date,Maturity_Date,Days2Heading,Days2Maturity
0,1101,0.02017218992114,0.04551790095866,0.29107581079006,0.03195643797517,0.10400892794132,453.6585366,1,Avle,,2020-06-21,2020-08-07,62,109
1,1102,0.02476438414305,0.05207933112979,0.26299907267094,0.04207962751388,0.11078608781099,439.0243902,1,,GN10637,2020-06-21,2020-08-11,62,113
2,1103,0.02646521385759,0.05401717498898,0.28894723951817,0.04434783570468,0.11571807786822,409.7560976,1,Runar,,2020-06-19,2020-08-04,60,106
3,1104,0.0207408387214,0.04092379286885,0.25535389780998,0.02985862828791,0.09334316104651,474.796748,1,Betong,GN13618,2020-06-20,2020-08-08,61,110
4,1105,0.02142804116011,0.04879641532898,0.27923420071602,0.0342170484364,0.11063806712627,411.3821138,1,Reno,,2020-06-20,2020-08-04,61,106


Robot_200720_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Block,Name,CodeName,Heading_Date,Maturity_Date,Days2Heading,Days2Maturity
0,1101,0.02454588003457,0.08216625824571,0.45997235178947,0.05145201086998,0.18291714042425,453.6585366,1,Avle,,2020-06-21,2020-08-07,62,109
1,1102,0.03132585808635,0.09125545620918,0.44752967357635,0.06336142122746,0.18930844962597,439.0243902,1,,GN10637,2020-06-21,2020-08-11,62,113
2,1103,0.03189267031848,0.09244126081467,0.44755314290524,0.06918661668897,0.19358334690332,409.7560976,1,Runar,,2020-06-19,2020-08-04,60,106
3,1104,0.02765567973256,0.08337458223105,0.45647865533829,0.05150516331196,0.18087995052338,474.796748,1,Betong,GN13618,2020-06-20,2020-08-08,61,110
4,1105,0.03474836051464,0.09604858607054,0.44700327515602,0.07202392816544,0.20102800428867,411.3821138,1,Reno,,2020-06-20,2020-08-04,61,106


Robot_220720_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Block,Name,CodeName,Heading_Date,Maturity_Date,Days2Heading,Days2Maturity
0,1101,0.01930648740381,0.0663922727108,0.39421299099922,0.04703480191529,0.17678136378527,453.6585366,1,Avle,,2020-06-21,2020-08-07,62,109
1,1102,0.0265209292993,0.07662331312895,0.38848266005516,0.05925175361335,0.18632294237614,439.0243902,1,,GN10637,2020-06-21,2020-08-11,62,113
2,1103,0.02625386789441,0.07528602331877,0.37772665917873,0.06187173351646,0.18400397151709,409.7560976,1,Runar,,2020-06-19,2020-08-04,60,106
3,1104,0.02298779599369,0.06812509149313,0.38754187524319,0.04742288030684,0.17164145410061,474.796748,1,Betong,GN13618,2020-06-20,2020-08-08,61,110
4,1105,0.02844028174877,0.07997496426106,0.37950111925602,0.06590992212296,0.19247905164957,411.3821138,1,Reno,,2020-06-20,2020-08-04,61,106


Robot_230620_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Block,Name,CodeName,Heading_Date,Maturity_Date,Days2Heading,Days2Maturity
0,1101,0.03152492269874,0.09833614528179,0.63412433862686,0.04226098954678,0.16217160224915,453.6585366,1,Avle,,2020-06-21,2020-08-07,62,109
1,1102,0.02555047161877,0.08407041430473,0.5771449804306,0.03652158752084,0.16111990809441,439.0243902,1,,GN10637,2020-06-21,2020-08-11,62,113
2,1103,0.03067859821022,0.08990556746721,0.58429265022278,0.0414254590869,0.1594340056181,409.7560976,1,Runar,,2020-06-19,2020-08-04,60,106
3,1104,0.03092943131924,0.09158532321453,0.57465845346451,0.04286000132561,0.15560138225555,474.796748,1,Betong,GN13618,2020-06-20,2020-08-08,61,110
4,1105,0.0290344459936,0.08628984540701,0.59155830740929,0.03986936062574,0.16495989263058,411.3821138,1,Reno,,2020-06-20,2020-08-04,61,106


Robot_240620_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Block,Name,CodeName,Heading_Date,Maturity_Date,Days2Heading,Days2Maturity
0,1101,0.01887219585478,0.04936913028359,0.40254253149033,0.02460776269436,0.11261589825153,453.6585366,1,Avle,,2020-06-21,2020-08-07,62,109
1,1102,0.02441161964089,0.05466130748391,0.405762180686,0.03055867180228,0.11723227798939,439.0243902,1,,GN10637,2020-06-21,2020-08-11,62,113
2,1103,0.02224458102137,0.05194922350347,0.40520313382149,0.0288751181215,0.11427058652043,409.7560976,1,Runar,,2020-06-19,2020-08-04,60,106
3,1104,0.02180106472224,0.05048925802112,0.41598591208458,0.0275558847934,0.11194690316915,474.796748,1,Betong,GN13618,2020-06-20,2020-08-08,61,110
4,1105,0.02417158894241,0.05745458230376,0.43731698393822,0.0305899232626,0.12432320415974,411.3821138,1,Reno,,2020-06-20,2020-08-04,61,106


Robot_250620_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Block,Name,CodeName,Heading_Date,Maturity_Date,Days2Heading,Days2Maturity
0,1101,0.02857915218919,0.06561426073313,0.29417185485363,0.04690415412188,0.12152681127191,453.6585366,1,Avle,,2020-06-21,2020-08-07,62,109
1,1102,0.03256279975176,0.06650457531214,0.28535088896751,0.05031367018819,0.12535981088877,439.0243902,1,,GN10637,2020-06-21,2020-08-11,62,113
2,1103,0.03249487094581,0.06739170104265,0.2769447863102,0.05083349719644,0.1193203702569,409.7560976,1,Runar,,2020-06-19,2020-08-04,60,106
3,1104,0.02680287417024,0.05635162815452,0.27460885047913,0.03972666151822,0.11531626433134,474.796748,1,Betong,GN13618,2020-06-20,2020-08-08,61,110
4,1105,0.03511358052492,0.07057294994593,0.26102067530155,0.05668625235558,0.12892803549766,411.3821138,1,Reno,,2020-06-20,2020-08-04,61,106


Robot_270720_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Block,Name,CodeName,Heading_Date,Maturity_Date,Days2Heading,Days2Maturity
0,1101,0.02502189390361,0.06882863119245,0.2890669554472,0.06901026889682,0.15342984348535,453.6585366,1,Avle,,2020-06-21,2020-08-07,62,109
1,1102,0.02393637783825,0.07196127250791,0.32066695392132,0.06189220957458,0.16705987602472,439.0243902,1,,GN10637,2020-06-21,2020-08-11,62,113
2,1103,0.03116400912404,0.07983272150159,0.31334085762501,0.07528245076537,0.17513106018305,409.7560976,1,Runar,,2020-06-19,2020-08-04,60,106
3,1104,0.03108229488134,0.08000330626965,0.29827067255974,0.07448498159647,0.17052379250526,474.796748,1,Betong,GN13618,2020-06-20,2020-08-08,61,110
4,1105,0.02700249291956,0.07670848816633,0.31795978546143,0.06397847086191,0.17307102680206,411.3821138,1,Reno,,2020-06-20,2020-08-04,61,106


Robot_290620_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Block,Name,CodeName,Heading_Date,Maturity_Date,Days2Heading,Days2Maturity
0,1101,0.00244491430931,0.00529054831713,0.04292875155806,0.00245283963159,0.01364300679415,453.6585366,1,Avle,,2020-06-21,2020-08-07,62,109
1,1102,0.0032072190661,0.00574673619121,0.04195026680827,0.00302049564198,0.01378114614636,439.0243902,1,,GN10637,2020-06-21,2020-08-11,62,113
2,1103,0.003069250728,0.00577543582767,0.04271410964429,0.00291791500058,0.01403577439487,409.7560976,1,Runar,,2020-06-19,2020-08-04,60,106
3,1104,0.0027393909404,0.00483367941342,0.04123995825648,0.00258120463695,0.01230455935001,474.796748,1,Betong,GN13618,2020-06-20,2020-08-08,61,110
4,1105,0.00323924212717,0.00588325364515,0.04267670214176,0.00300936424173,0.01421691197902,411.3821138,1,Reno,,2020-06-20,2020-08-04,61,106


Robot_300720_yield


Unnamed: 0,Plot_ID,Blue,Green,NIR,Red,RedEdge,GrainYield,Block,Name,CodeName,Heading_Date,Maturity_Date,Days2Heading,Days2Maturity
0,1101,0.02599355671555,0.05885926634073,0.37766008079052,0.07048562169075,0.19094762206078,453.6585366,1,Avle,,2020-06-21,2020-08-07,62,109
1,1102,0.03594646416605,0.0752526409924,0.35610428452492,0.08933207392693,0.19496158510447,439.0243902,1,,GN10637,2020-06-21,2020-08-11,62,113
2,1103,0.03221750445664,0.07311228662729,0.32561831176281,0.08758716285229,0.18750194460154,409.7560976,1,Runar,,2020-06-19,2020-08-04,60,106
3,1104,0.0311429053545,0.07130559533835,0.3344883620739,0.08317595720291,0.18769133090973,474.796748,1,Betong,GN13618,2020-06-20,2020-08-08,61,110
4,1105,0.03453932702541,0.07546604424715,0.32038909196854,0.09256618469953,0.19051899015904,411.3821138,1,Reno,,2020-06-20,2020-08-04,61,106


Wall time: 236 ms


### Masbasis 2021

### Summary

In [215]:
# Yield data is required for the following fields/years
sorted_field_year_dict.keys()

dict_keys(['Graminor_2020', 'Graminor_2019', 'Masbasis_2020', 'Masbasis_2021', 'Masbasis_2019', 'Robot_2020', 'Staur_2019', 'Staur_2020'])

In [216]:
print('Yield data added for the following datasets. ToDo: Yield data for Masbasis 2021 and bands data for all fields at Staur is not available/processed.')
# df_w_yields

Yield data added for the following datasets. ToDo: Yield data for Masbasis 2021 and bands data for all fields at Staur is not available/processed.


## Summary of remaining data and missing grain yield data

In [217]:
for field, dates in sorted_field_year_dict.items():
    rows_df = []
    rows_df_yield = []
    missing_yield_df = []

    # Grain yield date is not available for Masbasis 2021
    # So skipping it for now
    if field == 'Masbasis_2021':
        continue
        
    for date in dates:
        field_name = field.split('_')[0]+'_'+date.strftime('%d%m%y')
        temp_df = locals()[field_name].copy()
        rows_df.append(temp_df.shape[0])
#         print(field_name, temp_df.shape)

        field_name_yield = field.split('_')[0]+'_'+date.strftime('%d%m%y')+'_yield'
        temp_df_yield = locals()[field_name_yield].copy()
        missing_yield_df.append(temp_df_yield['GrainYield'].isna().sum())
        rows_df_yield.append(temp_df_yield.shape[0])
#         print(field_name, temp_df.shape, temp_df_yield['GrainYield'].isna().sum())

    print(field.split('_')[0], date.year, '      Rows:', rows_df)
    print(field.split('_')[0], date.year, 'Yield Rows:', rows_df_yield)
    print(field.split('_')[0], date.year, 'Grain Yield Missing:', missing_yield_df)

# sorted_field_year_dict
#         print(field_name)

Graminor 2020       Rows: [400, 757, 400, 800, 800, 800, 400, 400, 787, 793, 800]
Graminor 2020 Yield Rows: [400, 757, 400, 800, 800, 800, 400, 400, 787, 793, 800]
Graminor 2020 Grain Yield Missing: [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
Graminor 2019       Rows: [600, 600, 600, 600, 597, 300, 600, 600, 600, 600, 600]
Graminor 2019 Yield Rows: [600, 600, 600, 600, 597, 300, 600, 600, 600, 600, 600]
Graminor 2019 Grain Yield Missing: [1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1]
Masbasis 2020       Rows: [688, 688, 688, 688, 688, 688, 688, 688, 688, 688, 688, 688]
Masbasis 2020 Yield Rows: [688, 688, 688, 688, 688, 688, 688, 688, 688, 688, 688, 688]
Masbasis 2020 Grain Yield Missing: [117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117, 117]
Masbasis 2019       Rows: [528, 528, 528, 528, 528, 528, 528, 528]
Masbasis 2019 Yield Rows: [528, 528, 528, 528, 528, 528, 528, 528]
Masbasis 2019 Grain Yield Missing: [6, 6, 6, 6, 6, 6, 6, 6]
Robot 2020       Rows: [96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, 96

# Create field_year_dict_yield and sorted_field_year_dict_yield since a few datasets have been dropped now

In [218]:
%%time

field_year_dict_yield = {}

for df in df_w_yields:

    # Splitting the df name to get field name and date
    split_name = df.split('_')
    field_name = split_name[0]
    date1 = split_name[1]
    
    # Creating a dict with all dates for a certain field for a certain year
    dict_key = field_name+'_20'+date1[-2:]
    if dict_key in field_year_dict_yield:
        field_year_dict_yield[dict_key].append(date1)
    else:
        field_year_dict_yield[dict_key] = [date1]
        
#     print(date1)
print('field_year_dict_yield created.')

# pprint(field_year_dict_yield)


field_year_dict_yield created.
Wall time: 0 ns


In [219]:
sorted_field_year_dict_yield = {}
for key, dates_list in field_year_dict_yield.items():
    # Converting the dates to a datetime date object and sorting them in list
    sorted_dated = sorted([
        dt.strptime(date, '%d%m%y').date()
        for date in dates_list])

    sorted_field_year_dict_yield[key] = sorted_dated
    
print('sorted_field_year_dict_yield created.')
# pprint(sorted_field_year_dict_yield)

sorted_field_year_dict_yield created.


# Defining Base Indices

In [220]:
base_indices = ['Blue', 'Green', 'Red', 'RedEdge', 'NIR']
id_cols = ['Plot_ID']
std_columns = id_cols+base_indices

# Get yield columns from all df_w_yield

## Creating a list of yield+other columns just created while adding yield data to the datasets

In [221]:
yield_cols_temp = []
for df in df_w_yields:
    temp_df = locals()[df].copy()
#     print([x for x in temp_df.columns if not x in std_columns])
    yield_cols_temp.extend([x for x in temp_df.columns if not x in std_columns])
yield_cols = list(set(yield_cols_temp))
yield_cols

['Entry',
 'CodeName',
 'Block',
 'Pedigree',
 'Replicates',
 'iBlock',
 'Heading_Date',
 'Maturity_Date',
 'Days2Maturity',
 'Line',
 'Lodging',
 'GrainYield',
 'Days2Heading',
 'Name']

# Generate more indices

## Finding problems with old formulas by cmoparing them with new

### New corrected verified formulas

In [222]:
# temp_df = Graminor_050819_yield.iloc[1:3,]
# temp_df['NDVI']= (temp_df['NIR']-temp_df['Red']) / (temp_df['NIR']+temp_df['Red'])
# temp_df['MTCI']= (temp_df['NIR']-temp_df['RedEdge']) / (temp_df['RedEdge']-temp_df['Red'])
# temp_df['DVI']=temp_df['NIR']-temp_df['Red']
# temp_df['GDVI']=temp_df['NIR']-temp_df['Green']
# temp_df['EXG']=(2*temp_df['Green'])-temp_df['Red']-temp_df['Blue']
# temp_df['EXGR']=(3*temp_df['Green'])-(2.4*temp_df['Red'])-temp_df['Blue']
# temp_df['RDVI']=(temp_df['NIR']-temp_df['Red'])/np.sqrt(temp_df[['NIR','Red']].sum(axis=1))
# temp_df['TDVI']=1.5*(temp_df['NIR']-temp_df['Red'])/np.sqrt((np.power(temp_df['NIR'],2)+ temp_df['Red']+0.5 ))
# temp_df['GNDVI']=(temp_df['NIR']-temp_df['Green'])/(temp_df['NIR']+temp_df['Green'])
# temp_df['NDRE']=(temp_df['NIR']-temp_df['RedEdge'])/(temp_df['NIR']+temp_df['RedEdge'])
# temp_df['SCCI']=temp_df['NDRE']/temp_df['NDVI']
# temp_df['EVI']=2.5*(temp_df['NIR']-temp_df['Red'])/(temp_df['NIR']-6*(temp_df['Red'])-(7.5*temp_df['Blue'])+1)
# temp_df['TVI']=0.5*(120*(temp_df['NIR']-temp_df['Green'])-200*(temp_df['Red']-temp_df['Green']))
# temp_df['VARI']=(temp_df['Green']-temp_df['Red'])/(temp_df['Green']+temp_df['Red']-temp_df['Blue'])
# temp_df['GARI']=(temp_df['NIR' ]-(temp_df['Green']-(1.7*(temp_df['Blue']-temp_df['Red']))))/(temp_df['NIR']+(temp_df['Green']-(1.7*(temp_df['Blue']-temp_df['Red']))))
# temp_df['GCI']=(temp_df['NIR']/temp_df['Green'])-1
# temp_df['GLI']=((2*temp_df['Green'])-temp_df['Red']-temp_df['Blue'])/((2*temp_df['Green'])+temp_df['Red']+temp_df['Blue'])
# temp_df['NLI']=(np.power(temp_df['NIR'],2)-temp_df['Red'])/(np.power(temp_df['NIR'],2)+temp_df['Red'])
# temp_df['MNLI']=(np.power(temp_df['NIR'],2)-temp_df['Red'])*1.5/(np.power(temp_df['NIR'],2)+temp_df['Red'] + 0.5)
# temp_df['SAVI']= ((temp_df['NIR']-temp_df['Red'])*1.5)/(temp_df['NIR']+ temp_df['Red']+ 0.5) 
# temp_df['GSAVI']= ((temp_df['NIR']-temp_df['Green'])*1.5)/(temp_df['NIR']+ temp_df['Green']+ 0.5)
# temp_df['OSAVI']= ((temp_df['NIR']-temp_df['Red']))/(temp_df['NIR']+ temp_df['Red']+ 0.16)
# temp_df['GOSAVI']= ((temp_df['NIR']-temp_df['Green']))/(temp_df['NIR']+ (temp_df['Green'])+ 0.16)
# temp_df['MSAVI2']=(2*temp_df['NIR'])+1-np.sqrt(np.power(((2*temp_df['NIR'])+1),2)-8*(temp_df['NIR']-temp_df['Red']))/2
# temp_df['MSR']=((temp_df['NIR']/temp_df['Red'])-1)/ (np.sqrt(temp_df['NIR']/temp_df['Red'])+1)
# temp_df['GRVI']=(temp_df['NIR']/temp_df['Green'])
# temp_df['WDRVI']=((0.1*temp_df['NIR'])-temp_df['Red'])/((0.1*temp_df['NIR'])+temp_df['Red'])
# temp_df['SR']=(temp_df['NIR']/temp_df['Red'])
# temp_df
# temp_df_new = temp_df.copy()

### Old formulas provided by Sahaemh

In [223]:
# spectral_indices = ['NDVI', 'MTCI', 'DVI', 'GDVI', 'MTCI_CI', 'EXG', 'EXGR', 'RDVI',
#                     'TDVI', 'GNDVI', 'NDRE', 'SCCI', 'EVI', 'TVI', 'VARI', 'GARI',
#                     'GCI', 'GLI', 'NLI', 'MNLI', 'SAVI', 'GSAVI', 'OSAVI', 'GOSAVI',
#                     'MSAVI2', 'MSR', 'GRVI', 'WDRVI', 'SR']

# temp_df = Graminor_050819_yield.iloc[1:3,]
# temp_df['NDVI']= (temp_df['NIR']-temp_df['Red']) / (temp_df['NIR']+temp_df['Red'])
# temp_df['MTCI']= (temp_df['NIR']-temp_df['RedEdge']) / (temp_df['RedEdge']-temp_df['Red'])

# temp_df['DVI']=temp_df['NIR']-temp_df['Red']
# temp_df['GDVI']=temp_df['NIR']-temp_df['Green']
# # temp_df['MTCI_CI']=(temp_df['NIR']-temp_df['RedEdge'])/(temp_df['RedEdge']-temp_df['Red'])
# temp_df['EXG']=(2*temp_df['Green'])-temp_df['Red']-temp_df['Blue']
# temp_df['EXGR']=(3*temp_df['Green'])-(2.4*temp_df['Red'])-temp_df['Blue']

# temp_df['RDVI']=(temp_df['NIR']-temp_df['Red'])/np.sqrt(temp_df[['NIR','Red']].sum(axis=1))
# temp_df['TDVI']=1.5*(temp_df['NIR']-temp_df['Red'])/np.sqrt((np.power(temp_df['NIR'],2)+ temp_df['Red']+0.5 ))
# temp_df['GNDVI']=(temp_df['NIR']-temp_df['Green'])/(temp_df['NIR']+temp_df['Green'])
# temp_df['NDRE']=(temp_df['NIR']-temp_df['RedEdge'])/(temp_df['NIR']+temp_df['RedEdge'])
# temp_df['SCCI']=temp_df['NDRE']/temp_df['NDVI']
# temp_df['EVI']=2.5*(temp_df['NIR']-temp_df['Red'])/(temp_df['NIR']-6*(temp_df['Red'])-(7.5*temp_df['Blue'])-1)
# temp_df['TVI']=0.5*(120*(temp_df['NIR']-temp_df['Green'])-200*(temp_df['Red']-temp_df['Green']))
# temp_df['VARI']=(temp_df['Green']-temp_df['Red'])/(temp_df['Green']+temp_df['Red']-temp_df['Blue'])
# temp_df['GARI']=(temp_df['NIR' ]-temp_df['Green'])-(1.7*(temp_df['Blue']-temp_df['Red']))/(temp_df['NIR']+temp_df['Green'])-(1.7*(temp_df['Blue']-temp_df['Red']))
# temp_df['GCI']=(temp_df['NIR']/temp_df['Green'])-1
# temp_df['GLI']=(temp_df['Green']-temp_df['Red']-temp_df['Blue'])/(2*temp_df['Green']+temp_df['Red']+temp_df['Blue'])
# temp_df['NLI']=(np.power(temp_df['NIR'],2)-temp_df['Red'])/(np.power(temp_df['NIR'],2)+temp_df['Red'])
# temp_df['MNLI']=(np.power(temp_df['NIR'],2)-temp_df['Red'])*1.5/(np.power(temp_df['NIR'],2)+temp_df['Red'] + 0.5)
# temp_df['SAVI']= ((temp_df['NIR']-temp_df['Red'])*1.5)/(temp_df['NIR']+ temp_df['Red']+ 0.5) 
# temp_df['GSAVI']= ((temp_df['NIR']-temp_df['Green'])*1.5)/(temp_df['NIR']+ temp_df['Green']+ 0.5)                                    
# temp_df['OSAVI']= ((temp_df['NIR']-temp_df['Red']))/(temp_df['NIR']+ temp_df['Red']+ 0.16)
# temp_df['GOSAVI']= ((temp_df['NIR']-temp_df['Green']))/(temp_df['NIR']+ (temp_df['Green'])+ 0.16)
# temp_df['MSAVI2']=(2*temp_df['NIR'])+1-np.sqrt(np.power((2*temp_df['NIR']+1),2)-8*(temp_df['NIR']-temp_df['Red']))/2
# temp_df['MSR']=(temp_df['NIR']/temp_df['Red'])-(1/np.sqrt(temp_df['NIR']/temp_df['Red']))
# temp_df['GRVI']=(temp_df['NIR']/temp_df['Green'])
# temp_df['WDRVI']=((0.1*temp_df['NIR'])-temp_df['Red'])/((0.1*temp_df['NIR'])+temp_df['Red'])
# temp_df['SR']=(temp_df['NIR']/temp_df['Red'])
# temp_df
# temp_df_old = temp_df.copy() 

In [224]:
# temp_df_new == temp_df_old

Finding is that the formulas of the following were wrong in the provided code.
EVI  
GARI  
GLI  
MSR  
  
Code comparision  
NEW  
OLD  
  
Should be +1 rather than -1  
temp_df['EVI']=2.5*(temp_df['NIR']-temp_df['Red'])/(temp_df['NIR']-6*(temp_df['Red'])-(7.5*temp_df['Blue'])+1)  
temp_df['EVI']=2.5*(temp_df['NIR']-temp_df['Red'])/(temp_df['NIR']-6*(temp_df['Red'])-(7.5*temp_df['Blue'])-1)  
  
Bracket before GREEN makes the difference  
temp_df['GARI']=(temp_df['NIR' ]-(temp_df['Green']-(1.7*(temp_df['Blue']-temp_df['Red']))))/(temp_df['NIR']+(temp_df['Green']-(1.7*(temp_df['Blue']-temp_df['Red']))))  
temp_df['GARI']=(temp_df['NIR' ]-temp_df['Green'])-(1.7*(temp_df['Blue']-temp_df['Red']))/(temp_df['NIR']+temp_df['Green'])-(1.7*(temp_df['Blue']-temp_df['Red']))  
  
Missing 2 with green in the begining  
temp_df['GLI']=((2*temp_df['Green'])-temp_df['Red']-temp_df['Blue'])/((2*temp_df['Green'])+temp_df['Red']+temp_df['Blue'])  
temp_df['GLI']=(temp_df['Green']-temp_df['Red']-temp_df['Blue'])/(2*temp_df['Green']+temp_df['Red']+temp_df['Blue'])  
  
Different formula  
temp_df['MSR']=((temp_df['NIR']/temp_df['Red'])-1)/ (np.sqrt(temp_df['NIR']/temp_df['Red'])+1)  
temp_df['MSR']=(temp_df['NIR']/temp_df['Red'])-(1/np.sqrt(temp_df['NIR']/temp_df['Red']))  
  

## Adding correct indices to datasets

In [225]:
%%time

# List of all indices being created in this code block
spectral_indices = ['NDVI', 'MTCI', 'DVI', 'GDVI', 'EXG', 'EXGR', 'RDVI',
                    'TDVI', 'GNDVI', 'NDRE', 'SCCI', 'EVI', 'TVI', 'VARI', 'GARI',
                    'GCI', 'GLI', 'NLI', 'MNLI', 'SAVI', 'GSAVI', 'OSAVI', 'GOSAVI',
                    'MSAVI2', 'MSR', 'GRVI', 'WDRVI', 'SR']

list_df_all_indices = []

for df in df_w_yields:
    new_df_name = df + '_indices'
    date1 = new_df_name.split('_')[1]
    
    temp_df = locals()[df].copy()
    
    # Decided to generate the idices after applying Simpsons integration on the base indices
    
#     ######indices definition
    temp_df['NDVI']= (temp_df['NIR']-temp_df['Red']) / (temp_df['NIR']+temp_df['Red'])
    temp_df['MTCI']= (temp_df['NIR']-temp_df['RedEdge']) / (temp_df['RedEdge']-temp_df['Red'])
    temp_df['DVI']=temp_df['NIR']-temp_df['Red']
    temp_df['GDVI']=temp_df['NIR']-temp_df['Green']
    temp_df['EXG']=(2*temp_df['Green'])-temp_df['Red']-temp_df['Blue']
    temp_df['EXGR']=(3*temp_df['Green'])-(2.4*temp_df['Red'])-temp_df['Blue']
    temp_df['RDVI']=(temp_df['NIR']-temp_df['Red'])/np.sqrt(temp_df[['NIR','Red']].sum(axis=1))
    temp_df['TDVI']=1.5*(temp_df['NIR']-temp_df['Red'])/np.sqrt((np.power(temp_df['NIR'],2)+ temp_df['Red']+0.5 ))
    temp_df['GNDVI']=(temp_df['NIR']-temp_df['Green'])/(temp_df['NIR']+temp_df['Green'])
    temp_df['NDRE']=(temp_df['NIR']-temp_df['RedEdge'])/(temp_df['NIR']+temp_df['RedEdge'])
    temp_df['SCCI']=temp_df['NDRE']/temp_df['NDVI']
    temp_df['EVI']=2.5*(temp_df['NIR']-temp_df['Red'])/(temp_df['NIR']-6*(temp_df['Red'])-(7.5*temp_df['Blue'])+1)
    temp_df['TVI']=0.5*(120*(temp_df['NIR']-temp_df['Green'])-200*(temp_df['Red']-temp_df['Green']))
    temp_df['VARI']=(temp_df['Green']-temp_df['Red'])/(temp_df['Green']+temp_df['Red']-temp_df['Blue'])
    temp_df['GARI']=(temp_df['NIR' ]-(temp_df['Green']-(1.7*(temp_df['Blue']-temp_df['Red']))))/(temp_df['NIR']+(temp_df['Green']-(1.7*(temp_df['Blue']-temp_df['Red']))))
    temp_df['GCI']=(temp_df['NIR']/temp_df['Green'])-1
    temp_df['GLI']=((2*temp_df['Green'])-temp_df['Red']-temp_df['Blue'])/((2*temp_df['Green'])+temp_df['Red']+temp_df['Blue'])
    temp_df['NLI']=(np.power(temp_df['NIR'],2)-temp_df['Red'])/(np.power(temp_df['NIR'],2)+temp_df['Red'])
    temp_df['MNLI']=(np.power(temp_df['NIR'],2)-temp_df['Red'])*1.5/(np.power(temp_df['NIR'],2)+temp_df['Red'] + 0.5)
    temp_df['SAVI']= ((temp_df['NIR']-temp_df['Red'])*1.5)/(temp_df['NIR']+ temp_df['Red']+ 0.5) 
    temp_df['GSAVI']= ((temp_df['NIR']-temp_df['Green'])*1.5)/(temp_df['NIR']+ temp_df['Green']+ 0.5)
    temp_df['OSAVI']= ((temp_df['NIR']-temp_df['Red']))/(temp_df['NIR']+ temp_df['Red']+ 0.16)
    temp_df['GOSAVI']= ((temp_df['NIR']-temp_df['Green']))/(temp_df['NIR']+ (temp_df['Green'])+ 0.16)
    temp_df['MSAVI2']=(2*temp_df['NIR'])+1-np.sqrt(np.power(((2*temp_df['NIR'])+1),2)-8*(temp_df['NIR']-temp_df['Red']))/2
    temp_df['MSR']=((temp_df['NIR']/temp_df['Red'])-1)/ (np.sqrt(temp_df['NIR']/temp_df['Red'])+1)
    temp_df['GRVI']=(temp_df['NIR']/temp_df['Green'])
    temp_df['WDRVI']=((0.1*temp_df['NIR'])-temp_df['Red'])/((0.1*temp_df['NIR'])+temp_df['Red'])
    temp_df['SR']=(temp_df['NIR']/temp_df['Red'])
                                                                                                
    list_df_all_indices.append(new_df_name)
    locals()[new_df_name] = temp_df.copy()
list_df_all_indices
for x in list_df_all_indices:
    print(x, locals()[x].shape)

Graminor_020719_yield_indices (600, 40)
Graminor_050719_yield_indices (597, 40)
Graminor_050819_yield_indices (600, 40)
Graminor_060619_yield_indices (600, 40)
Graminor_070819_yield_indices (600, 40)
Graminor_110619_yield_indices (600, 40)
Graminor_110719_yield_indices (300, 40)
Graminor_150719_yield_indices (600, 40)
Graminor_150819_yield_indices (600, 40)
Graminor_250719_yield_indices (600, 40)
Graminor_280619_yield_indices (600, 40)
Graminor_010720_yield_indices (400, 40)
Graminor_040720_yield_indices (800, 40)
Graminor_040820_yield_indices (793, 40)
Graminor_070720_yield_indices (800, 40)
Graminor_130720_yield_indices (800, 40)
Graminor_140820_yield_indices (800, 40)
Graminor_170720_yield_indices (400, 40)
Graminor_180620_yield_indices (400, 40)
Graminor_200720_yield_indices (400, 40)
Graminor_240620_yield_indices (757, 40)
Graminor_300720_yield_indices (787, 40)
Masbasis_050719_yield_indices (528, 41)
Masbasis_060619_yield_indices (528, 41)
Masbasis_070819_yield_indices (528, 41)


In [226]:
len(list_df_all_indices)
list_df_all_indices

print(len(spectral_indices))
Graminor_250719_yield_indices.shape

28


(600, 40)

# Exporting yield_cols, base_indices and spectral indices to be used at later stages

In [227]:
# Make sure the folder/dir is there. If not, create one
os.makedirs(main_path, exist_ok=True)
import json
a_file = open(main_path+'yield_columns.json', "w")
json.dump(yield_cols, a_file)
a_file.close()


# a_file = open(main_path+"\yield_columns.json", "r")
# output_str = a_file.read()

# # The file is imported as string
# # Converting it to python format
# yield_cols = json.loads(output_str)
# a_file.close()
# print(yield_cols)

In [228]:
# Make sure the folder/dir is there. If not, create one
os.makedirs(main_path, exist_ok=True)
import json
a_file = open(main_path+'spectral_indices_columns.json', "w")
json.dump(spectral_indices, a_file)
a_file.close()


# a_file = open(main_path+"spectral_indices_columns.json", "r")
# output_str = a_file.read()

# # The file is imported as string
# # Converting it to python format
# spectral_indices = json.loads(output_str)
# a_file.close()
# print(spectral_indices)

In [229]:
# Make sure the folder/dir is there. If not, create one
os.makedirs(main_path, exist_ok=True)
import json
a_file = open(main_path+'base_indices_columns.json', "w")
json.dump(base_indices, a_file)
a_file.close()


# a_file = open(main_path+"base_indices_columns.json", "r")
# output_str = a_file.read()

# # The file is imported as string
# # Converting it to python format
# base_indices = json.loads(output_str)
# a_file.close()
# print(base_indices)

# Exporting the datasets to csv files

In [230]:
os.makedirs(export_path, exist_ok=True)
for df in list_df_all_indices:
    locals()[df].to_csv(export_path+df+'.csv', index=False)

# END OF SECTION