In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import scale 
from sklearn.preprocessing import StandardScaler
from sklearn import model_selection
from sklearn.model_selection import RepeatedKFold
from sklearn.model_selection import train_test_split
from sklearn.cross_decomposition import PLSRegression
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from scipy.signal import savgol_filter

In [3]:
data_traits = pd.read_csv('2022_sorghum_traits.csv', dtype = { 'Leaf_Num' : 'category', 'SLA' : 'category' , 'THCK' : 'category', 'LWC' : 'category'})

In [4]:
data_reflectance = pd.read_csv('Sorghum_2022_reflectance.csv', dtype={'Genotype' : 'category', 'Rep': 'category' , 'Leaf_Num' : 'category', 'Leaf_pos' : 'category'})

In [5]:
data_traits.head()

Unnamed: 0,Genotype ID,Accession,Genotype,Rep,Leaf_num,CHLp1,CHLp2,CHLp3,FW,LA,DW,SLA,THCK,LWC
0,GREENLEAF,PI 659692,1,1,1,367.4,472.9,399.7,4.45,281.59,1.11,253.684684684685,63.2786516853933,75.0561797752809
1,GREENLEAF,PI 659692,1,1,2,399.7,739.3,357.8,3.45,225.36,0.99,227.636363636364,65.3217391304348,71.304347826087
2,GREENLEAF,PI 659692,1,1,3,439.2,362.3,347.7,1.65,125.04,0.48,260.5,75.7818181818182,70.9090909090909
3,GREENLEAF,PI 659692,1,2,1,447.1,141.9,160.9,4.45,249.96,1.06,235.811320754717,56.1707865168539,76.1797752808989
4,GREENLEAF,PI 659692,1,2,2,325.2,481.0,322.5,1.9,144.58,0.55,262.872727272727,76.0947368421053,71.0526315789474


In [6]:
data_reflectance.head()

Unnamed: 0,ASD_Label,Genotype,Rep,Leaf_num,Leaf_pos,350,351,352,353,354,...,2491,2492,2493,2494,2495,2496,2497,2498,2499,2500
0,1-1-1-3,1,1,1,3,0.067603,0.097,0.104572,0.0956,0.089,...,0.0722,0.0713,0.0707,0.0698,0.069483,0.0705,0.0701,0.0685,0.0683,0.0684
1,1-2-2-1,1,2,2,1,0.0869,0.11104,0.103771,0.0802,0.091,...,0.0598,0.0601,0.0597,0.0584,0.0579,0.0572,0.057,0.0584,0.0583,0.0572
2,1-2-3-3,1,2,3,3,0.0907,0.074,0.0664,0.0781,0.0976,...,0.0682,0.065967,0.0655,0.0651,0.0649,0.0667,0.0668,0.0654,0.0646,0.0628
3,1-2-1-2,1,2,1,2,0.0979,0.112459,0.103933,0.0905,0.118572,...,0.150094,0.149474,0.149433,0.150717,0.150821,0.150798,0.150168,0.148169,0.147093,0.145641
4,1-2-3-2,1,2,3,2,0.100644,0.106306,0.0979,0.085,0.0985,...,0.0512,0.050941,0.0505,0.0497,0.0495,0.049623,0.0492,0.0485,0.048277,0.047548


In [7]:
data_traits = data_traits.iloc[: , 0:14]
data_traits.head()

Unnamed: 0,Genotype ID,Accession,Genotype,Rep,Leaf_num,CHLp1,CHLp2,CHLp3,FW,LA,DW,SLA,THCK,LWC
0,GREENLEAF,PI 659692,1,1,1,367.4,472.9,399.7,4.45,281.59,1.11,253.684684684685,63.2786516853933,75.0561797752809
1,GREENLEAF,PI 659692,1,1,2,399.7,739.3,357.8,3.45,225.36,0.99,227.636363636364,65.3217391304348,71.304347826087
2,GREENLEAF,PI 659692,1,1,3,439.2,362.3,347.7,1.65,125.04,0.48,260.5,75.7818181818182,70.9090909090909
3,GREENLEAF,PI 659692,1,2,1,447.1,141.9,160.9,4.45,249.96,1.06,235.811320754717,56.1707865168539,76.1797752808989
4,GREENLEAF,PI 659692,1,2,2,325.2,481.0,322.5,1.9,144.58,0.55,262.872727272727,76.0947368421053,71.0526315789474


In [8]:
data_traits = data_traits.loc[data_traits['DW'].notnull() , :]

In [9]:
data_traits.shape

(353, 14)

In [10]:
data_traits['Genotype'].dtype

dtype('int64')

In [11]:
grouped = data_traits.groupby(['Genotype', 'Rep']).mean()

In [12]:
grouped.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Leaf_num,CHLp1,CHLp2,CHLp3,FW,LA,DW
Genotype,Rep,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,1,2.0,402.1,524.833333,368.4,3.183333,210.663333,0.86
1,2,2.0,368.433333,323.066667,254.166667,2.683333,175.103333,0.7
2,1,1.5,778.5,750.35,631.4,16.0,617.35,4.215
2,2,2.0,581.9,429.0,216.466667,16.483333,630.256667,4.65
4,1,2.0,510.433333,557.733333,555.1,13.833333,596.18,4.443333


In [83]:
grouped.shape

(120, 7)

In [47]:
data = pd.merge(data_traits, data_reflectance, how='outer')

In [48]:
data = data.loc[data['CHLp1'].notnull() , :]

In [49]:
data.to_csv(r'/home/schnable/Downloads/sorghum_2022_whole.csv', index = None)