In [2]:
import pandas as pd

file = "data/EXP_1 V2.xlsx"

xls = pd.ExcelFile(file)
sheets = xls.sheet_names
sheets

['Seedlings measurements',
 'Water quality measurments Senso',
 'Water quality parametersPortabl',
 'Nutrients  Water consumptions',
 'Head diameter',
 'Harvest measurements 842024']

In [3]:
# Load the "Nutrients Water consumptions" sheet without headers
df_nutrients = pd.read_excel(file, sheet_name="Nutrients  Water consumptions", header=None)
df_nutrients.shape

(26, 23)

In [4]:
df = df_nutrients.copy()

In [5]:
df = df.dropna(how='all')
df.reset_index(drop=True, inplace=True)
df


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,13,14,15,16,17,18,19,20,21,22
0,Date,Nutrient solution addition (A+B) ml.,,,,,,,Date,Acid consumption (ml),...,,,,Date,Water consumption L.,,,,,
1,,Replicate 1 T1,Replicate 2 T1,Replicate 3 T1,Replicate 1 T2,Replicate 2 T2,Replicate 3 T2,,,Replicate 1 T1,...,Replicate 2 T2,Replicate 3 T2,,,Replicate 1 T1,Replicate 2 T1,Replicate 3 T1,Replicate 1 T2,Replicate 2 T2,Replicate 3 T2
2,2024-09-03 00:00:00,115,136,136,136,136,136,,2024-09-03 00:00:00,20,...,20,20,,2024-11-03 00:00:00,10,0,0,0,0,100
3,2024-10-03 00:00:00,30,30,30,30,30,30,,2024-10-03 00:00:00,14,...,10,10,,20/3/2024,10,10,10,10,15,10
4,20/3/2024,0,0,0,0,0,136,,2024-01-04 00:00:00,5,...,5,5,,29/3/2024,0,0,0,0,0,16
5,24/3/2024,12,9,9,21,21,27,,,,...,,,,2024-01-04 00:00:00,15,15,15,15,15,15
6,2024-01-04 00:00:00,,,,,,,,,,...,,,,,,,,,,
7,Total,157,175,175,187,187,329,,Total,39,...,35,35,,Total,35,25,25,25,30,141


In [6]:
# Column index groups (based on your file layout)
nutrient_cols = df.columns[1:7]
acid_cols     = df.columns[9:15]
water_cols    = df.columns[17:23]

In [7]:
nutrient_date_col = df.columns[0]
acid_date_col     = df.columns[8]
water_date_col    = df.columns[16]


In [8]:
nutr_df = df[[nutrient_date_col] + list(nutrient_cols)].copy()
acid_df = df[[acid_date_col] + list(acid_cols)].copy()
water_df = df[[water_date_col] + list(water_cols)].copy()

nutr_df.columns = ["Date"] + list(nutrient_cols)
acid_df.columns = ["Date"] + list(acid_cols)
water_df.columns = ["Date"] + list(water_cols)


In [10]:
def clean_dates(series):
    return pd.to_datetime(series, errors='coerce', dayfirst=True)

nutr_df['Date']  = clean_dates(nutr_df['Date'])
acid_df['Date']  = clean_dates(acid_df['Date'])
water_df['Date'] = clean_dates(water_df['Date'])



In [11]:
nutr_df = nutr_df.dropna(subset=['Date'])
acid_df = acid_df.dropna(subset=['Date'])
water_df = water_df.dropna(subset=['Date'])


In [18]:
import re

def melt_long(df, value_name):
    long = df.melt(id_vars='Date', var_name='Group', value_name=value_name)

    # Remove empty columns created by NaN column names
    long = long.dropna(subset=['Group', value_name], how='any')

    # Extract replicate number and treatment (safe for all valid columns)
    long['Replicate'] = long['Group'].str.extract(r'Replicate\s+(\d+)')
    long['Treatment'] = long['Group'].str.extract(r'T(\d+)')

    # Drop rows where extraction failed
    long = long.dropna(subset=['Replicate', 'Treatment'])

    long['Replicate'] = 'R' + long['Replicate']
    long['Treatment'] = 'T' + long['Treatment']

    return long[['Date', 'Treatment', 'Replicate', value_name]]



In [14]:
nutr_df.columns  = nutr_df.columns.astype(str)
acid_df.columns  = acid_df.columns.astype(str)
water_df.columns = water_df.columns.astype(str)


In [15]:
import re

def filter_rep_cols(cols):
    return [c for c in cols if re.match(r'^Replicate\s+\d+\s+T\d+', c)]


In [16]:
nutr_reps  = filter_rep_cols(nutr_df.columns)
acid_reps  = filter_rep_cols(acid_df.columns)
water_reps = filter_rep_cols(water_df.columns)


In [17]:
nutr_df = nutr_df[['Date'] + nutr_reps]
acid_df = acid_df[['Date'] + acid_reps]
water_df = water_df[['Date'] + water_reps]


In [19]:
nutr_long  = melt_long(nutr_df,  "Nutrient_ml")
acid_long  = melt_long(acid_df,  "Acid_ml")
water_long = melt_long(water_df, "Water_L")


In [20]:
print("Nutrient columns:", nutr_reps)
print("Acid columns:", acid_reps)
print("Water columns:", water_reps)


Nutrient columns: []
Acid columns: []
Water columns: []


In [21]:
df_nutrients.columns.tolist()


[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22]

In [22]:
for i, col in enumerate(df_nutrients.columns):
    print(i, repr(col))


0 0
1 1
2 2
3 3
4 4
5 5
6 6
7 7
8 8
9 9
10 10
11 11
12 12
13 13
14 14
15 15
16 16
17 17
18 18
19 19
20 20
21 21
22 22
