In [1]:
import pandas as pd
import os

In [None]:
def Tavg_cal(df):
    """
    Adds a 'Tavg' column holding the mean of the 'tmax' and 'tmin' columns.

    Parameters:
        df (pd.DataFrame): DataFrame with 'tmax' and 'tmin' columns.

    Returns:
        pd.DataFrame: The same DataFrame object, with 'Tavg' added in place.
    """
    highs = df['tmax']
    lows = df['tmin']
    # Midpoint of the two temperatures: (tmax + tmin) / 2
    df['Tavg'] = highs.add(lows).div(2)
    return df



def doy_cal(df, date_column='date'):
    """
    Inserts a day-of-year column named 'doy' at column position 1.

    The date column is converted to datetime in place before the day of
    year is read from it.

    Parameters:
        df (pd.DataFrame): The DataFrame containing the date column.
        date_column (str): Name of the column with date values (default 'date').

    Returns:
        pd.DataFrame: The same DataFrame object, with 'doy' as its second column.

    Raises:
        ValueError: If a 'doy' column already exists (raised by DataFrame.insert).
    """
    parsed_dates = pd.to_datetime(df[date_column])
    df[date_column] = parsed_dates

    # Position 1 places the new column immediately after the first column
    day_numbers = parsed_dates.dt.dayofyear
    df.insert(1, 'doy', day_numbers)

    return df


def dap_cal(df, date_column='date'):
    """
    Inserts a 'dap' (days after planting) column at column position 2.

    The planting date of a row is taken to be the earliest date that shares
    the row's calendar year, so the count restarts at 0 in every year.

    Parameters:
        df (pd.DataFrame): DataFrame containing the date column.
        date_column (str): Name of the column with date values (default 'date').

    Returns:
        pd.DataFrame: The same DataFrame object, with 'dap' added in place.

    Notes:
        A scratch column named 'planting_date' is written and then removed,
        so a pre-existing column of that name does not survive the call.
    """
    df[date_column] = pd.to_datetime(df[date_column])

    # Earliest date per calendar year, broadcast back onto every row
    calendar_year = df[date_column].dt.year
    df['planting_date'] = df[date_column].groupby(calendar_year).transform('min')

    elapsed = df[date_column] - df['planting_date']
    df.insert(2, 'dap', elapsed.dt.days)

    # The scratch column is only needed for the subtraction above
    del df['planting_date']

    return df


#define GDD function:
def gdd_cal(Tdata, tavg_column='Tavg', lower_threshold=0, upper_threshold=30):
    """
    Adds a 'gdd' column with the daily growing degree days.

    The average temperature is limited to the range
    [lower_threshold, upper_threshold] and the lower threshold is then
    subtracted, so values below the lower threshold give 0 and values above
    the upper threshold give upper_threshold - lower_threshold. Missing
    temperatures stay missing.

    Parameters:
        Tdata (pd.DataFrame): DataFrame containing the average temperature column.
        tavg_column (str): Name of the average temperature column (default 'Tavg').
        lower_threshold (float): Lower temperature threshold (default 0).
        upper_threshold (float): Upper temperature threshold (default 30).

    Returns:
        pd.DataFrame: The same DataFrame object, with 'gdd' added in place.
    """
    # Vectorised clip replaces the row-by-row apply
    capped = Tdata[tavg_column].clip(lower=lower_threshold, upper=upper_threshold)
    Tdata['gdd'] = capped - lower_threshold
    # Return the frame like the sibling *_cal helpers do
    return Tdata
    
#define dgdd function
def dgdd_cal(df):
    """
    Adds a 'dgdd' column: the absolute change in 'gdd' from the previous row.

    The first row has no predecessor, so its change is set to 0.

    Parameters:
        df (pd.DataFrame): DataFrame containing a 'gdd' column.

    Returns:
        pd.DataFrame: The same DataFrame object, with 'dgdd' added in place.
    """
    step_change = df['gdd'].diff()
    df['dgdd'] = step_change.fillna(0).abs()
    return df

#define dtr function
def dtr_cal(df):
    """
    Adds a 'dtr' column: the temperature range 'tmax' - 'tmin' of each row.

    Parameters:
        df (pd.DataFrame): DataFrame with 'tmax' and 'tmin' columns.

    Returns:
        pd.DataFrame: The same DataFrame object, with 'dtr' added in place.
    """
    temperature_range = df['tmax'].sub(df['tmin'])
    df['dtr'] = temperature_range
    return df

def prdtr_cal(df):
    """
    Adds a 'prdtr' column: 'precip' divided by 'dtr', row by row.

    Parameters:
        df (pd.DataFrame): DataFrame with 'precip' and 'dtr' columns.

    Returns:
        pd.DataFrame: The same DataFrame object, with 'prdtr' added in place.

    Notes:
        Rows where 'dtr' is 0 are not special-cased; the result there is
        whatever pandas division yields (inf, or NaN for 0 / 0).
    """
    ratio = df['precip'].div(df['dtr'])
    df['prdtr'] = ratio
    return df




def cum_gdd(df, date_column='date', tavg_column='Tavg', lower_threshold=0, upper_threshold=30):
    """
    Adds a 'cum_gdd' column: cumulative GDD that restarts in every calendar year.

    Daily GDD is max(0, min(upper_threshold, Tavg) - lower_threshold). Rows are
    accumulated in their existing order within each calendar year of the date
    column.

    Parameters:
        df (pd.DataFrame): DataFrame containing daily temperature data.
        date_column (str): Name of the column with dates.
        tavg_column (str): Name of the column with average daily temperature.
        lower_threshold (float): Lower threshold for GDD (default = 0°C).
        upper_threshold (float): Upper threshold for GDD (default = 30°C).

    Returns:
        pd.DataFrame: The same DataFrame object, with 'cum_gdd' added in place.

    Notes:
        A missing temperature contributes 0 GDD, so the running total simply
        carries over that day. (The built-in min() used previously returned the
        upper threshold for NaN, silently crediting a full 30 GDD.)
        No helper columns are written, so existing 'Year' or 'GDD' columns in
        the input are left untouched.
    """
    # Ensure the date column is in datetime format
    df[date_column] = pd.to_datetime(df[date_column])

    # Cap at the upper threshold, shift by the lower one, floor at zero
    daily_gdd = (df[tavg_column].clip(upper=upper_threshold) - lower_threshold).clip(lower=0)

    # Days without a temperature reading add nothing to the running total
    daily_gdd = daily_gdd.fillna(0)

    # Group on the year values directly instead of a scratch 'Year' column
    df['cum_gdd'] = daily_gdd.groupby(df[date_column].dt.year).cumsum()

    return df




In [3]:
def stage(df, cumulative_gdd_column='cum_gdd'):
    """
    Labels every row with a phenological stage derived from cumulative GDD.

    The label is inserted as a 'stage' column at column position 3. Each stage
    covers a half-open interval [lower, upper) of cumulative GDD; any value that
    falls in none of the intervals (1800 and above, negative, or missing) is
    labelled 'Beyond Maturity'.

    Parameters:
        df (pd.DataFrame): DataFrame containing cumulative GDD data.
        cumulative_gdd_column (str): Column name for cumulative GDD values.

    Returns:
        pd.DataFrame: The same DataFrame object, with 'stage' added in place.

    Raises:
        ValueError: If a 'stage' column already exists (raised by DataFrame.insert).
    """
    # Stage name -> (inclusive lower bound, exclusive upper bound) in cumulative GDD
    gdd_windows = {
        'Emergence': (0, 110),
        'Tillering': (110, 440),
        'Jointing': (440, 670),
        'Heading': (670, 1100),
        'Flowering': (1100, 1300),
        'Grain fill': (1300, 1675),
        'Maturity': (1675, 1800),
    }

    def _label_for(total_gdd):
        # First window containing the value wins; otherwise use the fallback label
        return next(
            (
                label
                for label, (start, stop) in gdd_windows.items()
                if start <= total_gdd < stop
            ),
            'Beyond Maturity',
        )

    labels = df[cumulative_gdd_column].apply(_label_for)
    df.insert(3, 'stage', labels)

    return df


In [None]:
#########################################
###    Calculation of wet day         ###
#########################################

def wet_day_cal(df):
    """
    Adds a 'wet' indicator column: 1 where 'precip' is greater than 0, else 0.

    Parameters:
        df (pd.DataFrame): DataFrame with a 'precip' column.

    Returns:
        pd.DataFrame: The same DataFrame object, with 'wet' added in place.
    """
    has_rain = df['precip'].gt(0)
    # Store the boolean mask as integers (True -> 1, False -> 0)
    df['wet'] = has_rain.astype(int)
    return df


# Input and output folders are the same: every workbook is rewritten in place
# with the extra 'wet' column.
directory_path = './p_h_with_strs'
output_directory = './p_h_with_strs'

for filename in os.listdir(directory_path):
    if filename.endswith('.xlsx'):
        try:
            file_path = os.path.join(directory_path, filename)
            data = pd.read_excel(file_path)

            # Apply calculations (adds the 'wet' column to `data` in place)
            wet_day_cal(data)

            # Keep the original file name. The former
            # filename.replace('.csv', '.xlsx') did nothing for ordinary .xlsx
            # names but rewrote any '.csv' inside the name (e.g. 'a.csv.xlsx'
            # -> 'a.xlsx.xlsx'), sending the result to a different file.
            output_file_path = os.path.join(output_directory, filename)
            data.to_excel(output_file_path, index=False)
        except Exception as e:
            # Best effort: report the failing workbook and continue with the rest
            print(f"Error processing {filename}: {e}")

In [4]:
import pandas as pd
import os

def cal_fdd_hdd(df, cl_values, ch_values):
    """
    Adds per-row 'fdd' and 'hdd' columns and totals them per growth stage.

    For each row the thresholds are looked up by the row's 'stage' value:
        fdd = max(0, CL[stage] - tmin)
        hdd = max(0, tmax - CH[stage])

    Parameters:
        df (pd.DataFrame): DataFrame with 'date', 'tmin', 'tmax' and 'stage' columns.
        cl_values (dict): Critical Low (CL) threshold for each stage label.
        ch_values (dict): Critical High (CH) threshold for each stage label.

    Returns:
        tuple: (df, grouped) where df is the same DataFrame object with 'fdd' and
        'hdd' added in place, and grouped is a DataFrame with one row per stage
        and the columns 'stage', 'Cumulative_FDD' and 'Cumulative_HDD'.

    Raises:
        KeyError: If a row's stage label is missing from cl_values or ch_values.
    """
    df['date'] = pd.to_datetime(df['date'])

    def _below_critical_low(record):
        # How far tmin falls under the stage's critical low, never negative
        return max(0, cl_values[record['stage']] - record['tmin'])

    def _above_critical_high(record):
        # How far tmax rises over the stage's critical high, never negative
        return max(0, record['tmax'] - ch_values[record['stage']])

    df['fdd'] = df.apply(_below_critical_low, axis=1)
    df['hdd'] = df.apply(_above_critical_high, axis=1)

    # Sum both measures within each stage
    stage_totals = df.groupby('stage').agg(
        Cumulative_FDD=('fdd', 'sum'),
        Cumulative_HDD=('hdd', 'sum'),
    )
    grouped = stage_totals.reset_index()

    return df, grouped




# Define CL and CH thresholds for each stage
# The keys are exactly the labels produced by stage(), including its
# 'Beyond Maturity' fallback, so cal_fdd_hdd() can look up every row.
# Units are presumably °C, matching the temperature columns -- TODO confirm.

# Critical Low: cal_fdd_hdd() computes fdd = max(0, CL - tmin)
cl_thresholds = {
    'Emergence': 4.23,
    'Tillering': 5.26,
    'Jointing': 2.02,
    'Heading': 2.99,
    'Flowering': 11.12,
    'Grain fill': 14.45,
    'Maturity':14.45,
    'Beyond Maturity': 14.45
}

# Critical High: cal_fdd_hdd() computes hdd = max(0, tmax - CH)
ch_thresholds = {
    'Emergence': 22.79,
    'Tillering': 20.90,
    'Jointing': 22.55,
    'Heading': 20,
    'Flowering': 27,
    'Grain fill': 30,
    'Maturity':30,
    'Beyond Maturity': 30
}


# Disabled draft of the batch loop, parked in a bare string literal so it never
# runs; as the cell's last expression the string is echoed as the cell output.
# NOTE(review): the draft reads df['date'] before df is assigned inside the
# loop, so it would not work if re-enabled as written.
"""
directory_path = './planting_heading_daily_variables'
output_directory = './v'

for filename in os.listdir(directory_path):
    if filename.endswith('.xlsx'):
        try:
            file_path = os.path.join(directory_path, filename)
            data = pd.read_excel(file_path)
            # Convert Date to datetime
            data['date'] = pd.to_datetime(df['date'])
            # Calculate FDD and HDD
            df, cumulative_results = cal_fdd_hdd(data, cl_thresholds, ch_thresholds)

            # Create a unique output file path
            output_file_path = os.path.join(output_directory, filename.replace('.csv', '.xlsx'))
            data.to_excel(output_file_path, index=False)
        except Exception as e:
            print(f"Error processing {filename}: {e}")
            
"""








'\ndirectory_path = \'./planting_heading_daily_variables\'\noutput_directory = \'./v\'\n\nfor filename in os.listdir(directory_path):\n    if filename.endswith(\'.xlsx\'):\n        try:\n            file_path = os.path.join(directory_path, filename)\n            data = pd.read_excel(file_path)\n            # Convert Date to datetime\n            data[\'date\'] = pd.to_datetime(df[\'date\'])\n            # Calculate FDD and HDD\n            df, cumulative_results = cal_fdd_hdd(data, cl_thresholds, ch_thresholds)\n\n            # Create a unique output file path\n            output_file_path = os.path.join(output_directory, filename.replace(\'.csv\', \'.xlsx\'))\n            data.to_excel(output_file_path, index=False)\n        except Exception as e:\n            print(f"Error processing {filename}: {e}")\n            \n'

In [None]:
import os
import pandas as pd

# Reads every workbook in directory_path, derives the daily variables and
# writes the enriched table to output_directory under the same file name.
directory_path = './planting_heading_daily_variables'
output_directory = './v'

# to_excel does not create missing folders; without this a fresh checkout
# fails on every file and only the "Error processing" lines are printed.
os.makedirs(output_directory, exist_ok=True)

for filename in os.listdir(directory_path):
    if filename.endswith('.xlsx'):
        try:
            file_path = os.path.join(directory_path, filename)
            data = pd.read_excel(file_path)

            # Apply calculations. Each helper adds its column(s) to `data` in
            # place, and the order matters: gdd_cal needs 'Tavg', dgdd_cal needs
            # 'gdd', prdtr_cal needs 'dtr', stage needs 'cum_gdd', and
            # cal_fdd_hdd needs 'stage'.
            Tavg_cal(data)
            gdd_cal(data)
            dgdd_cal(data)
            dtr_cal(data)
            prdtr_cal(data)
            doy_cal(data)
            dap_cal(data)
            cum_gdd(data)
            stage(data)
            wet_day_cal(data)
            # df is the same object as data; cumulative_results holds the
            # per-stage totals and is not written to the workbook.
            df, cumulative_results = cal_fdd_hdd(data, cl_thresholds, ch_thresholds)

            # Keep the original file name. The former
            # filename.replace('.csv', '.xlsx') did nothing for ordinary .xlsx
            # names but rewrote any '.csv' inside the name.
            output_file_path = os.path.join(output_directory, filename)
            data.to_excel(output_file_path, index=False)
        except Exception as e:
            # Best effort: report the failing workbook and continue with the rest
            print(f"Error processing {filename}: {e}")
