### 1. Extract HRU sizes from output.std to reproduce the SWAT-Check table

In [1]:
### Hru size
import pandas as pd

def HRUsize(file_path, str1, str2):
    """Return the ``AREAkm2`` column of a table embedded in a text file.

    The file is scanned line by line. Collection starts on the line after
    one that contains ``str1`` and stops at the first later line that
    contains ``str2`` (or at end of file). Blank lines are ignored, and any
    line containing ``str1`` is never stored. The first collected line
    supplies the column names; the remaining ones are the data rows.

    Args:
        file_path: Path of the text file to read.
        str1: Marker text; the table starts after a line containing it.
        str2: Marker text; the table ends before a line containing it.

    Returns:
        pandas.Series: The ``AREAkm2`` column converted to float.

    Raises:
        ValueError: If no table lines were collected, i.e. ``str1`` does not
            occur in the file or nothing follows it before ``str2``.
        KeyError: If the header line has no ``AREAkm2`` column.
    """
    rows = []
    in_section = False

    with open(file_path, 'r') as file:
        for line in file:
            line = line.strip()

            # Blank lines carry no data.
            if not line:
                continue

            # The start marker itself is never part of the table.
            if str1 in line:
                in_section = True
                continue

            # Everything before the start marker is ignored, including
            # lines that happen to contain the end marker.
            if not in_section:
                continue

            if str2 in line:
                break

            # Fields are separated by runs of whitespace (not only tabs).
            rows.append(line.split())

    if not rows:
        # Without this guard the header lookup below fails with a bare
        # IndexError that says nothing about the actual cause.
        raise ValueError(
            f"No table found in {file_path!r} after marker {str1!r}"
        )

    header, *records = rows
    df = pd.DataFrame(records, columns=header)
    return df['AREAkm2'].astype(float)

# Example to run:
# Reads the table located between the two marker lines of 'output.std' and
# keeps its AREAkm2 column in the module-level name `size`, which the
# SWAT-Check evaluation cell further down uses.
file_path = 'output.std'
# Marker line that precedes the table.
str1 = 'AVE ANNUAL VALUES'
# Marker line that ends the table.
str2 = 'AVE MONTHLY BASIN VALUES'
size =HRUsize(file_path, str1, str2)

### 2. Extract target variable from output.hru

In [2]:
### to student v1: output hru
def process_data(file_path, n_Y, n_hru, skip_lines, feature_to_extract):
    """Split a whitespace-delimited HRU table into monthly, yearly and summary parts.

    After ``skip_lines`` header lines, every remaining line is split on
    whitespace and the positional columns in ``feature_to_extract`` are kept.
    Columns 0, 1, 3, 12, 69 and 71 (when selected) are renamed to
    ``LUC``, ``HRU``, ``SUB``, ``SW``, ``BIOM`` and ``YLD``.

    Row layout assumed by the slicing: each of the ``n_Y`` years occupies
    ``13 * n_hru`` consecutive rows, of which the first ``12 * n_hru`` are
    treated as monthly rows; the last ``n_hru`` rows of the whole table are
    the summary block.

    Args:
        file_path: Path of the text file to read.
        n_Y: Number of years contained in the file.
        n_hru: Number of rows per time step.
        skip_lines: Number of leading lines to discard.
        feature_to_extract: Positional indices of the columns to keep.

    Returns:
        tuple: ``(month_df, month_list, year_df, ana_avr_df)`` where
        ``month_list`` holds one monthly frame per year, ``month_df`` is
        their concatenation, ``year_df`` contains the rows that are neither
        monthly nor part of the final summary block, and ``ana_avr_df`` is
        the final summary block. ``month_df`` and ``ana_avr_df`` have
        numeric ``SUB``/``HRU``/``SW``/``BIOM``/``YLD`` columns; ``year_df``
        and the frames in ``month_list`` keep the values as read from the
        file.
    """
    labels = {0: 'LUC', 1: 'HRU', 3: 'SUB', 12: 'SW', 69: 'BIOM', 71: 'YLD'}
    numeric_types = {'SUB': int, 'HRU': int, 'SW': float, 'BIOM': float, 'YLD': float}

    # Read the table body: drop the header lines, tokenise everything else.
    with open(file_path, 'r') as handle:
        for _ in range(skip_lines):
            next(handle)
        records = [raw.split() for raw in handle]

    # Keep only the requested columns and give them readable names.
    target_df = pd.DataFrame(records).iloc[:, feature_to_extract].copy()
    target_df = target_df.rename(columns=labels)

    # The trailing block of n_hru rows is the summary over the whole run.
    ana_avr_df = target_df.tail(n_hru)
    # Everything except that block; monthly rows are removed year by year.
    year_df = target_df.copy()[:-n_hru]

    month_list = []
    rows_per_year = 13 * n_hru
    for year in range(n_Y):
        first = rows_per_year * year
        monthly_rows = range(first, first + 12 * n_hru)

        monthly = target_df.iloc[monthly_rows]
        # What remains in year_df after the loop are the non-monthly rows.
        year_df = year_df.drop(monthly_rows)
        month_list.append(monthly.reset_index(drop=True))

    year_df = year_df.reset_index(drop=True)
    ana_avr_df = ana_avr_df.reset_index(drop=True).astype(numeric_types)
    month_df = pd.concat(month_list, ignore_index=True).astype(numeric_types)
    return month_df, month_list, year_df, ana_avr_df

# Example
# MAIN for OUTPUT HRU dataframe extraction
# The values below are passed straight to process_data(); `n_hru` and
# `feature_to_extract` remain module-level names that later cells also read.
file_path = 'output.hru'
n_Y = 4  # number of years passed to process_data
n_hru = 69  # number of rows per time step passed to process_data
skip_lines = 9  # leading lines of the file to discard
feature_to_extract=[0, 1, 3, 12 ,69, 71]  # original note says "lai 70": presumably column 70 holds LAI (not extracted) - confirm
month_df,month_list, year_df, ana_avr_df= process_data(file_path, n_Y, n_hru, skip_lines,feature_to_extract)
# The triple-quoted block below is a disabled debugging aid: it is a plain
# string expression, so none of the print calls inside it run.
'''
print("Month DataFrame:")
print(month_df)
print("\nYear DataFrame:")
print(year_df)
print("\nAna Avr DataFrame:")
print(ana_avr_df)
'''

'\nprint("Month DataFrame:")\nprint(month_df)\nprint("\nYear DataFrame:")\nprint(year_df)\nprint("\nAna Avr DataFrame:")\nprint(ana_avr_df)\n'

### 3. Extract target variable from output.mgt

In [3]:
##OUTPUT MGT
##extract mgt plant data

def read_lines_with_string(file_path, search_string, skip_rows=None, columns=None):
    """Collect the lines of a file that contain a string into a DataFrame.

    Every line containing ``search_string`` is split on whitespace. The first
    ``skip_rows`` matching lines are discarded, the positional columns in
    ``columns`` are kept, and columns 0, 1, 2, 5, 6, 11 and 15 (when
    selected) are renamed to ``SUB``, ``HRU``, ``YEAR``, ``AREA``, ``LUC``,
    ``BIOM`` and ``YLD``.

    Args:
        file_path: Path of the text file to read.
        search_string: Text a line must contain to be kept.
        skip_rows: Number of leading matching lines to drop. When omitted,
            the module-level product ``n_hru * SKY`` is used, as before.
        columns: Positional indices of the columns to keep. When omitted,
            the module-level ``feature_to_extract`` is used, as before.

    Returns:
        pandas.DataFrame: The selected columns, with ``SUB`` and ``HRU`` as
        int and ``BIOM`` and ``YLD`` as float. The remaining columns keep
        the values as read from the file.
    """
    # Passing these explicitly makes the result independent of notebook
    # state; `feature_to_extract` in particular is reassigned between cells,
    # so the fallback depends on which cell ran last.
    if skip_rows is None:
        skip_rows = n_hru * SKY
    if columns is None:
        columns = feature_to_extract

    with open(file_path, "r") as file:
        matches = [line.split() for line in file if search_string in line]

    df = pd.DataFrame(matches[skip_rows:])
    target_df = df.iloc[:, columns].copy()
    target_df.rename(
        columns={0: 'SUB', 1: 'HRU', 2: 'YEAR', 5: 'AREA', 6: 'LUC', 11: 'BIOM', 15: 'YLD'},
        inplace=True,
    )
    return target_df.astype({'SUB': int, 'HRU': int, 'BIOM': float, 'YLD': float})

# Example:
# read_lines_with_string() reads SKY, n_hru and feature_to_extract from module
# scope, so these assignments must run before the call. Note that
# feature_to_extract is rebound here and no longer holds the output.hru
# column selection made earlier.
SKY=2  # multiplied by n_hru to get the number of leading matches to drop; presumably "skipped years" - confirm
feature_to_extract=[0, 1, 2, 5, 6,11, 15]
file_path = 'output.mgt'
search_string = "HARV"  # only lines containing this text are kept
harv_df= read_lines_with_string(file_path, search_string)


### 4. Evaluate SWAT-Check based on output.hru

In [5]:
# Calculate average values grouped by the 'LUC' category.
# NOTE(review): avg_values is not used by the rest of this cell.
avg_values = ana_avr_df.groupby('LUC').mean()

# Weight BIOM and YLD by the matching entry of `size` (aligned on the row
# index). The factor 100 presumably converts km2 to ha so that per-hectare
# values become totals - confirm the units.
dfwithsize=ana_avr_df[['BIOM','YLD']].mul(size,axis=0)*100
# Category label of every row, used as the grouping key below.
id_df=ana_avr_df['LUC']
id_df.reset_index(drop=True, inplace=True)
# Side-by-side table: category, weighted BIOM/YLD and the AREAkm2 values.
df_combined = pd.concat([id_df, dfwithsize,size], axis=1)
# Totals per category; as the last expression of the cell the result is
# displayed rather than stored.
df_combined.groupby('LUC').sum()

Unnamed: 0_level_0,BIOM,YLD,AREAkm2
LUC,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
AGRR,0.0,0.0,115.5
BERM,1622.973,0.0,31.914
FRST,51095.5769,0.0,79.678
PAST,936.032,0.0,94.0
