<a href="https://colab.research.google.com/github/danghh333/MarkSim_conversion_to_ORYZA/blob/main/MarkSim_conversion_to_ORYZA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import pandas as pd
import numpy as np
from io import StringIO
import seaborn as sns

In [None]:
# Folder that process_file() scans for input files ending in ".WTG".
INPUT_FOLDER = "/content/drive/MyDrive/RCP Data"
# Folder that receives the converted files; process_file() creates it if missing.
OUTPUT_FOLDER = "/content/drive/MyDrive/RCP Data/output"
# Inclusive range of target years. process_file() pairs the sorted input files
# with these years in order, one file per year.
START_YEAR = 2030
END_YEAR = 2049
# Station number: used in the output filename and as the first field of every data row.
STATION_CODE = 3
# Station name: prefix of the output filename (e.g. "KGIA3.030" for 2030).
STATION_NAME = "KGIA"
# Written verbatim as the first line of every output file.
# NOTE(review): presumably "longitude, latitude, elevation, ..." for the station —
# confirm the field order against the ORYZA weather-file specification.
COORDINATES_LINE = "105.145, 9.946, 2.0, 0, 0"

In [None]:
def calculate_vp(t_min):
    """Evaluate the Tetens equation at ``t_min``.

    Computes ``0.6108 * exp(17.27 * t / (237.3 + t))``. With these constants
    the formula is conventionally used with temperature in degrees Celsius
    and yields a pressure in kPa.

    Args:
        t_min: Temperature value; a scalar or a NumPy array (the formula is
            applied element-wise).

    Returns:
        The Tetens-equation result for ``t_min`` (same shape as the input).
    """
    exponent = (17.27 * t_min) / (237.3 + t_min)
    return 0.6108 * np.exp(exponent)

def get_wind_range(csv_path, default=(1.5, 4.5)):
    """Return the (min, max) of the wind-speed column found in a CSV file.

    The first column whose name contains ``'Wind_speed'`` is used. Values
    that cannot be parsed as numbers are ignored. When the file cannot be
    read, has no such column, or the column holds no numeric value, the
    ``default`` range is returned instead, so the caller always receives a
    usable pair of numbers.

    Args:
        csv_path: Path of the CSV file to sample.
        default: ``(min, max)`` pair returned when no range can be derived.

    Returns:
        A ``(min, max)`` tuple.

    NOTE(review): values read from the CSV are returned as-is, while the
    fallback message labels the default as m/s — confirm the CSV column uses
    the same unit.
    """
    fallback = (default[0], default[1])
    fallback_range = f"{fallback[0]} - {fallback[1]} m/s"

    try:
        df_sample = pd.read_csv(csv_path)
    except (OSError, ValueError) as e:
        # OSError covers missing/unreadable files; pandas' EmptyDataError and
        # ParserError (and UnicodeDecodeError) are ValueError subclasses.
        print(f"-> Cannot read {csv_path} ({e}), use default value: {fallback_range}")
        return fallback

    wind_col = next((col for col in df_sample.columns if 'Wind_speed' in str(col)), None)
    if wind_col is None:
        print(f"-> Cannot find, use default value: {fallback_range}")
        return fallback

    # Coerce so stray text becomes NaN, then drop NaN: otherwise an empty or
    # non-numeric column would yield NaN/str bounds and break wind sampling.
    wind = pd.to_numeric(df_sample[wind_col], errors='coerce').dropna()
    if wind.empty:
        print(f"-> Cannot find, use default value: {fallback_range}")
        return fallback

    min_w = wind.min()
    max_w = wind.max()
    print(f"Min={min_w}, Max={max_w}")
    return min_w, max_w

def is_leap(year):
    """Tell whether ``year`` is a leap year under the Gregorian rules.

    A year divisible by 400 is a leap year; otherwise a year divisible by
    100 is not; otherwise a year divisible by 4 is.

    Args:
        year: The calendar year to test.

    Returns:
        ``True`` for leap years, ``False`` otherwise.
    """
    if year % 400 == 0:
        return True
    if year % 100 == 0:
        return False
    return year % 4 == 0

def process_file(wind_csv_path="/content/weather_data_2011.csv", seed=None):
    """Convert the ``.WTG`` files in INPUT_FOLDER into per-year weather files.

    The sorted ``.WTG`` files are paired, in order, with the years
    START_YEAR..END_YEAR (inclusive); processing stops when either runs out.
    For each pair one output file named
    ``{STATION_NAME}{STATION_CODE}.{last three digits of the year}`` is
    written to OUTPUT_FOLDER. It starts with COORDINATES_LINE followed by one
    comma-separated row per day:

        station code, year, day-of-year, SRAD * 1000, TMIN, TMAX, VP, WIND, RAIN

    VP comes from ``calculate_vp(TMIN)`` and WIND is drawn uniformly from the
    range returned by ``get_wind_range``.

    Args:
        wind_csv_path: CSV file used to derive the wind-speed range.
        seed: Optional seed for the random wind values. ``None`` (default)
            keeps using NumPy's global random state; any other value makes
            the generated wind reproducible.

    A file that cannot be read or parsed is reported and skipped; the year it
    was paired with then gets no output file.
    """
    os.makedirs(OUTPUT_FOLDER, exist_ok=True)
    wind_min, wind_max = get_wind_range(wind_csv_path)

    # Keep the legacy global RNG unless the caller asks for reproducibility.
    if seed is None:
        draw_wind = np.random.uniform
    else:
        draw_wind = np.random.default_rng(seed).uniform

    files = sorted(f for f in os.listdir(INPUT_FOLDER) if f.endswith('.WTG'))

    # zip() pairs year i with file i and stops at the shorter sequence.
    for target_year, filename in zip(range(START_YEAR, END_YEAR + 1), files):
        filepath = os.path.join(INPUT_FOLDER, filename)

        year_suffix = f"{target_year % 1000:03d}"
        output_filename = f"{STATION_NAME}{STATION_CODE}.{year_suffix}"
        output_path = os.path.join(OUTPUT_FOLDER, output_filename)

        print(f"Processing: {filename}  --->  {output_filename}")

        try:
            # Keep only lines whose first non-blank character is a digit;
            # this drops header/comment lines before parsing.
            with open(filepath, 'r') as f:
                clean_lines = [line for line in f
                               if line.strip() and line.strip()[0].isdigit()]

            # Raw string: '\s' in a normal string literal is an invalid escape.
            df = pd.read_csv(StringIO("".join(clean_lines)), sep=r'\s+', header=None,
                             names=['DATE', 'SRAD', 'TMAX', 'TMIN', 'RAIN'])
            df = df.apply(pd.to_numeric, errors='coerce').dropna()
        except Exception as e:
            print(f"Error {filename}: {e}")
            continue

        target_is_leap = is_leap(target_year)
        data_list = df.to_dict('records')

        if target_is_leap and len(data_list) == 365:
            # Pad a 365-day source to a leap year by repeating the last day.
            data_list.append(data_list[-1])
        elif not target_is_leap and len(data_list) == 366:
            # Drop the 60th record (Feb 29 of a leap-year source).
            data_list.pop(59)

        expected_days = 366 if target_is_leap else 365
        if len(data_list) != expected_days:
            print(f"Warning {filename}: {len(data_list)} daily records, "
                  f"expected {expected_days} for {target_year}")

        with open(output_path, 'w') as f:
            f.write(f"{COORDINATES_LINE}\n")

            # 9 columns
            # Station code, Year, DOY, SRAD, TMIN, TMAX, VP, WIND, RAIN
            doy = 1
            for row in data_list:
                try:
                    srad_kj = float(row['SRAD']) * 1000
                    t_min = float(row['TMIN'])
                    t_max = float(row['TMAX'])
                    vp = calculate_vp(t_min)
                    wind = draw_wind(wind_min, wind_max)
                    rain = float(row['RAIN'])
                except (KeyError, TypeError, ValueError, ArithmeticError) as e:
                    # Skip the bad record but say so; DOY is not advanced,
                    # matching the numbering of the rows actually written.
                    print(f"Warning {filename}: skipped a record ({e})")
                    continue

                f.write(f"{STATION_CODE},{target_year},{doy},"
                        f"{srad_kj:.1f},{t_min:.1f},{t_max:.1f},"
                        f"{vp:.2f},{wind:.1f},{rain:.1f}\n")
                doy += 1

    print("\n Finished")



  df = pd.read_csv(StringIO(clean_content), sep='\s+', header=None,


In [None]:
# Run the conversion for START_YEAR..END_YEAR; one "Processing: ..." line is
# printed per input file, followed by "Finished".
process_file()

Min=5, Max=28
Processing: CLIM0101.WTG  --->  KGIA3.030
Processing: CLIM0201.WTG  --->  KGIA3.031
Processing: CLIM0301.WTG  --->  KGIA3.032
Processing: CLIM0401.WTG  --->  KGIA3.033
Processing: CLIM0501.WTG  --->  KGIA3.034
Processing: CLIM0601.WTG  --->  KGIA3.035
Processing: CLIM0701.WTG  --->  KGIA3.036
Processing: CLIM0801.WTG  --->  KGIA3.037
Processing: CLIM0901.WTG  --->  KGIA3.038
Processing: CLIM1001.WTG  --->  KGIA3.039
Processing: CLIM1101.WTG  --->  KGIA3.040
Processing: CLIM1201.WTG  --->  KGIA3.041
Processing: CLIM1301.WTG  --->  KGIA3.042
Processing: CLIM1401.WTG  --->  KGIA3.043
Processing: CLIM1501.WTG  --->  KGIA3.044
Processing: CLIM1601.WTG  --->  KGIA3.045
Processing: CLIM1701.WTG  --->  KGIA3.046
Processing: CLIM1801.WTG  --->  KGIA3.047
Processing: CLIM1901.WTG  --->  KGIA3.048
Processing: CLIM2001.WTG  --->  KGIA3.049

 Finished
