In [65]:
import os
import pandas as pd
import WS_Mdl.utils as U
import shutil as sh

In [2]:
def read_msw_file(filename, headers_key):
    """
    Read an MSW input file using fixed-width format parsing.
    
    Parameters:
    -----------
    filename : str
        Name of the file in the MSW_In directory
    headers_key : str
        Key in d_headers dictionary for column names
        
    Returns:
    --------
    pd.DataFrame
        Parsed data with proper column names
    """
    # Read with automatic fixed-width detection
    df_raw = pd.read_fwf(U.PJ('MSW_In', filename), header=None)
    
    # Get expected number of columns
    expected_headers = d_headers[headers_key]
    num_expected_cols = len(expected_headers)
    
    # Take only the expected number of columns (some files may have extra columns)
    df = df_raw.iloc[:, :num_expected_cols].copy()
    
    # Assign proper column names
    df.columns = expected_headers
    
    return df

In [3]:
l_MSW_In = os.listdir('MSW_In')

In [75]:
d_headers = {
    'fact_svat.inp': [
        'vegetation type',
        'day number',
        'soil cover',
        'leaf area index',
        'interception capacity',
        'vegetation factor',
        'factor for interception evaporation',
        'factor for bare soil evaporation',
        'factor for ponding',
        'crop height',
        'dynamic root zone depth',
        #'correction factor for CO2-effect on transpiration',
        ],
    'init_svat.inp': ['pF of root zone'],
    'luse_svat.inp': [
        'index of land use type',
        'name of land use type',
        'index of vegetation type',
        'Jarvis parameter for o2 stress',
        'Jarvis parameter for drought stress',
        'p1 Feddes function',
        'p2 Feddes function',
        'p3h Feddes function',
        'p3l Feddes function',
        'p4 Feddes function',
        't3 Feddes function',
        't3 Feddes function_',
        'pressure head begin sprinkling/drought stress alpha begin sprinkling',
        'fraction evaporated sprinkling water',
        'gift in rotational period',
        'duration gift',
        'rotational period',
        'beginning of sprinkling period (season)',
        'end of sprinkling period',
        'albedo in Penman-Monteith (PM)',
        'crop resistance, dry leaves in PM',
        'crop resistance, wet leaves in PM',
        'soil resistance in PM',
        'extinction coefficient diffuse light',
        'extinction coefficient direct light',
        'option parameter for interception',
        'interception capacity per LAI, Rutter',
        'intercept of interception evaporation',
        'interception capacity per LAI, Von H',
        'free throughflow coefficient, Gash',
        'stem flow coefficient, Gash',
        'interception capacity of canopy, Gash',
        'average rainfall intensity, Gash',
        'average evaporation intensity Gash',
        'max. concentration for no salt stress, Maas & Hoffman model',
        'decline of salt stress coefficient per unit of salt concentration, Maas & Hoffman'
        ],
    'mete_grid.inp': [
        'time from 00:00:00',
        'year number',
        'path',
        'etrefgrid',
        'free',
        'path',
        'tempmxgrid',
        'string (max 256 char’s),',
        'ame mean temperature grid',
        '-',
        'enclosed in “ “',
        'free',
        'path',
        'windgrid',
        ],
    'sel_key_svat_per.inp': [
        'do not include variable in database',
        'include variable in database, make idf file with unit m (m3 per active m2)',
        'include variable in database, make idf file, with unit m3',
        'include variable in database, make idf file, with unit m and m3'
    ],
    'tiop_sim.inp': [
        'time from beginning of year at 00:00:00',
        'year number',
        'option number'
    ],
    'para_sim.inp': [
        'vegetation type',
        'path of crop file, including name of the crop file itself',
        'time of crop emergence',
        'time of crop harvest',
        'Crop type',
        'Type of initialization',
    ]
}

In [76]:
d_colspecs = {'luse_svat.inp': [
    (1, 6),
    (8, 26),
    (27, 32),
    (33, 35),
    (36, 38),
    (39, 46),
    (47, 54),
    (55, 62),
    (63, 70),
    (71, 78),
    (79, 86),
    (87, 94),
    (95, 102),
    (103, 110),
    (111, 118),
    (119, 126),
    (127, 132),
    (133, 138),
    (139, 144),
    (145, 152),
    (153, 160),
    (161, 168),
    (169, 176),
    (177, 184),
    (185, 192),
    (193, 198),
    (199, 206),
    (207, 214),
    (215, 222),
    (223, 230),
    (231, 238),
    (239, 246),
    (247, 254),
    (255, 262),
    (263, 270),
    (271, 278),
    #(279, 350),    
    ]
    }

In [84]:
for i in l_MSW_In:
    try:        
        if i == 'mete_grid.inp':
            DF = pd.read_csv(U.PJ('MSW_In', i), header=None) # , names=d_headers[i]
        elif i in d_colspecs:
            DF = pd.read_fwf(U.PJ('MSW_In', i), colspecs=d_colspecs[i], header=None) # , l_headers=d_headers[i]
        elif i in ('para_sim.inp', 'sel_key_svat_per.inp'):
            print(i, "🟡 - Already annotated (by default MSW settings). File got copied so they're all in the same folder.")
            sh.copy2(U.PJ('MSW_In', i), U.PJ('MSW_In_annotated', i))
            continue            
        else:
            DF = pd.read_fwf(U.PJ('MSW_In', i), header=None) # , l_headers=d_headers[i]
        DF.columns = d_headers[i][:DF.shape[1]]
        DF.to_csv(U.PJ('MSW_In_annotated', i.replace('inp', 'csv')), index=False)
        print(i, '🟢')
        # print(DF)
    except Exception as e:
        print(i, '🔴', e)

fact_svat.inp 🟢
init_svat.inp 🟢
luse_svat.inp 🟢
mete_grid.inp 🟢
para_sim.inp 🟡 - Already annotated (by default MSW settings). File got copied so they're all in the same folder.
sel_key_svat_per.inp 🟡 - Already annotated (by default MSW settings). File got copied so they're all in the same folder.
tiop_sim.inp 🟢


# Junkyard

## d_headers constructor

In [72]:
s = """free
F
td
d
time from 00:00:00
free
I
iy
-
year number
free
string (max 256 char’s), enclosed in “ “
precgrid
mm/d
path\name precipitation grid
free
string (max 256 char’s),
enclosed in “ “
etrefgrid
mm/d
path\name evapotranspiration grid
(reference crop values)
free
string (max 256 char’s),
enclosed in “ “
tempmngrid
ºC
path\name min. day temperature grid
free
string (max 256 char’s),
enclosed in “ “
tempmxgrid
ºC
path\name max. day temperature grid
free
string (max 256 char’s),
enclosed in “ “
tempgrid
ºC
path\name mean temperature grid
free
string (max 256 char’s),
enclosed in “ “
Nrelgrid
-
path\name relative sunshine duration grid
free
string (max 256 char’s),
enclosed in “ “
radgrid
kJ/m2/d
path\name radiation grid
free
string (max 256 char’s),
enclosed in “ “
humgrid
kPa
path\name humidity grid
free
string (max 256 char’s),
enclosed in “ “
windgrid
m/s
path\name wind speed grid"""

In [74]:
i = 1
for Ln in s.split('\n'):
    if (i) % 5 == 0:
        print(Ln)
    i += 1

time from 00:00:00
year number
path
etrefgrid
free
path
tempmxgrid
string (max 256 char’s),
ame mean temperature grid
-
enclosed in “ “
free
path
windgrid


## Test the function with different files
for i, filename in enumerate(l_MSW_In[:3]):  # Test first 3 files
    if filename in d_headers:
        print(f"\n=== {filename} ===")
        try:
            df = read_msw_file(filename, filename)
            print(f"Shape: {df.shape}")
            print(f"Columns: {list(df.columns)}")
            print("First row:")
            print(df.iloc[0])
        except Exception as e:
            print(f"Error reading {filename}: {e}")
    else:
        print(f"\n=== {filename} === (no headers defined)")
        # Just show the raw structure
        try:
            df_raw = pd.read_fwf(U.PJ('MSW_In', filename), header=None)
            print(f"Raw shape: {df_raw.shape}")
            print("First few values:")
            print(df_raw.iloc[0, :min(5, df_raw.shape[1])].tolist())
        except Exception as e:
            print(f"Error: {e}")