# Notebook to read CTL Files

## The files were read using RegEx and other methods

The notebook is divided into functions that use regular expressions (RegEx) to extract information from `.ctl` files.

In [1]:
import re

# Path of the ctl file to parse (no directory part, so it is resolved
# against the current working directory)
CTL_PATH = r'GPOSNMC20170906122017090612P.inz.TQ0666L064.ctl'

# Open the file in text mode. The handle is kept open on purpose: every
# parsing function below rewinds it with seek(0) before reading it again.
# NOTE(review): the handle is never closed in the visible cells.
ctl = open(CTL_PATH)
# Uncomment to inspect the raw content of the file:
# content = ctl.read()
# print(content)

The function below returns the number of points along each of the `x`, `y` and `t` axes:

1. x = points in longitude
2. y = points in latitude
3. t = number of time steps

In [2]:
def get_xyt(ctl_file):
    """Return every integer that is directly followed by ' linear'.

    The handle is rewound first, so the whole file is always scanned.
    For a ctl file whose x, y and t axes are all declared as linear, the
    result is expected to be ``[x_points, y_points, t_steps]`` (in file
    order).

    Args:
        ctl_file: open text handle of the ctl file.

    Returns:
        list of int, one per ``<number> linear`` occurrence.
    """
    ctl_file.seek(0)
    text = ctl_file.read()

    # The look-ahead keeps only the digits; the ' linear' keyword itself
    # is not part of the match.
    counts = re.findall(r'\d+(?=\slinear)', text)

    return [int(count) for count in counts]

# Collect the point counts of the linearly spaced axes and display them.
xyt = get_xyt(ctl)
xyt

[2000, 1000, 1]

In [3]:
# Get the total number of vertical levels declared in the file.
# These are pressure levels, i.e. how many pressure levels a variable can have.
def get_total_levels(ctl_file):
    """Return the number of vertical levels declared on the ``zdef`` line.

    Args:
        ctl_file: open text handle of the ctl file; it is rewound before
            being read.

    Returns:
        str: the integer that follows ``zdef``, as text (e.g. ``'33'``).

    Raises:
        ValueError: if the file contains no ``zdef <number>`` entry.
    """
    ctl_file.seek(0)
    ctl_content = ctl_file.read()

    # Anchor on the start of a line and accept any run of blanks after the
    # keyword. The previous look-behind demanded exactly four whitespace
    # characters and failed on any other layout.
    pattern = re.compile(r'^[ \t]*zdef[ \t]+(\d+)', re.IGNORECASE | re.MULTILINE)
    match = pattern.search(ctl_content)
    if match is None:
        raise ValueError("no 'zdef <count>' entry found in the ctl file")

    return match.group(1)

# Read the level count from the zdef entry (returned as text) and display it.
tot_levels = get_total_levels(ctl)
tot_levels

'33'

In [4]:
# Get the total number of meteorological variables in the file
def get_total_vars(ctl_file):
    """Return the number of variables declared on the ``vars`` line.

    Args:
        ctl_file: open text handle of the ctl file; it is rewound before
            being read.

    Returns:
        str: the integer that follows ``vars``, as text (e.g. ``'30'``).

    Raises:
        ValueError: if the file contains no ``vars <number>`` entry.
    """
    ctl_file.seek(0)
    ctl_content = ctl_file.read()

    # The keyword must open a line (so 'endvars' can never match) and may be
    # followed by any run of blanks. The previous look-behind only worked
    # when exactly four whitespace characters separated keyword and count.
    pattern = re.compile(r'^[ \t]*vars[ \t]+(\d+)', re.IGNORECASE | re.MULTILINE)
    match = pattern.search(ctl_content)
    if match is None:
        raise ValueError("no 'vars <count>' entry found in the ctl file")

    return match.group(1)

# Read the declared variable count from the vars entry (returned as text) and display it.
tot = get_total_vars(ctl)
tot

'30'

In [5]:
# This function is to get the correspondent grib file that the ctl is related to.
def get_file_name(ctl_file):
    """Return the GRIB file name that follows ``dset ^`` in the ctl file.

    The name must consist of an alphanumeric stem, an optional
    ``fct``/``icn``/``inz`` part, an optional further alphanumeric part and
    a ``.grb`` or ``.grib2`` extension.

    Args:
        ctl_file: open text handle of the ctl file; rewound before reading.

    Returns:
        str: the matched file name (without the leading ``dset ^``).

    Raises:
        AttributeError: if no such name is present (the search result is
            ``None`` in that case).
    """
    ctl_file.seek(0)
    grib_name = re.search(
        r'(?<=dset\s\^)[A-Za-z0-9]+\.?(fct|icn|inz)?\.?[a-zA-Z0-9]*\.(grb|grib2)',
        ctl_file.read(),
    )
    return grib_name.group()

# Extract the GRIB file name referenced by the dset entry and display it.
dset_grib = get_file_name(ctl)
dset_grib

'GPOSNMC20170906122017090612P.inz.TQ0666L064.grb'

In [6]:
# Function to get the starting point in lats and lons.
# It also returns the spacing between each point.
def get_space_latlons(ctl_file):
    """Return the start value and spacing of the linear lon/lat axes.

    The values are read from the ``xdef`` (longitude) and ``ydef``
    (latitude) entries, which must have the form
    ``<axis> <count> linear <start> <step>``.

    Args:
        ctl_file: open text handle of the ctl file; rewound before reading.

    Returns:
        dict: keys ``'lon_start'``, ``'lon_dist'``, ``'lat_start'`` and
        ``'lat_dist'``; each value is the string exactly as written in the
        file (no numeric conversion).

    Raises:
        ValueError: if ``xdef`` or ``ydef`` is missing or is not declared
            as ``linear``.
    """
    ctl_file.seek(0)
    ctl_content = ctl_file.read()

    # Read each axis from its own line. Grabbing "the first four decimal
    # numbers in the file" silently shifts every key as soon as another
    # decimal (for example an undef value such as 9.999E+20) comes first.
    axis_template = r'^[ \t]*{axis}[ \t]+\d+[ \t]+linear[ \t]+(\S+)[ \t]+(\S+)'

    info = {}
    for axis, prefix in (('xdef', 'lon'), ('ydef', 'lat')):
        match = re.search(
            axis_template.format(axis=axis),
            ctl_content,
            re.IGNORECASE | re.MULTILINE,
        )
        if match is None:
            raise ValueError(
                "no '{} <count> linear <start> <step>' entry found in the ctl file".format(axis)
            )
        info[prefix + '_start'], info[prefix + '_dist'] = match.groups()

    return info

# Collect the start and spacing values for longitude and latitude and display them.
info_about_coords = get_space_latlons(ctl)
info_about_coords

{'lat_dist': '0.1800000000',
 'lat_start': '-89.910',
 'lon_dist': '0.1800000000',
 'lon_start': '0.000'}

The next function is meant to return the units of the variables. It is partly working, but not properly yet.

In [10]:
def get_vars_units(ctl_file):
    """Print the unit text found right after each ``(`` or ``[``.

    Nothing is returned: every match is written to standard output, one per
    line, with its last two characters removed (the closing delimiter and
    the non-word character matched after it).

    Args:
        ctl_file: open text handle of the ctl file; rewound before reading.
    """
    ctl_file.seek(0)
    text = ctl_file.read()

    unit_regex = r'(?<=(\(|\[))[A-Za-z*0-9,\%/-]*\s?[A-Za-z*0-9,\%/-]*(\s+|\])\W'
    for hit in re.finditer(unit_regex, text):
        raw_unit = hit.group()
        # Drop the two trailing characters that belong to the delimiter.
        print(raw_unit[:-2])

# Print the unit strings found in the file (the function prints them; it returns nothing).
get_vars_units(ctl)

m
0,1
HPA            
M/S            
M/S            
M/S            
M/S            
PA/S           
1/S            
M2/S           
M2/S           
GPM            
HPA            
K              
K              
NO DIM         
NO DIM         
KG/KG          
KG/M2          
K              
K              
0-1            
0-1            
0-1            
K              
KG/KG          
g/m**3         
KG/KG          
KG/KG          
%              


In [11]:
def get_start_end_vars(ctl_file):
    """Locate the ``vars`` and ``endvars`` lines of the ctl file.

    Args:
        ctl_file: open text handle of the ctl file; rewound before reading.

    Returns:
        tuple (start, end) holding the 1-based line numbers of the last
        line starting with ``vars`` and the last line starting with
        ``endvars``. A marker that is never found is reported as 0.
    """
    ctl_file.seek(0)
    first = last = 0
    # Counting from 1 yields line numbers directly instead of 0-based indexes.
    for line_number, text in enumerate(ctl_file, start=1):
        if text.startswith('vars'):
            first = line_number
        if text.startswith('endvars'):
            last = line_number

    return first, last

def get_name_vars(ctl_file, start_line, end_line):
    """Return the description column of the variable lines.

    Args:
        ctl_file: open text handle of the ctl file; rewound before reading.
        start_line: 1-based line number of the ``vars`` line, as returned
            by ``get_start_end_vars``.
        end_line: 1-based line number of the ``endvars`` line, as returned
            by ``get_start_end_vars``.

    Returns:
        list of str: characters 38-77 of every selected line, padding
        included. The first two variable lines after ``vars`` are skipped.
    """
    ctl_file.seek(0)

    # enumerate() is 0-based while both arguments are 1-based line numbers.
    # NOTE(review): skipping the first two variable lines matches the
    # original condition (n > start_line + 1) and looks intentional, since
    # the notebook treats those two variables separately - confirm.
    first_index = start_line + 2
    # The endvars line sits at index end_line - 1 and must be excluded;
    # the former bound (n < end_line) included it and appended a spurious
    # empty string to the result.
    stop_index = end_line - 1

    return [
        line[38:78]
        for index, line in enumerate(ctl_file)
        if first_index <= index < stop_index
    ]

# Get the first 2 variables in the ctl file.
# They are standard variables that never change,
# so they have to be taken into account as well.
def get_two_vars(ctl_file):
    """Return every ``surface <UPPERCASE WORDS>`` description in the file.

    Each match is the literal word ``surface`` followed by up to three
    upper-case words; surrounding whitespace is stripped.

    Args:
        ctl_file: either an open text handle of the ctl file (it is rewound
            and read, like the other helpers do) or the file content as a
            string.

    Returns:
        list of str: the stripped matches, in file order.
    """
    # The other helpers receive the open handle, but a regex can only scan
    # text: passing the handle straight to finditer() raised TypeError.
    if hasattr(ctl_file, 'read'):
        ctl_file.seek(0)
        content = ctl_file.read()
    else:
        content = ctl_file

    pattern = re.compile(r'surface\s[A-Z]*\s?[A-Z]*\s?[A-Z]*')
    return [m.group().strip() for m in pattern.finditer(content)]

# Locate the vars/endvars lines, slice characters 38-77 out of the selected
# lines in between and print one description per line.
start, end = get_start_end_vars(ctl)
grib_vars = get_name_vars(ctl, start, end)
for v in grib_vars:
    print(v)

SURFACE PRESSURE                        
SURFACE ZONAL WIND (U)                  
ZONAL WIND (U)                          
SURFACE MERIDIONAL WIND (V)             
MERIDIONAL WIND (V)                     
OMEGA                                   
VORTICITY                               
STREAM FUNCTION                         
VELOCITY POTENTIAL                      
GEOPOTENTIAL HEIGHT                     
SEA LEVEL PRESSURE                      
SURFACE ABSOLUTE TEMPERATURE            
ABSOLUTE TEMPERATURE                    
SURFACE RELATIVE HUMIDITY               
RELATIVE HUMIDITY                       
SPECIFIC HUMIDITY                       
INST. PRECIPITABLE WATER                
SURFACE TEMPERATURE                     
DEEP SOIL TEMPERATURE                   
SOIL WETNESS OF SURFACE                 
SOIL WETNESS OF ROOT ZONE               
SOIL WETNESS OF DRAINAGE ZONE           
TEMPERATURE AT 2-M FROM SURFACE         
SPECIFIC HUMIDITY AT 2-M FROM SURFACE   
PARTIAL OXYGEN D