**Purpose:** test Python's built-in regular expression module, `re`

**Goal:** represent and identify OMI and Landsat file naming conventions

Resource: https://www.w3schools.com/python/python_regex.asp

In [None]:
import re

# Landsat File Naming Conventions

## LXS PPPRRR YYYYDDD GSIVV

### Information about what type of satellite and sensor
    L = Landsat
    X = Sensor
        'C' = OLI & TIRS
        'O' = OLI only
        'I' = TIRS only
        'E' = ETM+
        'T' = TM
        'M' = MSS
    S = Satellite
        '8' = Landsat-8
        '7' = Landsat-7
        ...
        '1' = Landsat-1

### Swath location on Earth using Worldwide Reference System (WRS)
    PPP -> WRS path
    RRR -> WRS row

### Date of swath acquisition as year plus day of year (the "Julian day": days counted from Jan. 1 = 001)
    YYYY -> year of acquisition
    DDD -> Julian day of year

### How the data was received by a ground station
    GSI -> Ground station identifier
    VV -> Archive version number

### ex. LT50830152011198GLC00.hdf
        • Landsat-5 TM
        • WRS Path: 083; WRS Row: 015
        • Acquisition Date: July 17, 2011
        • Ground station: GLC (Gilmore Creek, at a NOAA facility near Fairbanks, Alaska); Archive version: 00

In [None]:
# Regex building blocks for a Landsat scene ID: LXS PPPRRR YYYYDDD GSIVV + extension.
# Raw strings are used so the regex syntax is passed to `re` untouched.

# "L", a sensor letter (C/O/I/T/E/M), then the satellite number (1-8).
lxs = r"L[COITEM][1-8]"
# WRS path (PPP) followed by WRS row (RRR): three digits each.
loc = r"[0-9]{3}" + r"[0-9]{3}"
# Four-digit year, then day of year limited to 001-366 so values such as
# 000 or 399 are rejected.
date = r"[12][0-9]{3}" + r"(?:00[1-9]|0[1-9][0-9]|[12][0-9]{2}|3[0-5][0-9]|36[0-6])"
# Ground station identifier (three capital letters) and archive version (two digits).
data = r"[A-Z]{3}" + r"[0-9]{2}"
# Literal dot plus a three-character alphanumeric extension (e.g. ".hdf");
# a bare "." wildcard would also accept punctuation or path separators.
ext = r"[.][A-Za-z0-9]{3}"

In [None]:
# Join the field patterns in file-name order; "^" pins the match to the start
# of the string.
landsat = "".join(("^", lxs, loc, date, data, ext))

# Bare expression so the notebook displays the assembled pattern.
landsat

In [None]:
# Sample Landsat file name used to exercise the pattern.
fn: str = "LT50830152011198GLC00.hdf"

In [None]:
# fullmatch requires the pattern to cover the entire file name, so a name with
# extra characters after the extension gives None rather than a partial match.
x = re.fullmatch(landsat, fn)

# Bare expression so the notebook displays the match object (or nothing for None).
x

In [None]:
# Report whether the match held in `x` covers the whole file name in `fn`.
if x is None:
    print("May not be a Landsat file.")
elif x.group() == fn:
    print("The whole string matches! This must be a Landsat file!")
else:
    # Only part of the name matched the pattern, so it is not reported as a
    # Landsat file.
    print("Only part of the name matches. May not be a Landsat file.")

# OMI Naming Conventions

## \<Instrument ID>_\<Data Type>_\<Data ID>_\<Version Info>.\<Suffix>

### Instrument ID: ID for instrument and spacecraft

    ex. 'OMI-Aura' -> OMI on the Aura spacecraft
    
### Data Type: level and product indicators

    ex. 'L3-OMNO2d' -> OMI Level 3 NO2 data product
    
### Data ID:

    Format:
        <date>, written as <yyyy>m<mmdd> and optionally followed by a time:
        <yyyy>m<mmdd>t<hhmmss>
        
### Version:
    Format:
        v<version>-<production date and time>
            version -> <nnn>
            date-time -> <yyyy>m<mmdd>t<hhmmss>
            
### Suffix
    ex. 'he5'
    

### ex. OMI-Aura_L3-OMTO3e_2022m0709_v003-2022m0711t031807.he5
    Instrument ID: OMI on the Aura spacecraft
    Data Type: OMI Level 3, product OMTO3e
    Data ID: July 9, 2022
    Version:
        Version No. 003
        Production Date & Time - July 11, 2022 at 03:18:07 (UTC)
    Suffix: he5

In [None]:
# Regex building blocks for an OMI file name:
#   <Instrument ID>_<Data Type>_<Data ID>_<Version Info>.<Suffix>
# Fields are separated by "_", so free-form parts use [^_] instead of "." to
# keep one field from swallowing its neighbours.

# Calendar date written as <yyyy>m<mmdd>, with month 01-12 and day 01-31.
omi_day = r"[12][0-9]{3}m(?:0[1-9]|1[0-2])(?:0[1-9]|[12][0-9]|3[01])"
# Time of day written as t<hhmmss>, limited to 000000-235959.
omi_time = r"t(?:[01][0-9]|2[0-3])[0-5][0-9][0-5][0-9]"

# Instrument ID: starts with "OM" and runs up to the next "_" (e.g. "OMI-Aura").
instr = r"OM[^_]+"
# Data type: level 1-3, a dash, then the product name up to the next "_".
dtype = r"L[1-3]-[^_]+"
# Data ID: the date, optionally followed by more characters (such as a
# t<hhmmss> time) that belong to the same field.
date = omi_day + r"[^_]*"
# Version info: "v" + three-digit version + "-" + production date and time.
vers = r"v[0-9]{3}-" + omi_day + omi_time
# Literal dot plus a three-character alphanumeric suffix (e.g. ".he5").
ext = r"[.][A-Za-z0-9]{3}"

In [None]:
# The four named fields are joined with "_" and followed directly by the
# extension; "^" pins the match to the start of the string.
omi = "^" + "_".join((instr, dtype, date, vers)) + ext

# Bare expression so the notebook displays the assembled pattern.
omi

In [None]:
# Sample OMI file name used to exercise the pattern.
fn: str = "OMI-Aura_L3-OMTO3e_2022m0709_v003-2022m0711t031807.he5"

In [None]:
# fullmatch requires the pattern to cover the entire file name, so a name with
# extra characters after the suffix gives None rather than a partial match.
x = re.fullmatch(omi, fn)

# Bare expression so the notebook displays the match object (or nothing for None).
x

In [None]:
# Report whether the match held in `x` covers the whole file name in `fn`.
if x is None:
    print("May not be an OMI file.")
elif x.group() == fn:
    print("The whole string matches! This must be an OMI file!")
else:
    # Only part of the name matched the pattern, so it is not reported as an
    # OMI file.
    print("Only part of the name matches. May not be an OMI file.")