In [40]:
import subprocess
import os
import io
import re
import csv
from shutil import rmtree

from datetime import datetime
import abc
import collections
from typing import Generic, TypeVar, Generic, Optional, List, Type
from pydantic import BaseModel, ValidationError, validator, Field
from pydantic.generics import GenericModel


import pandas as pd
import geopandas as gpd
import qgrid
from IPython.display import display, JSON
import ipywidgets as widgets

import json

from uuid import UUID, uuid4

In [41]:
# Absolute path of the Marxan case-study project folder used by this notebook.
MARXAN_FOLDER = '/home/jovyan/work/datasets/raw/marxan_Coral_Triangle_Case_Study_mod'
# Path of the Marxan binary, which lives directly inside the project folder.
MARXAN_EXECUTABLE = f'{MARXAN_FOLDER}/MarOpt_v243_Linux64'
# File name of the main parameter file found in the project folder.
MARXAN_INPUTDATA = 'input.dat'

In [42]:
# List the Marxan project folder and its input directory to locate the .dat files to edit
!ls $MARXAN_FOLDER
!ls $MARXAN_FOLDER/input

DebugTraceFile_MarOpt.txt  MarOptTotalAreas.csv  output_0.001  output_1
input			   MarOpt_v243_Linux64	 output_0.01   output_10
input.dat		   output		 output_0.1    output_100
 boundary.dat			      pu.dat
'Coral Triangle Costs Profile.cost'   puvspr.dat
 feature_preprocessing.dat	      spec.dat
 protected_area_intersections.dat     spec_notarget.dat


## Helper classes and functions for reading and data mutation

In [43]:
class DatFile(object):
    """
    Read and write dat files.

    The content is kept in ``self.data``. ``read`` stores the file as a list of
    lines (line endings kept); ``write`` accepts either a single string or an
    iterable of strings.
    """
    def __init__(self, file_path: str = None):
        """
        Args:
            file_path (string): Optional path of the file. When given it must
                end with '.dat'.
        Raises:
            NameError: If ``file_path`` does not end with '.dat'.
        """
        # Ensure the file has the right extension
        if file_path and not file_path.endswith('.dat'):
            raise NameError("File must be a '.dat' extension")

        self.__path = file_path
        self.data = None

    def __test_path(self, path):
        """Adopt ``path`` when no path is stored yet.

        A path that was already stored (constructor or earlier call) wins over
        ``path``.

        Raises:
            NameError: If neither a stored path nor ``path`` is available.
        """
        if not self.__path and path:
            self.__path = path
        elif not self.__path and not path:
            raise NameError('No path to file provided')

    def update(self, data, incremental: bool = False):
        """Updates the data held by the object.

        Args:
            data (any): The data the object should store.
            incremental (bool): When False (default), or when nothing is stored
                yet, ``data`` replaces the stored value. When True, ``data`` is
                merged into a stored dict, or converted to the stored type and
                added to it with ``+`` (list/str concatenation, int addition).
        Returns:
            None
        """
        if not incremental or not self.data:
            self.data = data
        elif isinstance(self.data, dict):
            # dicts have no ``append``; merging the new keys is the incremental step.
            self.data.update(data)
        else:
            self.data = self.data + type(self.data)(data)

    def read(self, file_path: str = None):
        """Reads the file into ``self.data`` as a list of lines.

        The file is decoded as UTF-8; if that fails it is re-read as ISO-8859-1.

        Args:
            file_path (string): The full path to the file that will be read.
                Only used when no path has been stored yet.
        Returns:
            None
        Raises:
            NameError: If no path is available.
        """
        self.__test_path(file_path)
        try:
            with io.open(self.__path, mode="r", encoding="utf-8") as f:
                self.data = f.readlines()
        except UnicodeDecodeError:
            # Keep the same shape (list of lines) as the UTF-8 branch so that
            # callers can index and iterate lines regardless of the encoding.
            with io.open(self.__path, mode="r", encoding="ISO-8859-1") as f:
                self.data = f.readlines()

    def write(self, file_path: str = None, mode: str = 'w'):
        """Writes ``self.data`` to the file, encoded as UTF-8.

        Args:
            file_path (string): The full path to the file that will be written.
                Only used when no path has been stored yet.
            mode (string): Optional. The (text) file write mode. Default value is w.
        Returns:
            None
        Raises:
            NameError: If no path is available.
            ValueError: If there is no data to write.
        """
        self.__test_path(file_path)
        if self.data is None:
            raise ValueError('No data to write')
        with io.open(self.__path, mode, encoding="utf-8") as f:
            if isinstance(self.data, str):
                f.write(self.data)
            else:
                # ``read`` stores a list of lines, so write them back one by one.
                f.writelines(self.data)

In [51]:
# Just in case we need to format the float numbers in a very specific way
class MyNumber:
    """
    Wrapper that lets a number be formatted with a custom exponent layout.

    The wrapped value is formatted with the requested format spec; when the
    result contains an upper-case 'E' exponent, two extra zeros are inserted
    right after the exponent sign (e.g. ``E+04`` becomes ``E+0004``).
    """
    def __init__(self, val):
        self.val = val

    def __format__(self, format_spec):
        template = ''.join(['{0:', format_spec, '}'])
        rendered = template.format(self.val)
        if 'E' not in rendered:
            return rendered
        # Exactly one 'E' is expected; more than one makes the unpacking fail.
        mantissa, exponent = rendered.split('E')
        sign, digits = exponent[0], exponent[1:]
        return ''.join([mantissa, 'E', sign, '00', digits])

def num(s):
    """
    Coerce a value to a number when possible.

    ``int`` is tried first, then ``float``; if both raise ``ValueError`` the
    value is returned unchanged.
    """
    for caster in (int, float):
        try:
            return caster(s)
        except ValueError:
            continue
    return s

def getSizeOfNestedList(listOfElem):
    '''Count the leaf elements of a (possibly nested) list.

    Only elements whose type is exactly ``list`` are descended into; anything
    else (tuples and strings included) counts as a single element.
    '''
    return sum(
        getSizeOfNestedList(item) if type(item) == list else 1
        for item in listOfElem
    )

def _readTabularFile(filename: str)-> List[list]:
    """
    Reads a tabular .dat file and splits every line into its fields.

    The delimiter is sniffed from the first line (comma, tab or space). Double
    quotes are removed from every line. For comma-delimited files, spaces are
    replaced by underscores so that values containing spaces stay in one field.

    Args:
        filename (string): The full path to the file that will be read.
    Returns:
        A list with one list of string fields per line; the first entry is the
        header line. An empty file yields an empty list.
    """
    inputData = DatFile(filename)
    inputData.read()

    lines = inputData.data
    if isinstance(lines, str):
        # Content delivered as one string: split it into lines instead of
        # iterating over single characters.
        lines = lines.splitlines(True)
    if not lines:
        return []

    dialect = csv.Sniffer().sniff(lines[0], [',', '\t', ' '])
    isCommaDelimited = dialect.delimiter == ','
    whitespace = re.compile(r'\s')  # compiled once, raw string avoids the invalid '\s' escape

    outputData = []
    for line in lines:
        cleaned = line.replace('"', '')
        if isCommaDelimited:
            cleaned = cleaned.replace(' ', '_')
        # Strip both line-ending characters at once so CRLF leaves no stray '\r'.
        cleaned = cleaned.strip('\r\n')
        if isCommaDelimited:
            cleaned = cleaned.replace(',', '\t')
        outputData.append(whitespace.split(cleaned))
    return outputData


def CreateListModelFromFile(filename: str, model: Type['Model'])-> List['Model']:
    """
    Builds one model instance per data row of a tabular .dat file.

    The first row of the file is used as the field names; every following row
    is zipped with those names and parsed with ``model.parse_obj``.

    Args:
        filename (string): The full path to the file that will be read.
        model (Type['Model']): Data model used to read and validate the data
    Returns:
        The contents of the file as a Data model list.
    """
    table = _readTabularFile(filename)
    parsedRows = []
    for record in table[1:]:
        parsedRows.append(model.parse_obj(dict(zip(table[0], record))))
    return parsedRows

def CreateFileFromDF(filename: str, df: Type['Dataframe'], model: Type['Model'])-> List['Model']:
    """
    Validates a dataframe against a data model and writes it as a
    space-delimited .dat file.

    For ``inputDatFile`` the dataframe is transposed and the single resulting
    record is written as one ``KEY value`` line per parameter. For any other
    model one line per dataframe row is written below a header line; the
    columns written are those whose value is not None in the first row, and
    None values are left out of every row.

    Args:
        filename (string): The full path to the file that will be written.
        df (Type['Dataframe']): Dataframe to save
        model (Type['Model']): Data model used to validate the data
    Returns:
        The validated rows as a list of ``model`` instances.
    """
    # Kept registered under the name 'dat' as before, so the dialect stays
    # available by name after the call.
    csv.register_dialect('dat', delimiter=' ')

    if model == inputDatFile:
        data = df.transpose().to_dict('records')
        validatedData = [inputDatFile(**x) for x in data]
        parameters = list(data[0].items())
        with open(filename, 'w', encoding='utf8', newline='') as output_file:
            csv.writer(output_file, dialect='dat').writerows(parameters)
    else:
        data = df.to_dict('records')
        validatedData = [model(**x) for x in data]

        # Identity comparison: ``!= None`` is evaluated element-wise / raises
        # for values such as pd.NA or arrays.
        keys = [key for key, val in data[0].items() if val is not None]
        dataNotNone = [{key: val for key, val in row.items() if val is not None}
                       for row in data]

        with open(filename, 'w', encoding='utf8', newline='') as output_file:
            dict_writer = csv.DictWriter(output_file, keys, dialect='dat')
            dict_writer.writeheader()
            dict_writer.writerows(dataNotNone)

    return validatedData

def save_button(filename: str, model: Type['Model'], data: Type['QgridWidget'])-> None:
    """
    Displays a "Save" button that writes the grid content to a file on click.

    Args:
        filename (string): The full path to the file that will be written.
        model (Type['Model']): Data model used to validate the data
        data (Type['QgridWidget']): Qgrid widget holding the edited dataframe
    """
    saveButton = widgets.Button(description="Save")
    logArea = widgets.Output()

    def _persist_grid(_clicked):
        # Anything printed or raised while saving is captured by the output widget.
        with logArea:
            CreateFileFromDF(filename, data.get_changed_df(), model)

    display(saveButton, logArea)
    saveButton.on_click(_persist_grid)

## Input data model types

In [45]:
# File-format codes shared by all the SAVE* output options.
_INPUTDAT_SAVE_OPTIONS = {0: 'No file generated',
                          1: 'save a file as .dat',
                          2: 'save a file as .txt',
                          3: 'save a file as .csv'}


def _inputdat_check_allowed(value, allowed_set: dict):
    """Return ``value`` when it is a key of ``allowed_set``, else raise ValueError."""
    if value not in allowed_set.keys():
        raise ValueError(f"must be in {allowed_set}, got '{value}'")
    return value


class inputDatFile(BaseModel):
    """
    This is the description of the input data class based on the marxan input file.
    """
    
    # General Parameters
    VERSION: str = Field('0.1', title='Version', 
                         description='Type of input file')
    BLM:  Optional[float] = Field(0., title='Boundary Length Modifier', 
                           description='Boundary Length Modifier')
    PROP: float = Field(0., title='Starting Proportion', 
                            description='Proportion of planning units in initial reserve system')
    RANDSEED: Optional[int] = Field(-1, title='Random Seed', 
                              description='Random seed number')
    NUMREPS: int = Field(1, title='Repeat Runs', 
                             description='The number of repeat runs you wish to do')
    BESTSCORE: Optional[int] = Field(0, title='Best Score Speedup', 
                               description='This variable tells Marxan not to keep track of the best score \
                                until it reaches a specified minimum level.')
    
    # Annealing Parameters
    NUMITNS: int = Field(0, title='Number of Iterations', 
                         description='Number of iterations for annealing')
    STARTTEMP: int = Field(1, title='Initial Temperature', 
                           description='Starting temperature for annealing')
    COOLFAC: int = Field(0, title='Cooling Factor', 
                         description='Cooling factor for annealing')
    NUMTEMP: int = Field(1, title='Temperature Decreases', 
                         description='Number of temperature decreases for annealing')
    
    # Cost Threshold
    COSTTHRESH: Optional[float] = Field(0, title='Threshold', 
                              description='Cost threshold')
    THRESHPEN1: Optional[float] = Field(0, title='Penalty Factor A', 
                              description='Size of cost threshold penalty')
    THRESHPEN2: Optional[float] = Field(0, title='Penalty Factor B', 
                              description='Shape of cost threshold penalty')
    
    # Input Files
    INPUTDIR: str = Field('input', title='Input Folder', 
                          description='User Defined Name of the folder containing input data files')
    SPECNAME: str = Field('spec.dat', title='Species File Name', 
                          description='Name of Conservation Feature File')
    PUNAME: str = Field('pu.dat', title='Planning Unit File Name', 
                        description='Name of Planning Unit File')
    PUVSPRNAME: str = Field('puvspr2.dat', title='Planning Unit versus Species', 
                            description='Name of Planning Unit versus Conservation Feature File')
    BOUNDNAME: str = Field('bound.dat', title='Boundary Length', 
                           description='Name of Boundary Length File')
    BLOCKDEFNAME: Optional[str] = Field('blockdef.dat', title='Block Definitions', 
                              description='Name of Block Definition File')
    
    # Output Files
    VERBOSITY: int =  Field(1, title='Screen Output', 
                            description='Amount of output displayed on the program screen')
    MISSLEVEL: Optional[float] =  Field(1, title='Species missing proportion', 
                              description='Amount or target below which it is counted as ‘missing’')
    OUTPUTDIR: str = Field('output', title='Output Folder', 
                           description='User Defined Name of the folder in which to save output files')
    SCENNAME: str = Field('Default_name', title='Scenario name', 
                          description='Scenario name for the saved output files')
    SAVERUN: Optional[int] = Field(3, title='Save each run', 
                         description='Save each run? (0 = no)')
    SAVEBEST: Optional[int] =  Field(3, title='Save the best run', 
                           description='Save the best run? (0 = no)')
    SAVESUMMARY: Optional[int] =  Field(3, title='Save summary', 
                          description='Save summary information? (0 = no)')
    SAVESCEN: Optional[int] =  Field(3, title='Save scenario', 
                           description='Save scenario information? (0 = no)')
    SAVETARGMET: Optional[int] =  Field(3, title='Save targets met', 
                              description='Save targets met information? (0 = no)')
    SAVESUMSOLN: Optional[int] =  Field(3, title='Save summed solution', 
                              description='Save summed solution information? (0 = no)')
    SAVELOG: Optional[int] =  Field(1, title='Save log', 
                          description='Save log files? (0 = no)')
    SAVESNAPSTEPS: Optional[int] =  Field(0, title='Save snapshots', 
                                description='Save snapshots each n steps (0 = no)')
    SAVESNAPCHANGES: Optional[int] =  Field(0, title='Save snapshots changes', 
                                  description='Save snapshots after every n changes (0 = no)')
    SAVESNAPFREQUENCY: Optional[int] =  Field(0, title='Frequency of snapshots', 
                                    description='Frequency of snapshots if they are being used')
    SAVESOLUTIONSMATRIX: Optional[int] =  Field(3, title='Save solutions matrix', 
                                    description='Save the solutions matrix? (0 = no)')
    
    # Program control.
    RUNMODE: int = Field(1, title='Run Options', 
                         description='User Defined The method Marxan uses to find solutions')
    
    ITIMPTYPE: int =  Field(1, title='Iterative Improvement', 
                            description='Iterative improvement type')
    HEURTYPE: int =  Field(1, title='Heuristic', 
                           description='Heuristic type')
    CLUMPTYPE: Optional[int] =  Field(0, title='Clumping Rule', 
                            description='Clumping penalty type')
    
    # All SAVE* options accept the same file-format codes, so one validator
    # covers them instead of eight identical copies.
    @validator('SAVERUN', 'SAVEBEST', 'SAVESUMMARY', 'SAVESCEN', 'SAVETARGMET',
               'SAVESUMSOLN', 'SAVELOG', 'SAVESOLUTIONSMATRIX')
    def save_option_is_valid(cls, method: int) -> int:
        """Check that a SAVE* option holds one of the known file-format codes."""
        return _inputdat_check_allowed(method, _INPUTDAT_SAVE_OPTIONS)
    
    @validator('RUNMODE')
    def RUNMODE_is_valid(cls, method: int) -> int:
        """Check that RUNMODE is one of the known run options."""
        allowed_set = {0: 'Apply Simulated Annealing followed by a Heuristic',
                      1: 'Apply Simulated Annealing followed by Iterative Improvement',
                      2: 'Apply Simulated Annealing followed by a Heuristic, followed by Iterative',
                      3: 'Use only a Heuristic',
                      4: 'Use only Iterative Improvement',
                      5: 'Use a Heuristic followed by Iterative Improvement',
                      6: 'Use only Simulated Annealing'}
        return _inputdat_check_allowed(method, allowed_set)
    
    @validator('ITIMPTYPE')
    def ITIMPTYPE_is_valid(cls, method: int) -> int:
        """Check that ITIMPTYPE is one of the known iterative improvement types."""
        allowed_set = {0: 'Normal Iterative Improvement',
                      1: 'Two Step Iterative Improvement',
                      2: '‘Swap’ Iterative Improvement',
                      3: 'Normal Improvement followed by Two Step Iterative Improvement'}
        return _inputdat_check_allowed(method, allowed_set)
    
    @validator('HEURTYPE')
    def HEURTYPE_is_valid(cls, method: int) -> int:
        """Check that HEURTYPE is one of the known heuristic types."""
        allowed_set = {-1:'Ignored',
                      0: 'Richness',
                      1: 'Greedy',
                      2: 'Max Rarity',
                      3: 'Best Rarity',
                      4: 'Average Rarity',
                      5: 'Sum Rarity',
                      6: 'Product Irreplaceability',
                      7: 'Summation Irreplaceability'}
        return _inputdat_check_allowed(method, allowed_set)
    
    @validator('VERBOSITY')
    def VERBOSITY_is_valid(cls, method: int) -> int:
        """Check that VERBOSITY is one of the known screen output levels."""
        allowed_set = {-1:'Ignored',
                      0: 'Silent Running',
                      1: 'Results Only',
                      2: 'General Progress',
                      3: 'Detailed Progress'}
        return _inputdat_check_allowed(method, allowed_set)
    
    @validator('CLUMPTYPE')
    def CLUMPTYPE_is_valid(cls, method: int) -> int:
        """Check that CLUMPTYPE is one of the known clumping rules."""
        # NOTE(review): the codes jump from 1 to 3; this looks like it may be a
        # typo for 2 - confirm against the Marxan manual before changing.
        allowed_set = {-1:'Ignored',
                       0: 'Partial clumps do not count',
                       1: 'Partial clumps count half',
                       3: 'Graduated penalty'}
        return _inputdat_check_allowed(method, allowed_set)
    
    def to_dat(self):
        """
        Serialises the model as the text of an input.dat file.

        Args:
            
        Returns:
            One ``KEY value`` line per parameter, in field order. Parameters
            whose value is None are left out instead of being written as the
            literal text 'None'.
        """
        return ''.join(key + " " + str(value) + "\n"
                       for key, value in self.dict().items()
                       if value is not None)
    
    @classmethod
    def from_dat(cls: Type['Model'], dat: 'Union[str, List[str]]')-> 'Model':
        """
        Builds the model from the content of an input.dat file.

        Only lines made of exactly two whitespace-separated tokens
        (``KEY value``) that contain an upper-case parameter name are used;
        every other line (titles, blank lines, values containing spaces, keys
        without a value) is skipped. Values are converted with ``num``.

        Args:
            dat (string or list of strings): The content of the file, either as
                one string or as a list of lines.
        Returns:
            The validated model; parameters missing from ``dat`` keep their
            default value.
        """
        # A plain string would otherwise be iterated character by character
        # and silently produce a model made only of defaults.
        lines = dat.splitlines() if isinstance(dat, str) else dat

        obj = {}
        for line in lines:
            if not re.search('[A-Z1-9_]{2,}', line, re.DOTALL):
                continue
            pair = line.split()  # any whitespace (spaces or tabs), empty tokens dropped
            if len(pair) != 2:
                continue
            obj[pair[0]] = num(pair[1])
                
        return cls.parse_obj(obj)

In [46]:
class conservationFeature(BaseModel):
    """
    The Conservation Feature File contains information about each of the conservation
    features being considered, such as their name, target representation, and the penalty
    if the representation target is not met. It has the default name ‘spec.dat’. Because of
    this name it is sometimes referred to as the Species File, although conservation
    features will often be surrogates such as habitat type rather than actual species. 
    """
   
    id: int = Field(..., title='Conservation Feature ID', 
                    description='A unique numerical identifier for each conservation feature. \
                                Be careful not to duplicate id numbers as Marxan will ignore all but the last one.')
    
    # All variables you wish to take on Block Definition attributes should
    # have their value entered as -1 in the Conservation Feature File
    type: Optional[int] = Field(title='Conservation Feature Type', 
                                description='Used to define groups of conservation features for which a number of \
                                umbrella attributes can be set for all features within the specified group (or “type”). \
                                Each group of features must have a unique numerical identifier. This variable is used \
                                in conjunction with the Block Definition File (see Section 3.3.2) which will contain \
                                the attributes to be assigned to a particular group of conservation features.')
    
    # If Block Definition File is being used for this feature, then the Target for Feature 
    # Occurrences should be set to -1 here.
    target: Optional[float] = Field(title=' Feature Representation Target', 
                          description='The target amount of each conservation feature to be included \
                                        in the solutions. These values represent constraints on potential solutions \
                                        to the reserve selection problem. That is, for a reserve solution to be \
                                        feasible it must include at least this amount of each feature. The target \
                                        value is expressed in the same units used to define the amount of each feature in \
                                        each planning unit, contained in the Planning Unit versus Conservation Feature \
                                        File (see Section 3.2.4). However, units from different conservation features can vary \
                                        (e.g. hectares of habitat for one feature and number of occurrences for another, nests \
                                        for a third and length of stream for a fourth).')
    
    # If Block Definition File is being used for this feature, then the Target for Feature 
    # Occurrences should be set to -1 here.
    prop: Optional[float] = Field(title='Proportion Target for Feature Representation', 
                                description='The variable ‘prop’, is short for proportion and can \
                                            be used to set the proportion (i.e. percentage) of a \
                                            conservation feature to be included in the reserve system.',
                               ge =0, le =1)
    
    # If Block Definition File is being used for this feature, then the Target for Feature 
    # Occurrences should be set to -1 here.
    
    spf: float = Field(..., title='Conservation Feature Penalty Factor', 
                       description='The letters ‘spf’ stands for Species Penalty Factor. This \
                                    variable is more correctly referred to as the Conservation Feature Penalty \
                                    Factor.')
    target2: Optional[float] = Field(title='Minimum Clump Size', 
                                     description='This variable specifies a minimum clump size for the\
                                                representation of conservation features in the reserve system. If the amount\
                                                of a conservation feature found in a clump is less that this value, then it does\
                                                not count towards meeting the conservation target')
    
    # If Block Definition File is being used for this feature, then the Target for Feature 
    # Occurrences should be set to -1 here.
    targetocc: Optional[float] = Field(title='Target for Feature Occurrences', 
                                       description='This variable specifies the minimum number of occurrences of a\
                                                    conservation feature required in a reserve system. This value can be used in\
                                                    situations where even though your conservation target may be met in one planning\
                                                    unit, you would like it to be represented in a greater number of planning units,\
                                                    possibly for risk spreading')
    
    name: Optional[str] = Field(title='Conservation Feature Name', 
                                description='The alphabetical (no numbers!) name of each conservation feature')
    
    # If Block Definition File is being used for this feature, then the Target for Feature 
    # Occurrences should be set to -1 here.
    sepnum: Optional[float] = Field(title='Target for Separated Feature Occurrences', 
                                    description='The number of mutually separated occurrences of a feature \
                                                required in the reserve system')
    
    # If Block Definition File is being used for this feature, then the Target for Feature 
    # Occurrences should be set to -1 here.
    sepdistance: Optional[float] = Field(title='Minimum Separation Distance', 
                                      description=' Used in conjunction with ‘sepnum’ (above), this variable specifies\
                                                    the minimum distance at which planning units holding a conservation feature\
                                                    are considered to be separate.')

     
    # Checks that at least one of 'target' or 'prop' is set.
    # pydantic does not call a field validator when the field is not supplied
    # unless the decorator is given always=True, so as written this only runs
    # for rows that do supply 'prop'.
    # NOTE(review): adding always=True would enforce the rule for every row but
    # rejects rows lacking both fields - confirm against the existing spec
    # files (e.g. spec_notarget.dat) before enabling it.
    @validator('prop')
    def check_prop_or_target(cls, v, values):
        """Reject a row where both 'target' and 'prop' are missing; return 'prop' unchanged."""
        # 'target' is declared before 'prop', so its validated value (None when
        # absent or invalid) is looked up in ``values``.
        if values.get('target') is None and v is None:
            raise ValueError('either field target or prop is required')
        return v
    
                 
class planningUnits(BaseModel):
    """
    Data model of one row of the Planning Unit File (default name ‘pu.dat’).

    The file holds everything related to planning units except the distribution
    of conservation features across them, which lives in the Planning Unit
    versus Conservation Feature File.
    """
    id: int = Field(..., title=' Planning Unit ID', 
                    description='A unique numerical identifier for each planning unit')
    cost: Optional[float] = Field(1, title='Planning Unit Cost', 
                        description='The cost of including each planning unit in the reserve system. ')
    status: Optional[int] = Field(0, title='Planning Unit Status', 
                          description='This variable defines whether a planning unit (PU) is locked in or out of\
                                      the initial and final reserve systems. It can take one of four values:')
    
    # Only needed when a minimum separation between feature occurrences is set in the
    # ‘sepdistance’ column of the Conservation Feature File 
    xloc: Optional[float] = Field(title='X Planning Unit Location', 
                          description='The x-axis coordinate of the planning unit')
    
    # Only needed when a minimum separation between feature occurrences is set in the
    # ‘sepdistance’ column of the Conservation Feature File 
    yloc: Optional[float] = Field(title='Y Planning Unit Location', 
                          description='The y-axis coordinate of the planning unit')
    
    @validator('status')
    def status_is_valid(cls, method: int) -> int:
        """Accept only the four known planning unit status codes."""
        allowed_set = {
                       0: 'The PU is not guaranteed to be in the initial reserve',
                       1: 'The PU will be included in the initial reserve',
                       2: 'The PU is fixed in the reserve system (“locked in”).\
                           It starts in the initial reserve system and cannot be removed.',
                       3: 'The PU is fixed outside the reserve system (“locked out”).\
                       It is not included in the initial reserve system and cannot be added.'
                      }
        
        if method in allowed_set:
            return method
        raise ValueError(f"must be in {allowed_set}, got '{method}'")


class planningUnitVSConservationFeatureV(BaseModel):
    """
    The Planning Unit versus Conservation Feature File contains information on the
    distribution of conservation features across planning units. It has the default file
    name, ‘puvspr2.dat’. There are two different formats this file can take, vertical and
    horizontal. Either is acceptable and Marxan will test the header line to determine
    which format is being used. This model represents one row of the vertical format:
    a (species, pu, amount) triple.
    """
    # All three fields are required (no default value).
    species: int = Field(..., title='Conservation Feature ID', 
                         description='The unique id number of each conservation feature. This must \
                                    correspond to the id numbers used in the Conservation Feature File.')
    pu: int = Field(..., title='Planning Unit ID', 
                         description='The id of a planning unit where the conservation feature listed on \
                                    the same row occurs. The planning unit id numbers must correspond \
                                    to the numbers used in the Planning Unit File')
    # gt=0: the amount must be strictly positive.
    amount: float = Field(..., title='Conservation Feature Amount', 
                         description='The amount of the conservation feature occurring in the planning unit \
                                      listed on the same row. This amount may be related to the abundance \
                                      of a species or the extent of a certain habitat type. ',
                          gt=0)

class planningUnitVSConservationFeatureH(BaseModel):
    """
    Row model for the horizontal form of the Planning Unit versus Conservation Feature File.

    The Planning Unit versus Conservation Feature File contains information on the
    distribution of conservation features across planning units. It has the default file
    name, ‘puvspr2.dat’. There are two different formats this file can take, vertical and
    horizontal. Either is acceptable and Marxan will test the header line to determine
    which format is being used. This model represents the horizontal format:
    the Planning Unit versus Conservation Feature File is simply a matrix of
    planning units versus conservation features.

    Each instance holds one planning unit id (``pu``) together with a list of
    conservation feature ids (``species``) and a list of amounts (``amount``).
    All three fields are required.
    """
    # The description used to be the VERBOSITY text from the input.dat model
    # ('Amount of output displayed on the program screen'), which is unrelated to this field.
    pu: int = Field(..., title='Planning Unit ID',
                    description='The id of the planning unit described by this row. The planning unit '
                                'id numbers must correspond to the numbers used in the Planning Unit File')
    species: List[int] = Field(..., title='Conservation Feature IDs', 
                               description='The unique id number of each conservation feature.')
    # NOTE: the backslash continuation makes the indentation of the next line part of the string.
    amount: List[float] = Field(..., title='Conservation Feature Amount', 
                                 description='The amount of the conservation feature occurring in the planning unit \
                                             listed on the same row',
                                 gt=0)


### Optional data file structures

class boundaryLength(BaseModel):
    """
    Row model for the Boundary Length File: two planning unit ids and the boundary value
    they share. All three fields are required.

    The Boundary Length File contains information about the length or ‘effective length’
    of shared boundaries between planning units. This file is necessary if you wish to use
    the Boundary Length Modifier to improve the compactness of reserve solutions (bound.dat).
    
    !Any missing values within the file will prevent Marxan from running, for instance 
    !if ‘id1’ and ‘id2’ are set but no value for ‘boundary’ is entered.
    """
    # important not to duplicate boundaries
    # (nothing in this model enforces that; it only validates a single row)
    id1: int = Field(..., title='Planning Unit ID', 
                     description='‘id1’ and ‘id2’ contain the id number of the two \
                     planning units that share a boundary.')
    # NOTE(review): compared with id1, this title has a leading space and the description
    # is missing its opening quote mark. Left untouched because both are runtime strings.
    id2: int = Field(..., title=' Planning Unit ID', 
                     description='id1’ and ‘id2’ contain the id number of the two \
                     planning units that share a boundary.')
    boundary: float = Field(..., title='Boundary Length', 
                           description='Boundary Length or boundary cost is relative \
                           measure of how important it is to include one planning unit \
                           in the reserve system, given the inclusion of the other.')
        
class blockDefinition(BaseModel):
    """
    Row model for the Block Definition File.

    The Block Definition File is very similar to the Conservation Feature File (see
    Section 3.2.2) and is used to set default variable values for groups of conservation
    features. It is always used in conjunction with the Conservation Feature File.

    ``type``, ``prop`` and ``target`` are declared without a default value; every other
    field defaults to -1. ``prop`` is constrained to the range [0, 1].
    (Section numbers presumably refer to the Marxan user manual.)
    """
    type: int = Field(..., title='Conservation Feature Type', 
                      description='A unique numerical identifier for groups of conservation features. \
                                   Each ‘type’ must correspond exactly with the types identified \
                                   in the Conservation Feature File ')
    
    # If this is set Planning Unit versus Conservation Feature File ‘target’, should be set to ‘-1’.
    # NOTE(review): the annotation is Optional but the default is `...`; in pydantic v1 that
    # presumably means "required, but None is accepted" - confirm this is the intent.
    prop: Optional[float] = Field(default=..., title='Proportion Target for Feature Representation', 
                                description='The variable ‘prop’, is short for proportion and can \
                                            be used to set the proportion (i.e. percentage) of a \
                                            conservation feature to be included in the reserve system.',
                               ge =0, le =1)
        
    target: float = Field(..., title=' Feature Representation Target', 
                          description='The target amount of each conservation feature to be included \
                                        in the solutions. These values represent constraints on potential solutions \
                                        to the reserve selection problem. That is, for a reserve solution to be \
                                        feasible it must include at least this amount of each feature. The target \
                                        value is expressed in the same units used to define the amount of each feature in \
                                        each planning unit, contained in the Planning Unit versus Conservation Feature \
                                        File (see Section 3.2.4). However, units from different conservation features can vary \
                                        (e.g. hectares of habitat for one feature and number of occurrences for another, nests \
                                        for a third and length of stream for a fourth).')   
    # The remaining fields all default to -1.
    spf: float = Field(-1, title='Conservation Feature Penalty Factor', 
                       description='The letters ‘spf’ stands for Species Penalty Factor. This \
                                    variable is more correctly referred to as the Conservation Feature Penalty \
                                    Factor.')
    target2: Optional[float] = Field(-1, title='Minimum Clump Size', 
                                     description='This variable specifies a minimum clump size for the \
                                                representation of conservation features in the reserve system. If the amount\
                                                of a conservation feature found in a clump is less that this value, then it does\
                                                not count towards meeting the conservation target')
    
    targetocc: Optional[float] = Field(-1, title='Target for Feature Occurrences', 
                                       description='This variable specifies the minimum number of occurrences of a\
                                                    conservation feature required in a reserve system. This value can be used in\
                                                    situations where even though your conservation target may be met in one planning\
                                                    unit, you would like it to be represented in a greater number of planning units,\
                                                    possibly for risk spreading')

    sepnum: Optional[float] = Field(-1, title='Target for Separated Feature Occurrences', 
                                  description='The number of mutually separated occurrences of a feature \
                                                required in the reserve system')
    
    sepdistance: Optional[float] = Field(-1, title='Minimum Separation Distance', 
                                      description=' Used in conjunction with ‘sepnum’ (above), this variable specifies\
                                                    the minimum distance at which planning units holding a conservation feature\
                                                    are considered to be separate.')    

In [47]:
# Display the JSON schema generated for the conservationFeature model as the cell output.
JSON(conservationFeature.schema_json())



<IPython.core.display.JSON object>

In [11]:
# Keep the conservationFeature schema, serialised as a JSON string, in `json_schema`.
json_schema = conservationFeature.schema_json()

## Output data model types

## Examples Read and write

In [None]:
# Build the text of a new input.dat from the model's default values, overriding only
# BLM, NUMREPS and PUVSPRNAME, and print it. DatFile() is created without a path and
# this cell never calls write().
# NOTE(review): 'puvsp.dat' differs from the 'puvspr.dat' present in the input folder
# listing - confirm whether this is a typo in the example.
test = DatFile()
example = inputDatFile(BLM=0.1, NUMREPS =100, PUVSPRNAME= 'puvsp.dat')
test.update(example.to_dat())
print(test.data)

In [None]:
# Location of the Marxan project used by this example.
MARXAN_FOLDER = '/home/jovyan/work/datasets/raw/marxan_Coral_Triangle_Case_Study_mod'
MARXAN_EXECUTABLE = f'{MARXAN_FOLDER}/MarOpt_v243_Linux64'
MARXAN_INPUTDATA = 'input.dat'

# Load the raw text of the existing input.dat ...
InputFile = DatFile(f'{MARXAN_FOLDER}/{MARXAN_INPUTDATA}')
InputFile.read()
# (writing it back is intentionally left disabled)
# InputFile.write(f'{MARXAN_FOLDER}/{MARXAN_INPUTDATA}')

# ... and parse it into the validated inputDatFile model.
userInputFile = inputDatFile.from_dat(InputFile.data)

# Print the data file names configured in input.dat, one per line.
for file_key in ('PUNAME', 'BOUNDNAME', 'SPECNAME', 'PUVSPRNAME', 'BLOCKDEFNAME'):
    print(getattr(userInputFile, file_key))

## Example marxan project

In [309]:
# Project folder, Marxan executable and input parameter file used by the cells of this
# section. The values are the same as in the configuration cell at the top of the notebook.
MARXAN_FOLDER = '/home/jovyan/work/datasets/raw/marxan_Coral_Triangle_Case_Study_mod'
MARXAN_EXECUTABLE = f'{MARXAN_FOLDER}/MarOpt_v243_Linux64'
MARXAN_INPUTDATA = 'input.dat'

### Input File

In [52]:
# Read the existing input.dat and parse it into the validated inputDatFile model.
InputFile = DatFile(f'{MARXAN_FOLDER}/{MARXAN_INPUTDATA}')
InputFile.read()
userInputFile = inputDatFile.from_dat(InputFile.data)

# Interactive table: one row per input.dat parameter (the parameter name is the index).
# Despite its name, `userInputFile_df` holds the qgrid widget, not a DataFrame.
#
# Non-interactive alternative: build the same DataFrame with
# pd.DataFrame.from_dict(userInputFile.__dict__, orient='index'), edit it
# (e.g. `.loc['BLM'] = 0.01`) and write it back with
# CreateFileFromDF(f'{MARXAN_FOLDER}/{MARXAN_INPUTDATA}', <df>, inputDatFile).
userInputFile_df = qgrid.show_grid(
    pd.DataFrame.from_dict(userInputFile.__dict__, orient='index'),
    show_toolbar=True,
)
display(userInputFile_df)

# save_button is defined earlier in the notebook; it receives the target file path,
# the validation model and the grid widget.
save_button(f'{MARXAN_FOLDER}/{MARXAN_INPUTDATA}', inputDatFile, userInputFile_df)



[inputDatFile(VERSION='0.1', BLM=0.01, PROP=0.5, RANDSEED=-1, NUMREPS=10, BESTSCORE=10, NUMITNS=1000000, STARTTEMP=-1, COOLFAC=6, NUMTEMP=10000, COSTTHRESH=0.0, THRESHPEN1=14.0, THRESHPEN2=1.0, INPUTDIR='input', SPECNAME='spec_notarget.dat', PUNAME='pu.dat', PUVSPRNAME='puvspr.dat', BOUNDNAME='boundary.dat', BLOCKDEFNAME='blockdef.dat', VERBOSITY=3, MISSLEVEL=1.0, OUTPUTDIR='output_100', SCENNAME='output', SAVERUN=2, SAVEBEST=2, SAVESUM=0, SAVESCEN=2, SAVETARGMET=2, SAVESUMSOLN=2, SAVELOG=2, SAVESNAPSTEPS=0, SAVESNAPCHANGES=0, SAVESNAPFREQUENCY=0, SAVESOLUTIONSMATRIX=3, RUNMODE=1, ITIMPTYPE=0, HEURTYPE=-1, CLUMPTYPE=0)]

In [12]:
### Change parameters and rewrite input.dat
input_dat_path = f'{MARXAN_FOLDER}/{MARXAN_INPUTDATA}'

# Parse the current input.dat into the validated model.
InputFile = DatFile(input_dat_path)
InputFile.read()
userInputFile = inputDatFile.from_dat(InputFile.data)

# Parameters to override for the BLM calculations, applied in this order.
parameter_overrides = {
    'BLM': 0.02,
    'NUMREPS': 10,
    'SPECNAME': 'spec_notarget.dat',
}
for parameter_name, parameter_value in parameter_overrides.items():
    setattr(userInputFile, parameter_name, parameter_value)

# One row per parameter, then save as the new input.dat.
userInputFile_df = pd.DataFrame.from_dict(userInputFile.__dict__, orient='index')
CreateFileFromDF(input_dat_path, userInputFile_df, inputDatFile)

[inputDatFile(VERSION='0.1', BLM=0.02, PROP=0.5, RANDSEED=-1, NUMREPS=10, BESTSCORE=10, NUMITNS=1000000, STARTTEMP=-1, COOLFAC=6, NUMTEMP=10000, COSTTHRESH=0.0, THRESHPEN1=14.0, THRESHPEN2=1.0, INPUTDIR='input', SPECNAME='spec_notarget.dat', PUNAME='pu.dat', PUVSPRNAME='puvspr.dat', BOUNDNAME='boundary.dat', BLOCKDEFNAME='blockdef.dat', VERBOSITY=3, MISSLEVEL=1.0, OUTPUTDIR='output_100', SCENNAME='output', SAVERUN=2, SAVEBEST=2, SAVESUM=0, SAVESCEN=2, SAVETARGMET=2, SAVESUMSOLN=2, SAVELOG=2, SAVESNAPSTEPS=0, SAVESNAPCHANGES=0, SAVESNAPFREQUENCY=0, SAVESOLUTIONSMATRIX=3, RUNMODE=1, ITIMPTYPE=0, HEURTYPE=-1, CLUMPTYPE=0)]

### Planning units

In [None]:
# Show the full path of the planning unit file referenced by input.dat.
f'{MARXAN_FOLDER}/{userInputFile.INPUTDIR}/{userInputFile.PUNAME}'

In [311]:
# Path of the planning unit file referenced by input.dat (built once, used for load and save).
planning_units_path = f'{MARXAN_FOLDER}/{userInputFile.INPUTDIR}/{userInputFile.PUNAME}'

# Parse the file into a list of validated planningUnits rows.
userPlanningUnits = CreateListModelFromFile(planning_units_path, planningUnits)

# Interactive table: one row per planning unit.
# Despite its name, `userPlanningUnits_df` holds the qgrid widget, not a DataFrame.
#
# Non-interactive alternative: build pd.DataFrame([s.__dict__ for s in userPlanningUnits])
# and write it back with CreateFileFromDF(planning_units_path, <df>, planningUnits).
userPlanningUnits_df = qgrid.show_grid(
    pd.DataFrame([unit.__dict__ for unit in userPlanningUnits]),
    show_toolbar=True,
)
display(userPlanningUnits_df)

# save_button is defined earlier in the notebook; it receives the target file path,
# the validation model and the grid widget.
save_button(planning_units_path, planningUnits, userPlanningUnits_df)


QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': True, 'defau…

Button(description='Save', style=ButtonStyle())

Output()

### Conservation features

In [39]:
# Conservation feature file referenced by input.dat.
spec_path = f'{MARXAN_FOLDER}/{userInputFile.INPUTDIR}/{userInputFile.SPECNAME}'
userSpecData = CreateListModelFromFile(spec_path, conservationFeature)

# Static alternative (disabled):
# userSpecData_df = pd.DataFrame([s.__dict__ for s in userSpecData])
# userSpecData_df[0:3]
# CreateFileFromDF(f'{MARXAN_FOLDER}/{userInputFile.INPUTDIR}/{userInputFile.SPECNAME}',userSpecData_df,conservationFeature)

# Interactive table, one row per conservation feature, followed by the save control.
spec_rows = [vars(feature) for feature in userSpecData]
userSpecData_df = qgrid.show_grid(pd.DataFrame(spec_rows), show_toolbar=True)
display(userSpecData_df)
save_button(spec_path, conservationFeature, userSpecData_df)

QgridWidget(grid_options={'fullWidthRows': True, 'syncColumnCellResize': True, 'forceFitColumns': True, 'defau…

Button(description='Save', style=ButtonStyle())

Output()

### Planning units VS conservation features

In [None]:
# Planning unit vs conservation feature file referenced by input.dat.
# Only the vertical layout is handled here; the horizontal one would need a different model.
puvspr_path = f'{MARXAN_FOLDER}/{userInputFile.INPUTDIR}/{userInputFile.PUVSPRNAME}'
userPuvsp2Data = CreateListModelFromFile(puvspr_path, planningUnitVSConservationFeatureV)

# Static alternative (disabled):
# userPuvsp2Data_df = pd.DataFrame([s.__dict__ for s in userPuvsp2Data])
# userPuvsp2Data_df[0:3]

# Interactive table, one row per (feature, planning unit) record, followed by the save control.
puvspr_rows = [vars(record) for record in userPuvsp2Data]
userPuvsp2Data_df = qgrid.show_grid(pd.DataFrame(puvspr_rows), show_toolbar=True)
display(userPuvsp2Data_df)
save_button(puvspr_path, planningUnitVSConservationFeatureV, userPuvsp2Data_df)

### Boundary Length

In [None]:
# Boundary length file referenced by input.dat.
boundary_path = f'{MARXAN_FOLDER}/{userInputFile.INPUTDIR}/{userInputFile.BOUNDNAME}'
userBoundData = CreateListModelFromFile(boundary_path, boundaryLength)

# Static alternative (disabled):
# userBoundData_df = pd.DataFrame([s.__dict__ for s in userBoundData])
# userBoundData_df[0:3]

# Interactive table, one row per shared boundary, followed by the save control.
boundary_rows = [vars(edge) for edge in userBoundData]
userBoundData_df = qgrid.show_grid(pd.DataFrame(boundary_rows), show_toolbar=True)
display(userBoundData_df)
save_button(boundary_path, boundaryLength, userBoundData_df)

## Execute Marxan

In [None]:
# Select the project folder for the run and re-load its input.dat into `userInputFile`.
# NOTE(review): this folder has no '_mod' suffix, unlike the other cells that set
# MARXAN_FOLDER - confirm that running against the unmodified case study is intended.
MARXAN_FOLDER = '/home/jovyan/work/datasets/raw/marxan_Coral_Triangle_Case_Study'
MARXAN_EXECUTABLE = f'{MARXAN_FOLDER}/MarOpt_v243_Linux64'
MARXAN_INPUTDATA = 'input.dat'
InputFile = DatFile(f'{MARXAN_FOLDER}/{MARXAN_INPUTDATA}')
InputFile.read()
userInputFile = inputDatFile.from_dat(InputFile.data)
# Optional overrides, left disabled. `blm` is not defined in this cell, so it has to be
# set before these lines can be re-enabled.
# userInputFile.BLM = blm
# userInputFile.OUTPUTDIR =f'output_{blm}'
# userInputFile.NUMREPS =10
# userInputFile_df = pd.DataFrame.from_dict(userInputFile.__dict__, orient='index')
# CreateFileFromDF(f'{MARXAN_FOLDER}/{MARXAN_INPUTDATA}',userInputFile_df,inputDatFile)

In [None]:
# Make sure the output folder named in input.dat exists.
# exist_ok=True keeps the cell re-runnable: os.mkdir raised FileExistsError on a second run.
os.makedirs(f'{MARXAN_FOLDER}/{userInputFile.OUTPUTDIR}', exist_ok=True)

In [None]:
MARXAN_FOLDER = '/home/jovyan/work/datasets/raw/marxan_Coral_Triangle_Case_Study_mod'
MARXAN_EXECUTABLE = f'{MARXAN_FOLDER}/MarOpt_v243_Linux64'
MARXAN_INPUTDATA = 'input.dat'


# Run Marxan once per Boundary Length Modifier value; each run writes to its own
# 'output_<blm>' folder.
for blm in [0.001, 0.01, 0.1, 1, 10, 100]:
    # Re-read input.dat on every pass: it is rewritten in place below.
    InputFile = DatFile(f'{MARXAN_FOLDER}/{MARXAN_INPUTDATA}')
    InputFile.read()
    userInputFile = inputDatFile.from_dat(InputFile.data)

    ## Modify for BLM calculations and save as new input.dat
    userInputFile.BLM = blm
    userInputFile.OUTPUTDIR = f'output_{blm}'
    userInputFile.NUMREPS = 10

    # One row per parameter. BLOCKDEFNAME is dropped before writing - presumably because
    # the input folder contains no block definition file (TODO confirm).
    userInputFile_df = pd.DataFrame.from_dict(userInputFile.__dict__, orient='index')
    userInputFile_df = userInputFile_df.drop('BLOCKDEFNAME')

    CreateFileFromDF(f'{MARXAN_FOLDER}/{MARXAN_INPUTDATA}', userInputFile_df, inputDatFile)

    # Create the output folder if needed; exist_ok avoids the FileExistsError that a
    # plain os.mkdir raises when the sweep is re-run.
    os.makedirs(f'{MARXAN_FOLDER}/{userInputFile.OUTPUTDIR}', exist_ok=True)

    # EXECUTES MARXAN
    # Marxan has to run from the project root to find its data. `cwd=` sets the working
    # directory for the child process only, so the notebook's own working directory is
    # untouched even if the run fails.
    with subprocess.Popen([MARXAN_EXECUTABLE],
                          cwd=MARXAN_FOLDER,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.STDOUT,
                          universal_newlines=True,
                          bufsize=-1) as process:
        # Iterating the pipe reads until EOF, so lines still buffered when the process
        # exits are not lost (a poll()/readline() loop can stop before draining them).
        for output in process.stdout:
            print(output.strip())

    # Leaving the `with` block waits for the process, so returncode is set here.
    if process.returncode != 0:
        print(f'Marxan exited with return code {process.returncode} (BLM={blm})')

In [None]:
# Show the full path of the output folder currently configured in `userInputFile`.
f'{MARXAN_FOLDER}/{userInputFile.OUTPUTDIR}'

In [313]:
# Start from an empty output folder. The existence check lets the cell run on a fresh
# project too: rmtree raises when the folder is missing.
output_dir = f'{MARXAN_FOLDER}/{userInputFile.OUTPUTDIR}'
if os.path.isdir(output_dir):
    rmtree(output_dir)
os.makedirs(output_dir)

# EXECUTES MARXAN
# Marxan has to run from the project root to find its data. `cwd=` sets the working
# directory for the child process only, so the notebook's own working directory is
# untouched even if the run fails.
with subprocess.Popen([MARXAN_EXECUTABLE],
                      cwd=MARXAN_FOLDER,
                      stdout=subprocess.PIPE,
                      stderr=subprocess.STDOUT,
                      universal_newlines=True,
                      bufsize=-1) as process:
    # Iterating the pipe reads until EOF, so lines still buffered when the process
    # exits are not lost (a poll()/readline() loop can stop before draining them).
    for output in process.stdout:
        print(output.strip())

# Leaving the `with` block waits for the process, so returncode is set here.
if process.returncode != 0:
    print(f'Marxan exited with return code {process.returncode}')

## Postprocess Output

These are the validation data types that we need to produce before ingesting the results into the DB.  
The next step connects to both databases, extracts the tables we require and generates pydantic models from them.

In [53]:
from sqlalchemy import create_engine, MetaData
from sqlalchemy.ext.automap import automap_base
from sa2schema.to.pydantic import sa_model

In [63]:
# Display the JSON schema of the pydantic model generated from the reflected `users` table.
# NOTE(review): `apiBase` is created in the following cell, so on a fresh kernel this
# cell only works after that one has been run.
JSON(sa_model(apiBase.classes.users).schema_json())



<IPython.core.display.JSON object>

In [54]:
# Connection URL of the API database. It can be overridden through the MARXAN_API_DB_URL
# environment variable; the default is the same host, database and credentials as before.
# The scheme is 'postgresql://' because SQLAlchemy 1.4+ no longer accepts 'postgres://'.
API_DB_URL = os.environ.get(
    'MARXAN_API_DB_URL',
    'postgresql://marxan-api:marxan-api@marxan-postgresql-api:5432/marxan-api',
)
api_engine = create_engine(API_DB_URL)

# Reflect `output_results` from the public schema. The printed keys show that the
# related tables (scenarios, users, projects, organizations) are reflected with it.
api_meta = MetaData(schema="public")
api_meta.reflect(bind=api_engine, only=['output_results'])
print(api_meta.tables.keys())

# Map the reflected tables to classes.
apiBase = automap_base(metadata=api_meta)
apiBase.prepare()

# Mapped classes are ready: output_results, scenarios, projects, organizations, users.
# Build the pydantic model for output_results and show its JSON schema.
OutputResults = sa_model(apiBase.classes.output_results)
JSON(OutputResults.schema_json())

dict_keys(['public.output_results', 'public.scenarios', 'public.users', 'public.projects', 'public.organizations'])


  util.warn(
  util.warn(
  util.warn(


<IPython.core.display.JSON object>

In [64]:
# Connection URL of the geoprocessing database. It can be overridden through the
# MARXAN_GEO_API_DB_URL environment variable; the default is the same host, database and
# credentials as before. The scheme is 'postgresql://' because SQLAlchemy 1.4+ no longer
# accepts 'postgres://'.
GEO_API_DB_URL = os.environ.get(
    'MARXAN_GEO_API_DB_URL',
    'postgresql://marxan-geo-api:marxan-geo-api@marxan-postgresql-geo-api:5432/marxan-geo-api',
)
geo_api_engine = create_engine(GEO_API_DB_URL)

# Reflect every table of the public schema and list what was found.
geo_api_meta = MetaData(schema="public")
geo_api_meta.reflect(bind=geo_api_engine)
print(geo_api_meta.tables.keys())

# Map the reflected tables to classes.
geoApiBase = automap_base(metadata=geo_api_meta)
geoApiBase.prepare()

# Mapped classes are ready: build the pydantic model for output_results_data and show
# its JSON schema.
OutputResultsData = sa_model(geoApiBase.classes.output_results_data)
JSON(OutputResultsData.schema_json())

  util.warn(


dict_keys(['public.spatial_ref_sys', 'public.migrations', 'public.admin_regions', 'public.admin_regions_0', 'public.admin_regions_1', 'public.admin_regions_2', 'public.wdpa', 'public.features_data', 'public.scenario_features_data', 'public.planning_units_geom', 'public.planning_units_geom_square', 'public.planning_units_geom_hexagon', 'public.planning_units_geom_irregular', 'public.scenarios_pu_data', 'public.scenarios_pu_cost_data', 'public.output_results_data'])


<IPython.core.display.JSON object>

Once the models for validation are ready we can use the same rules we had above to load / save from pandas. `CreateFileFromDF(filename: str, df: Type['Dataframe'], model: Type['Model'])`

In [None]:
#CreateFileFromDF(filename: str, df: Type['Dataframe'], model=OutputResultsData)

In [None]:
#CreateFileFromDF(filename: str, df: Type['Dataframe'], model=OutputResults)