# Output models

Class models for different types of output files.
- OutputRun (files of type output_r00001.csv)
- OutputMV (files of type output_mv00001.csv)
- OutputSum (files of type output_sum.csv)
- OutputSsoln (files of type output_ssoln.csv)
- OutputSolutionsMatrix (files of type output_solutionsmatrix.csv)

No models are defined for the file types *output_sen.dat* and *output_log.dat*, because so far no calculation requires ingesting those files into the database.

In [192]:
from pydantic import BaseModel, ValidationError, validator, Field
from pydantic.generics import GenericModel
from typing import Generic, TypeVar, Generic, Optional, Dict, Type
import pandas as pd
%run marxan_utils.ipynb

In [4]:
# Folder of the Marxan scenario whose output files are parsed in this notebook.
MARXAN_FOLDER = '/home/jovyan/work/datasets/raw/Marxan_BLM/BLM_0.001'
# Read the scenario's input.dat and turn it into an input model; the test cells
# below use userInputFile (OUTPUTDIR, SCENNAME) to build the output file paths.
# NOTE(review): DatFile and inputDatFile are presumably provided by
# marxan_utils.ipynb through the %run above -- confirm.
InputFile = DatFile(f'{MARXAN_FOLDER}/input.dat')
InputFile.read()
userInputFile = inputDatFile.from_dat(InputFile.data)

In [251]:
class OutputRun(BaseModel):
    """
    Row model for the per-run solution files (``_r00001.csv``) and the
    best-solution file (``_best.csv``).

    One such file is produced for each repeat run; every row tells whether a
    planning unit is part of the solution found in that run.
    """

    # General parameters
    PUID: int = Field(
        ...,
        title='Planning Unit id',
        description='List of planning units',
    )
    # Binary flag: the ge/le bounds restrict the value to 0 or 1.
    SOLUTION: int = Field(
        ...,
        title='Solution',
        description='Planning unit included in this solution if 1, not included if 0',
        ge=0,
        le=1,
    )
        
class OutputMV(BaseModel):
    """
    Row model for the missing-values files (``_mv00001.csv`` or ``_mvbest.csv``).

    These files describe how well each conservation feature is represented in
    the solution of a run, i.e. how the solution performed relative to the
    targets. Some columns simply repeat information from the Conservation
    Feature File. The model declares one field per column.
    """

    # General parameters.
    # Descriptions are built with implicit string concatenation so that the
    # source indentation does not leak into the generated schema text.
    Conservation_Feature: int = Field(
        ...,
        title='Conservation feature id',
        description='The unique ID number of the conservation feature',
    )
    # NOTE(review): a missing name appears to be parsed as the string 'nan'
    # (see the sample output of the OutputMV test cell) -- confirm whether
    # downstream code should treat it as "no name".
    Feature_Name: str = Field(
        ...,
        title='Feature Name',
        description=(
            'The optional alphabetic name of the conservation feature. '
            'If no name has been specified then nothing will appear in this column.'
        ),
    )
    Target: float = Field(
        ...,
        title='target',
        description='The target level of representation (if any) for that conservation feature',
    )
    Amount_Held: float = Field(
        ...,
        title='Amount held',
        description='The amount of that conservation feature captured in the reserve system',
    )
    Occurrence_Target: float = Field(
        ...,
        title='Occurrence target',
        description=(
            'The target number of occurrences in the reserve system '
            'for that conservation feature'
        ),
    )
    Occurrences_Held: float = Field(
        ...,
        title='Occurrences held',
        description=(
            'The number of occurrences of the conservation feature captured in the solution. '
            'Again, only occurrences in valid clumps are included'
        ),
    )
    Separation_Target: float = Field(
        ...,
        title='Separation target',
        description=(
            'The number of mutually and adequately separated occurrences of that '
            'conservation feature required in the reserve system'
        ),
    )
    Separation_Achieved: float = Field(
        ...,
        title='Separation Achieved',
        description=(
            'The number reported here will be the lowest of either: the number of '
            'separate occurrences that are actually achieved in the reserve system; or the target '
            'number of separate occurrences. The separation count (see Appendix B-1.3.1) never '
            'exceeds the separation target for that feature. This is a convention which speeds up '
            'the execution of the software but it means that no information is given about how far '
            'this target is exceeded.'
        ),
    )
    Target_Met: str = Field(
        ...,
        title='Target met',
        description=(
            'An alphabetic variable that returns ‘yes’ if all the targets set for that feature '
            'are met, otherwise it returns ‘no’'
        ),
    )
    # Proportion, therefore bounded to the closed interval [0, 1].
    MPM: float = Field(
        ...,
        title='Minimum Proportion Met',
        description='Proportion of target met, min is 0 max is 1',
        ge=0,
        le=1,
    )
        
class OutputSum(BaseModel):
    """
    Row model for the summary file (``_sum.csv``).

    The file holds the summary information of each repeat run, reporting how
    the solution of that run performed relative to the targets. The model
    declares one field per column.
    """

    # Descriptions are built with implicit string concatenation so that the
    # source indentation does not leak into the generated schema text.
    Run_Number: int = Field(
        ...,
        title='Run number',
        description='Which of the repeat runs (or solutions) the output pertains to',
    )
    Score: float = Field(
        ...,
        title='Score',
        description=(
            'This is the overall objective function value for the solution from that run. '
            'This includes not only the cost of the planning units and the boundary length but '
            'also the penalties for failing to adequately represent all conservation features or '
            'exceeding the cost threshold. It is useful to know this value because it is how '
            'Marxan chooses the ‘best’ solution out of your repeat runs.'
        ),
    )
    Cost: float = Field(
        ...,
        title='Cost',
        description=(
            'This is the total cost of the reserve system as determined solely by '
            'the costs given to each planning unit.'
        ),
    )
    Planning_Units: int = Field(
        ...,
        title='Planning Units',
        description='The number of planning units contained in the solution for that run',
    )
    Connectivity: float = Field(
        ...,
        title='Connectivity',
        description=(
            'The total boundary length of the reserve system. '
            'If boundary length is not being considered in the analyses '
            '(i.e. no Boundary Length File is provided), then this value will read ‘0.0’.'
        ),
    )
    Connectivity_Total: float = Field(
        ...,
        title='Connectivity Total',
        description='Total boundary of planning units in study area.',
    )
    Connectivity_In: float = Field(
        ...,
        title='Connectivity In',
        description='Sum of shared boundary between selected planning units.',
    )
    Connectivity_Edge: float = Field(
        ...,
        title='Connectivity Edge',
        description='Same as Connectivity',
    )
    Connectivity_Out: float = Field(
        ...,
        title='Connectivity Out',
        description='Sum of the outer boundaries of unselected planning units.',
    )
    Connectivity_In_Fraction: float = Field(
        ...,
        title='Connectivity In Fraction',
        description=(
            'Connectivity_In/Connectivity_Total - the larger this fraction, '
            'the more spatially compact the solution.'
        ),
    )
    Penalty: float = Field(
        ...,
        title='Penalty',
        description=(
            'The penalty that was added to the objective function because the reserve system '
            'failed to meet the representation targets for all features. If all features are '
            'adequately represented then the penalty value will be either 0.0 or “-0.0”. '
            '(Because of round-off error it is not likely to be exactly equal to 0, but with only '
            'one decimal place presented the round-off error will probably be hidden). '
            'The penalty is useful to know because it can give you an idea of the cost required '
            'to meet the remaining targets, this is something that is not captured simply by '
            'looking at the shortfall. It is also another way to rank the success of runs, '
            'looking only at those solutions that have a low penalty.'
        ),
    )
    Shortfall: float = Field(
        ...,
        title='Shortfall',
        description=(
            'The amount by which the targets for conservation features have not been met '
            'in the solution for that run. The shortfall reported here is the total shortfall '
            'summed across all conservation features. The shortfall is a good indication of '
            'whether missing conservation features are very close or very far from their targets. '
            'If there are a number of conservation features which have missed their targets but '
            'the combined shortfall is very small then a planner might not be too concerned.'
        ),
    )
    Missing_Values: int = Field(
        ...,
        title='Missing Values',
        description=(
            'The number of features that did not achieve their targets in the final solution '
            'for that run. This is screened according to the ‘misslevel’, which has been set in '
            'the Input Parameter File. If the miss level is set to 1 then every conservation '
            'feature which falls below its target level is counted as missing. If the miss level '
            'is set lower than 1 (e.g. 0.98), Marxan may not report a feature as missing even if '
            'the reserve system contains slightly less than the target amount.'
        ),
    )
    MPM: float = Field(
        ...,
        title='Minimum Proportion Met',
        description=(
            'The Minimum Proportion Met for the worst performing feature. '
            'That is, this value corresponds to the lowest MPM value in the missing value file.'
        ),
    )
        

class OutputSsoln(BaseModel):
    """
    Row model for the summed-solution file (``_ssoln.csv``).

    The summed solution gives the selection frequency of every planning unit
    across all runs: each row pairs a planning unit ID with the number of
    times that unit was selected in the final solution of the repeat runs.
    """

    planning_unit: int = Field(
        ...,
        title='Planning Unit',
        description='ID number of a planning unit',
    )
    number: int = Field(
        ...,
        title='Number',
        description='Number of times a planning unit was selected in the final solution across all repeat runs',
    )
        
        
# OutputSolMat
### Depends on the number of Planning Units:
### Create a class dynamically depending on the number of planning units of the file
### https://stackoverflow.com/questions/62267544/pydantic-generate-model-from-dict
#OutputSolMat = createDynamicModel(filename= filename, name= 'OutputSolMat', dict_def= d)

def _load_user_input():
    """Parse ``input.dat`` found in ``MARXAN_FOLDER`` and return the input model."""
    dat_file = DatFile(f'{MARXAN_FOLDER}/input.dat')
    dat_file.read()
    return inputDatFile.from_dat(dat_file.data)


class OutputSolutionsMatrix(BaseModel):
    """
    Row model for the solutions matrix file (``_solutionsmatrix.csv``).

    Each row describes one solution: its label (``S1``, ``S2``, ...) and, for
    every planning unit, whether the unit was selected in that solution.

    Both validators re-read ``input.dat`` from the module-level
    ``MARXAN_FOLDER`` on every validated row, so that variable must point at
    the scenario whose matrix is being parsed.
    """

    SolutionsMatrix: str = Field(
        ...,
        title='Solution number',
        description='Solution number',
    )
    PU: Dict[str, float] = Field(
        ...,
        title='Planning unit selection',
        description=(
            'Planning units selected in each solution. The dictionary parameters represent: '
            'key = Planning unit number (P1, P2, P3...), '
            'value = selection in this solution (0 = False, 1 = True)'
        ),
    )

    @validator('SolutionsMatrix')
    def SolutionsMatrix_is_valid(cls, method: str) -> str:
        """
        Check the solution label against NUMREPS from ``input.dat``.

        A field validator only sees one row, so the total number of solutions
        cannot be counted here; instead the numeric part of the label must lie
        between 1 and NUMREPS. (The previous check compared the length of the
        literal string 'SolutionsMatrix' and crashed with a NameError.)

        :param method: the solution label of the current row, e.g. ``'S1'``.
        :return: the label, unchanged.
        :raises ValueError: if the label is not ``S<number>`` or the number is
            outside ``1..NUMREPS``.
        """
        expected = int(_load_user_input().NUMREPS)
        try:
            index = int(method.strip().lstrip('Ss'))
        except ValueError:
            raise ValueError(
                f"Solution label {method!r} is not of the form 'S<number>'"
            ) from None
        if not 1 <= index <= expected:
            raise ValueError(
                f"Solutions in input file is {expected} but got solution {method!r}"
            )
        return method

    @validator('PU')
    def PU_is_valid(cls, method: dict) -> dict:
        """
        Check that the row holds one entry per planning unit of the pu.dat file.

        :param method: mapping of planning unit label to selection value.
        :return: the mapping, unchanged.
        :raises ValueError: if the number of entries differs from the number
            of planning units listed in the planning unit file.
        """
        user_input = _load_user_input()
        pu_path = f'{MARXAN_FOLDER}/{user_input.INPUTDIR}/{user_input.PUNAME}'
        expected = len(CreateListModelFromFile(pu_path, planningUnits))
        # Compare against the validated value itself; the old code referenced
        # an undefined name ``PU`` here.
        if len(method) != expected:
            raise ValueError(f"PU in input file is {expected} but got {len(method)}")
        return method

In [114]:
# Test OutputRun: build the path of the first run's solution file from the
# parsed input.dat settings and display the first two parsed rows.
filename = f'{MARXAN_FOLDER}/{userInputFile.OUTPUTDIR}/{userInputFile.SCENNAME}_r00001.csv'
CreateListModelFromFile(filename, OutputRun)[0:2]

[OutputRun(PUID=1, SOLUTION=1), OutputRun(PUID=2, SOLUTION=0)]

In [115]:
# Test OutputMV: build the path of the first run's missing-values file from the
# parsed input.dat settings and display the first two parsed rows.
filename = f'{MARXAN_FOLDER}/{userInputFile.OUTPUTDIR}/{userInputFile.SCENNAME}_mv00001.csv'
CreateListModelFromFile(filename, OutputMV)[0:2]

[OutputMV(Conservation_Feature=59, Feature_Name='nan', Target=37707000.0, Amount_Held=39430000.0, Occurrence_Target=0.0, Occurrences_Held=241.0, Separation_Target=0.0, Separation_Achieved=0.0, Target_Met='yes', MPM=1.0),
 OutputMV(Conservation_Feature=58, Feature_Name='nan', Target=4341000.0, Amount_Held=6960000.0, Occurrence_Target=0.0, Occurrences_Held=29.0, Separation_Target=0.0, Separation_Achieved=0.0, Target_Met='yes', MPM=1.0)]

In [116]:
# Test OutputSum: build the path of the summary file from the parsed input.dat
# settings and display the first two parsed rows.
filename = f'{MARXAN_FOLDER}/{userInputFile.OUTPUTDIR}/{userInputFile.SCENNAME}_sum.csv'
CreateListModelFromFile(filename, OutputSum)[0:2]

[OutputSum(Run_Number=1, Score=1364180.78791, Cost=1334628.0, Planning_Units=3191, Connectivity=14404000.0, Connectivity_Total=51664000.0, Connectivity_In=5562000.0, Connectivity_Edge=14404000.0, Connectivity_Out=31698000.0, Connectivity_In_Fraction=0.107657, Penalty=744.78791, Shortfall=2381000.0, Missing_Values=0, MPM=0.996153),
 OutputSum(Run_Number=2, Score=1371390.855987, Cost=1340920.0, Planning_Units=3202, Connectivity=14640000.0, Connectivity_Total=51664000.0, Connectivity_In=5488000.0, Connectivity_Edge=14640000.0, Connectivity_Out=31536000.0, Connectivity_In_Fraction=0.106225, Penalty=1190.855987, Shortfall=2360000.0, Missing_Values=0, MPM=0.959233)]

In [117]:
# Test OutputSsoln: build the path of the summed-solution file from the parsed
# input.dat settings and display the first two parsed rows.
filename = f'{MARXAN_FOLDER}/{userInputFile.OUTPUTDIR}/{userInputFile.SCENNAME}_ssoln.csv'
CreateListModelFromFile(filename, OutputSsoln)[0:2]

[OutputSsoln(planning_unit=12178, number=0),
 OutputSsoln(planning_unit=12177, number=0)]

In [253]:
# Test OutputSolutionsMatrix: build the path of the solutions matrix file from
# the parsed input.dat settings and display the first parsed row.
filename = f'{MARXAN_FOLDER}/{userInputFile.OUTPUTDIR}/{userInputFile.SCENNAME}_solutionsmatrix.csv'
CreateListModelFromFile(filename, OutputSolutionsMatrix)[0:1]

[OutputSolMat(SolutionsMatrix='S1', PU={'P1': 1.0, 'P2': 0.0, 'P3': 0.0, 'P4': 1.0, 'P5': 1.0, 'P6': 1.0, 'P7': 0.0, 'P8': 0.0, 'P9': 1.0, 'P10': 0.0, 'P11': 0.0, 'P12': 1.0, 'P13': 0.0, 'P14': 1.0, 'P15': 0.0, 'P16': 1.0, 'P17': 0.0, 'P18': 0.0, 'P19': 0.0, 'P20': 0.0, 'P21': 0.0, 'P22': 0.0, 'P23': 1.0, 'P24': 0.0, 'P25': 1.0, 'P26': 1.0, 'P27': 0.0, 'P28': 1.0, 'P29': 0.0, 'P30': 0.0, 'P31': 0.0, 'P32': 0.0, 'P33': 0.0, 'P34': 1.0, 'P35': 0.0, 'P36': 0.0, 'P37': 0.0, 'P38': 0.0, 'P39': 0.0, 'P40': 0.0, 'P41': 0.0, 'P42': 0.0, 'P43': 0.0, 'P44': 1.0, 'P45': 0.0, 'P46': 0.0, 'P47': 0.0, 'P48': 0.0, 'P49': 0.0, 'P50': 0.0, 'P51': 0.0, 'P52': 0.0, 'P53': 0.0, 'P54': 0.0, 'P55': 0.0, 'P56': 0.0, 'P57': 0.0, 'P58': 0.0, 'P59': 0.0, 'P60': 0.0, 'P61': 0.0, 'P62': 0.0, 'P63': 0.0, 'P64': 1.0, 'P65': 0.0, 'P66': 0.0, 'P67': 0.0, 'P68': 1.0, 'P69': 0.0, 'P70': 1.0, 'P71': 0.0, 'P72': 0.0, 'P73': 0.0, 'P74': 1.0, 'P75': 0.0, 'P76': 1.0, 'P77': 0.0, 'P78': 0.0, 'P79': 1.0, 'P80': 0.0, 'P81': 1.