## Marxan utils
Developed between January and April 2021  
Includes functions to run marxan and microservices locally 

In [50]:
import subprocess
import os
import io
import re
import csv
from shutil import rmtree

from datetime import datetime
import abc
import collections
from typing import Generic, TypeVar, Generic, Optional, List, Type, Dict
from pydantic import BaseModel, ValidationError, validator, Field
from pydantic.generics import GenericModel
from pydantic import create_model


import pandas as pd
import geopandas as gpd
from IPython.display import display, JSON
import ipywidgets as widgets

import json

from uuid import UUID, uuid4
from scipy.cluster.hierarchy import linkage, fcluster
import pandas as pd


import matplotlib.pyplot as plt
import math
import numpy as np
from shutil import copyfile,ignore_patterns,copytree, rmtree
import kneed
from scipy.interpolate import splrep, splev

import random

## Helper classes and functions for reading and data mutation

In [17]:
class DatFile(object):
    """
    Read and write dat files.

    The content is kept in ``data``. ``read`` stores it as a list of lines;
    ``write`` accepts either a single string or a list/tuple of lines.
    """
    def __init__(self, file_path: str = None):
        # Ensure the file has the right extension
        if file_path and not file_path.endswith('.dat'):
            raise NameError("File must be a '.dat' extension")

        self.__path = file_path
        self.data = None

    def __test_path(self, path):
        """Adopt ``path`` when no path is stored yet.

        A path given at construction time takes precedence over ``path``.

        Raises:
            NameError: If neither a stored path nor ``path`` is available.
        """
        if not self.__path and path:
            self.__path = path
        elif not self.__path and not path:
            raise NameError('No path to file provided')

    def update(self, data, incremental: bool = False):
        """Updates the data.

        Args:
            data (any): The data the object should store.
            incremental (bool): When True and data is already stored, merge
                ``data`` into it (dict: ``update``; otherwise ``+`` after
                converting ``data`` to the stored type). When False, or when
                nothing is stored yet, replace the stored data.
        Returns:
            None
        """
        if not incremental or not self.data:
            self.data = data
        elif isinstance(self.data, dict):
            # dicts have no append(); merging the keys is the incremental operation
            self.data.update(data)
        else:
            self.data = self.data + type(self.data)(data)

    def read(self, file_path: str = None):
        """Reads the file into ``data`` as a list of lines.

        The file is decoded as UTF-8 first; if that fails it is decoded as
        ISO-8859-1. Both paths store the same shape (a list of lines).

        Args:
            file_path (string): The full path to the file that will be read.
                Only used when no path was given before.
        Returns:
            None
        """
        self.__test_path(file_path)
        try:
            with io.open(self.__path, mode="r", encoding="utf-8") as f:
                self.data = f.readlines()
        except UnicodeDecodeError:
            with io.open(self.__path, mode="r", encoding="ISO-8859-1") as f:
                self.data = f.readlines()

    def write(self, file_path: str = None, mode: str = "w"):
        """Writes ``data`` to the file as UTF-8 text.

        Args:
            file_path (string): The full path to the file that will be written.
                Only used when no path was given before.
            mode (string): Optional. The file write mode. Default value is w.
        Returns:
            None
        Raises:
            ValueError: If there is no data to write.
            TypeError: If ``data`` is neither a string nor a list/tuple of lines.
        """
        self.__test_path(file_path)
        # Validate before opening so a bad payload never truncates an existing file.
        if self.data is None:
            raise ValueError('No data to write')
        if not isinstance(self.data, (str, list, tuple)):
            raise TypeError('data must be a string or a list of lines')

        with io.open(self.__path, mode, encoding="utf-8") as f:
            if isinstance(self.data, str):
                f.write(self.data)
            else:
                f.writelines(self.data)

In [18]:
# Just in case we need to format the float numbers in a very specific way
class MyNumber:
    """
    Wrapper that lets a value be formatted with a regular format spec while
    widening the exponent of 'E' notation output by two extra zeros
    (e.g. ``1.23E+03`` becomes ``1.23E+0003``).
    """
    def __init__(self, val):
        self.val = val

    def __format__(self, format_spec):
        # Render with the standard machinery first, then post-process.
        template = ''.join(('{0:', format_spec, '}'))
        rendered = template.format(self.val)
        if 'E' not in rendered:
            return rendered
        # exp starts with the sign character; the padding goes right after it.
        mantissa, exp = rendered.split('E')
        return f'{mantissa}E{exp[0]}00{exp[1:]}'

def num(s):
    """
    Convert ``s`` to a number when possible.

    Tries ``int`` first, then ``float``; if both reject the value with a
    ValueError the input is returned unchanged.
    """
    for caster in (int, float):
        try:
            return caster(s)
        except ValueError:
            continue
    return s

def getSizeOfNestedList(listOfElem):
    ''' Count the leaf elements of an arbitrarily nested list.

    Only elements whose type is exactly ``list`` are descended into; every
    other element counts as one.
    '''
    return sum(
        getSizeOfNestedList(item) if type(item) == list else 1
        for item in listOfElem
    )

def _readTabularFile(filename: str)-> dict:
    """
    Gets a input.dat file and outputs a dict of parameters.

    Args:
        filename (string): The full path to the file that will be read.
    Returns:
        The contents of the file as a dict.
    """
    if filename.endswith('.dat'):
        inputData = DatFile(filename)
        inputData.read()
        outputData =[]
    
        dialect= csv.Sniffer().sniff(inputData.data[0],[',', '\t',' '])
        for line in inputData.data: 
            if dialect.delimiter ==',':
                pair= re.compile('\s').split(line.replace('"','').replace(' ', '_').strip('""').strip('\r').strip('\n').replace(',', '\t'))
            else:
                pair= re.compile('\s').split(line.replace('"','').strip('""').strip('\r').strip('\n'))
            outputData.append(pair)
    
    else:
        df = pd.read_csv(filename)
        df.columns = df.columns.str.rstrip().str.replace(' ','_')
        df = df.applymap(str)
        outputData = [df.columns.values.tolist()] + df.values.tolist()
    
    return outputData


def CreateListModelFromFile(filename: str, model: Type['Model'])-> List['Model']:
    """
    Reads a tabular file and parses every data row with the given model.

    For ``OutputSolutionsMatrix`` the wide table is first reshaped: each row
    becomes a pair of its 'SolutionsMatrix' value and a dict holding the
    remaining columns (stored under 'PU').

    Args:
        filename (string): The full path to the file that will be read.
        model (Type['Model']): Data model used to read and validate the data
    Returns:
        A list with one ``model`` instance per data row.
    """
    if model != OutputSolutionsMatrix:
        table = _readTabularFile(filename)
    else:
        wide = _readTabularFile(filename)
        table = [['SolutionsMatrix', 'PU']]
        for values in wide[1:]:
            units = dict(zip(wide[0], values))
            label = units.pop('SolutionsMatrix')
            table.append([label, units])

    parsed = []
    for values in table[1:]:
        record = dict(zip(table[0], values))
        parsed.append(model.parse_obj(record))
    return parsed

def CreateFileFromDF(filename: str, df: Type['Dataframe'], model: Type['Model'])-> List[list]:
    """
    Validates a dataframe against a model and writes it to a file.

    - ``inputDatFile``: the (single-column) frame is transposed and its first
      record is written as space-delimited ``key value`` lines.
    - Any other model: one row per record, keeping only the columns whose
      value in the first record is not None. The models listed in
      ``outputs_as_csv`` are written comma-delimited, everything else
      tab-delimited. ``OutputSolutionsMatrix`` frames get their second column
      (a dict per row) expanded into separate columns first.

    Args:
        filename (string): The full path to the file that will be written.
        df (Type['Dataframe']): Dataframe to save
        model (Type['Model']): Data model used to validate the data
    Returns:
        The first two validated model instances.
    """
    if model == inputDatFile:
        data = df.transpose().to_dict('records')
        validatedData = [inputDatFile(**x) for x in data]
        csv.register_dialect('dat', delimiter=' ')
        with open(filename, 'w', encoding='utf8', newline='') as output_file:
            writer = csv.writer(output_file, dialect='dat')
            writer.writerows(data[0].items())
        return validatedData[0:2]

    data = df.to_dict('records')
    validatedData = [model(**x) for x in data]

    if model == OutputSolutionsMatrix:
        # Expand the per-row dict stored in the second column into real columns.
        nested_column = df.columns[1]
        data = (df.drop(columns=nested_column)
                  .join(df[nested_column].apply(pd.Series))
                  .to_dict('records'))

    # The header is taken from the first record only; None values are never written.
    keys = [key for key, val in data[0].items() if val is not None]
    dataNotNone = [{key: val for key, val in record.items() if val is not None}
                   for record in data]

    outputs_as_csv = [OutputRun, OutputMV, OutputSsoln, OutputSolutionsMatrix, OutputSum]
    writer_options = {}
    if model not in outputs_as_csv:
        csv.register_dialect('dat', delimiter='\t')
        writer_options['dialect'] = 'dat'

    with open(filename, 'w', encoding='utf8', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, keys, **writer_options)
        dict_writer.writeheader()
        dict_writer.writerows(dataNotNone)

    return validatedData[0:2]

#def save_button(filename: str, model: Type['Model'], data: Type['QgridWidget'])-> None:
#    """
#    creates a widget button and attach a on click event.
#    
#    Args:
#        filename (string): The full path to the file that will be read.
#        data (Type['QgridWidget']): Qgrid widget
#        model (Type['Model']): Data model used to read and validate the data
#    """
#    button = widgets.Button(description="Save")
#    output = widgets.Output()
#
#    display(button, output)
#
#    def on_button_clicked(b):
#        with output:
#            CreateFileFromDF(filename, data.get_changed_df(), model)
#
#    button.on_click(on_button_clicked)
    
def create_marxanProject(path: str)-> None:
    """
    Create the ``input`` and ``output`` folders of a Marxan project under
    ``path`` and copy the file named ``src`` (relative to the current working
    directory) into ``path``.

    Existing folders are left as they are, so the function can be re-run.

    NOTE(review): 'src' is hard-coded and looks like a placeholder; the
    original call used ``copyfile`` with the project directory as destination,
    which cannot succeed. Confirm which file is meant to be copied.

    Args:
        path (string): Root folder of the Marxan project.
    """
    # Function-scope import: `copy` is not among the names imported at file level.
    from shutil import copy

    os.makedirs(f'{path}/input', exist_ok=True)
    os.makedirs(f'{path}/output', exist_ok=True)
    # copy() accepts a directory as destination, copyfile() does not.
    copy('src', f'{path}')

def execute_marxan(path: str)-> None:
    """
    Run the ``marxan`` executable found in ``path`` and print its output.

    The executable is started with ``path`` as its working directory, because
    it needs to run from the Marxan root folder to find the required data.
    The working directory of the calling Python process is not changed.
    stderr is merged into stdout and every output line is printed stripped.

    Args:
        path (string): Marxan root folder containing the ``marxan`` executable.
    """
    project_dir = os.path.abspath(path)
    MARXAN_EXECUTABLE = os.path.join(project_dir, 'marxan')  # new algorithm
    with subprocess.Popen([MARXAN_EXECUTABLE],
                          cwd=project_dir,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.STDOUT,
                          universal_newlines=True,
                          bufsize=-1) as process:
        # Iterating the pipe reads until EOF, so output written right before
        # the process exits is not lost.
        for output in process.stdout:
            print(output.strip())
    

def createDynamicModel(filename: str, name: str, dict_def: dict):
    """
    Build a pydantic model class at runtime.

    Fields come from two sources:
    - the column names of the tabular file ``filename`` (when given): the
      first column becomes a ``str`` field whose default is its own name,
      every other column a required ``int`` field;
    - ``dict_def``: a tuple value is used as the field definition as-is, a
      dict value becomes a nested model named ``{name}_{field_name}``.
      Entries in ``dict_def`` override file-derived fields of the same name.

    NOTE(review): the original computed the file-derived definitions but never
    used them, and replaced ``filename`` with a path built from globals that
    are not defined here. Merging them into the model is an interpretation of
    the intent — confirm.

    Args:
        filename (string): Path to the tabular file, or a falsy value to skip it.
        name (string): Name of the model class to create.
        dict_def (dict): Field definitions (see above); may be empty or None.
    Returns:
        The model class returned by ``create_model``.
    Raises:
        ValueError: If a ``dict_def`` value is neither a tuple nor a dict.
    """
    fields = {}

    if filename:
        file_as_list = _readTabularFile(filename)
        # Transposing gives one tuple per column; its first item is the column name.
        column_names = [column[0] for column in zip(*file_as_list)]
        fields.update(dict.fromkeys(column_names, (int, ...)))
        if column_names:
            fields[column_names[0]] = (str, column_names[0])

    for field_name, value in (dict_def or {}).items():
        if isinstance(value, tuple):
            fields[field_name] = value
        elif isinstance(value, dict):
            # Nested models are defined by their dict only, never by the file.
            nested = createDynamicModel(None, f'{name}_{field_name}', value)
            fields[field_name] = (nested, ...)
        else:
            raise ValueError(f"Field {field_name}:{value} has invalid syntax")
    return create_model(name, **fields)


def readInput(MARXAN_FOLDER, MARXAN_INPUTDATA):
    """
    Load the Marxan input file, force the verbosity/save options and write
    the file back.

    VERBOSITY is set to 0 and every SAVE* option listed below to 3. The
    resulting settings (without 'BLOCKDEFNAME') are written back to the same
    file, so calling this function modifies the file on disk.

    Returns:
        The parsed settings object with the overrides applied.
    """
    dat_path = f'{MARXAN_FOLDER}/{MARXAN_INPUTDATA}'
    raw_file = DatFile(dat_path)
    raw_file.read()
    userInputFile = inputDatFile.from_dat(raw_file.data)

    forced_options = (
        ('VERBOSITY', 0),
        ('SAVERUN', 3),
        ('SAVEBEST', 3),
        ('SAVESUMMARY', 3),
        ('SAVESCEN', 3),
        ('SAVETARGMET', 3),
        ('SAVESUMSOLN', 3),
    )
    for option, value in forced_options:
        setattr(userInputFile, option, value)

    settings_frame = pd.DataFrame.from_dict(vars(userInputFile), orient='index')
    settings_frame = settings_frame.drop('BLOCKDEFNAME')
    CreateFileFromDF(dat_path, settings_frame, inputDatFile)
    return userInputFile

def saveInput(MARXAN_FOLDER, MARXAN_INPUTDATA,userInputFile):
    """
    Write the given settings object back to the Marxan input file.

    The object's attributes are turned into a one-column frame indexed by
    option name; the 'BLOCKDEFNAME' row is dropped before writing.
    """
    settings = vars(userInputFile)
    settings_frame = pd.DataFrame.from_dict(settings, orient='index')
    settings_frame = settings_frame.drop('BLOCKDEFNAME')
    target = f'{MARXAN_FOLDER}/{MARXAN_INPUTDATA}'
    CreateFileFromDF(target, settings_frame, inputDatFile)
    

def validateFile(MARXAN_FOLDER,MARXAN_INPUTDATA, model):
    """
    Read the Marxan file that belongs to ``model``, validate it and write the
    validated content back to the same file.

    The file location is derived from the input file settings (INPUTDIR /
    OUTPUTDIR, SCENNAME and the per-file name options). Note that
    ``readInput`` also rewrites the input file itself.

    Args:
        MARXAN_FOLDER (string): Marxan root folder.
        MARXAN_INPUTDATA (string): Name of the input file inside that folder.
        model: One of the data models listed in ``model_dict`` below.
    Returns:
        A DataFrame with one row per validated record.
    Raises:
        ValueError: If no file is associated with ``model``.
    """
    userInputFile = readInput(MARXAN_FOLDER,MARXAN_INPUTDATA)
    input_dir = f'{MARXAN_FOLDER}/{userInputFile.INPUTDIR}'
    output_prefix = f'{MARXAN_FOLDER}/{userInputFile.OUTPUTDIR}/{userInputFile.SCENNAME}'
    model_dict = {planningUnits: f'{input_dir}/{userInputFile.PUNAME}',
                  conservationFeature: f'{input_dir}/{userInputFile.SPECNAME}',
                  planningUnitVSConservationFeatureV: f'{input_dir}/{userInputFile.PUVSPRNAME}',
                  boundaryLength: f'{input_dir}/{userInputFile.BOUNDNAME}',
                  OutputRun: f'{output_prefix}_best.csv',
                  OutputMV: f'{output_prefix}_mvbest.csv',
                  OutputSum: f'{output_prefix}_sum.csv',
                  OutputSsoln: f'{output_prefix}_ssoln.csv',
                  OutputSolutionsMatrix: f'{output_prefix}_solutionsmatrix.csv'}
    if model not in model_dict:
        # Previously this fell through to an unbound local at the return statement.
        raise ValueError(f'No Marxan file is associated with model {model!r}')

    file_path = model_dict[model]
    model_list = CreateListModelFromFile(file_path, model)
    model_df = pd.DataFrame([s.__dict__ for s in model_list])
    CreateFileFromDF(file_path, model_df, model)
    return model_df

def validate_all_files(MARXAN_FOLDER: str, MARXAN_INPUTDATA: str)-> tuple:
    """
    Read and validate the input file and the four Marxan input tables.

    Unlike ``validateFile`` the tables are not written back; only
    ``readInput`` rewrites the input file.

    Args:
        MARXAN_FOLDER (string): Marxan root folder.
        MARXAN_INPUTDATA (string): Name of the input file inside that folder.
    Returns:
        A tuple ``(settings, pu, spec, puvspr, bound)``: the parsed settings
        object followed by one DataFrame each for planning units,
        conservation features, planning unit vs. feature, and boundary length.
    """
    userInputFile = readInput(MARXAN_FOLDER,MARXAN_INPUTDATA)
    input_dir = f'{MARXAN_FOLDER}/{userInputFile.INPUTDIR}'
    # Only the input tables are validated here, so only their paths are built.
    input_files = [(planningUnits, f'{input_dir}/{userInputFile.PUNAME}'),
                   (conservationFeature, f'{input_dir}/{userInputFile.SPECNAME}'),
                   (planningUnitVSConservationFeatureV, f'{input_dir}/{userInputFile.PUVSPRNAME}'),
                   (boundaryLength, f'{input_dir}/{userInputFile.BOUNDNAME}')]
    result = [userInputFile]
    for model, file_path in input_files:
        model_list = CreateListModelFromFile(file_path, model)
        result.append(pd.DataFrame([s.__dict__ for s in model_list]))
    return tuple(result)
    


    
def modify_pu_status(MARXAN_FOLDER, MARXAN_INPUTDATA, percentage= 0.3, lock_out =True):
    """
    Assign a random status to a share of the planning units and save the
    result as ``pumock.dat`` in the input folder.

    ``int(percentage * n)`` planning units get a non-zero status, all others
    get 0; the statuses are then shuffled across the planning units.
    With ``lock_out`` the non-zero status is randomly 2 or 3, otherwise it is
    always 2 (2 is the locked-in status; 3 is presumably locked-out —
    TODO confirm).

    Args:
        MARXAN_FOLDER (string): Marxan root folder.
        MARXAN_INPUTDATA (string): Name of the input file inside that folder.
        percentage (float): Share of planning units that get a non-zero status.
        lock_out (bool): Whether status 3 may be assigned as well.
    Returns:
        The planning units DataFrame with the new 'status' column.
    """
    userInputFile = readInput(MARXAN_FOLDER, MARXAN_INPUTDATA)
    # Use the input file that was passed in rather than a hard-coded 'input.dat'.
    pu = validateFile(MARXAN_FOLDER, MARXAN_INPUTDATA, planningUnits)

    n_modified = int(percentage*len(pu))
    upper_bound = 4 if lock_out else 3  # randrange excludes the upper bound
    randomlist = [random.randrange(2, upper_bound, 1) for _ in range(n_modified)]
    randomlist.extend([0] * (len(pu) - n_modified))
    random.shuffle(randomlist)

    pu['status'] = randomlist
    CreateFileFromDF(f'{MARXAN_FOLDER}/{userInputFile.INPUTDIR}/pumock.dat',pu, planningUnits)
    return pu

## BLM Calibration

In [51]:
def createBlmProject(MARXAN_FOLDER: str, MARXAN_INPUTDATA:str, grid_file_path:str) -> tuple:
    """
    Run Marxan once per candidate BLM value, each in its own project copy.

    Candidate BLM values are the multipliers in ``blm_range`` times the square
    root of the mean planning unit area read from ``grid_file_path`` (the
    geometry area is divided by 10**6 — presumably m² to km²; TODO confirm the
    CRS units). For every multiplier a folder ``BLM_<multiplier>`` is created
    next to ``MARXAN_FOLDER`` (an existing one is removed first), the project
    is copied into it except for ``output_*`` entries, the input file is
    rewritten with the BLM value, OUTPUTDIR 'output', NUMREPS 10 and
    VERBOSITY 0, and Marxan is executed there.

    Args:
        MARXAN_FOLDER (string): Marxan root folder used as the template.
        MARXAN_INPUTDATA (string): Name of the input file inside that folder.
        grid_file_path (string): Path to the planning unit grid file.
    Returns:
        A tuple ``(blm_folder, blm_values)``: the created folder names and the
        BLM value used in each of them, in the same order.
    """
    # Rule of thumb: Z = pu_area*[0.01,0.1,1,10,100]
    blm_range = [0.001,0.01,0.1,1,10,100]
    pu_area = gpd.read_file(grid_file_path)
    area = pu_area['geometry'].map(lambda p: p.area / 10**6).mean()
    blm_values = [element * math.sqrt(area) for element in blm_range]

    parent_dir = os.path.dirname(MARXAN_FOLDER)
    blm_folder = []

    for blm, blm_value in zip(blm_range, blm_values):
        print(f'\033[1m --> Running BLM_{blm}...\033[0m')

        # One folder per BLM run at the same level as the original folder;
        # a previous run is removed, outputs are not copied.
        run_name = f'BLM_{blm}'
        run_dir = f'{parent_dir}/{run_name}'
        if os.path.exists(run_dir):
            rmtree(run_dir)
        copytree(MARXAN_FOLDER, run_dir, ignore=ignore_patterns('output_*'))

        ## Read input files
        input_path = f'{run_dir}/{MARXAN_INPUTDATA}'
        InputFile = DatFile(input_path)
        InputFile.read()
        userInputFile = inputDatFile.from_dat(InputFile.data)

        ## Modify for BLM calculations and save as new input.dat
        userInputFile.BLM = blm_value
        userInputFile.OUTPUTDIR = 'output'
        userInputFile.NUMREPS = 10
        userInputFile.VERBOSITY = 0

        userInputFile_df = pd.DataFrame.from_dict(userInputFile.__dict__, orient='index')
        userInputFile_df.drop('BLOCKDEFNAME', inplace=True)
        CreateFileFromDF(input_path, userInputFile_df, inputDatFile)

        # The run writes into its own copy, so the output folder must exist
        # there (not in the template project).
        os.makedirs(f'{run_dir}/{userInputFile.OUTPUTDIR}', exist_ok=True)

        blm_folder.append(run_name)
        os.chmod(f'{run_dir}/marxan', 0o755)
        execute_marxan(run_dir)

    return blm_folder, blm_values

def plotClumping(MARXAN_FOLDER: str, MARXAN_INPUTDATA : str,grid_file_path:str):
    """
    Plot the best solution of every BLM run as a map, one subplot per run.

    The runs are the folders next to ``MARXAN_FOLDER`` whose name starts with
    "BLM", in sorted order. Subplots are laid out in two columns with at least
    three rows; more rows are added when there are more than six runs.

    Args:
        MARXAN_FOLDER (string): Marxan root folder the BLM runs were copied from.
        MARXAN_INPUTDATA (string): Name of the input file inside each run folder.
        grid_file_path (string): Path to the planning unit grid file.
    Returns:
        The result of ``plt.show()``.
    """
    parent_dir = os.path.dirname(MARXAN_FOLDER)
    blm_folder = sorted(filename for filename in os.listdir(parent_dir)
                        if filename.startswith("BLM"))
    fig = plt.figure(figsize=(10,10))

    if blm_folder:
        # The grid is the same for every run, so it is read only once.
        pu_area = gpd.read_file(grid_file_path)
        puid_list = ['PUID','PU_ID','puid','pu_id']
        pu_col = [col for col in puid_list if col in pu_area.columns][0]
        n_rows = max(3, math.ceil(len(blm_folder) / 2))

    for idx, folder in enumerate(blm_folder):
        run_dir = f'{parent_dir}/{folder}'
        axn = fig.add_subplot(n_rows, 2, idx + 1)
        solution = validateFile(run_dir, MARXAN_INPUTDATA, OutputRun)
        solution_grid = pu_area.merge(solution, left_on=pu_col, right_on='PUID', how='inner')
        solution_grid.plot(ax=axn, column='SOLUTION', legend=True)
        blm = readInput(run_dir, MARXAN_INPUTDATA).BLM
        axn.set_title(f'BLM = {round(blm,4)}')

    return plt.show()

def BLM_calibration(MARXAN_FOLDER: str, MARXAN_INPUTDATA :str,grid_file_path: str, Plot: bool = True)-> float:
    """
    Run the BLM candidates and pick the BLM at the knee of the
    cost / boundary length curve.

    For every BLM run the cost of the lowest-score run (from the summary
    table) and the boundary length of the selected planning units
    (``SOLUTION == 1``, dissolved into one geometry) are collected. The knee
    of boundary length over cost is located with ``kneed.KneeLocator``.

    Args:
        MARXAN_FOLDER (string): Marxan root folder used as the template.
        MARXAN_INPUTDATA (string): Name of the input file inside that folder.
        grid_file_path (string): Path to the planning unit grid file.
        Plot (bool): Whether to draw the curve with the knee marked.
    Returns:
        The BLM value at the knee, rounded to 4 decimals.
    Raises:
        ValueError: If no knee can be located on the curve.
    """
    blm_folder, blm_values = createBlmProject(MARXAN_FOLDER, MARXAN_INPUTDATA,grid_file_path)
    blm_df = pd.DataFrame({'folder': blm_folder, 'blm': blm_values})

    parent_dir = os.path.dirname(MARXAN_FOLDER)
    # The grid is the same for every run, so it is read only once.
    pu_area = gpd.read_file(grid_file_path)
    puid_list = ['PUID','PU_ID','puid','pu_id']
    pu_col = [col for col in puid_list if col in pu_area.columns][0]

    costs = []
    boundary_lengths = []
    for folder in blm_folder:
        run_dir = f'{parent_dir}/{folder}'
        solution = validateFile(run_dir, MARXAN_INPUTDATA, OutputRun)
        summary = validateFile(run_dir, MARXAN_INPUTDATA, OutputSum)
        # Take the cost from the lowest-score row itself instead of assuming
        # that Run_Number - 1 equals the row label.
        costs.append(summary.loc[summary['Score'].idxmin(), 'Cost'])

        solution_grid = pu_area.merge(solution, left_on=pu_col, right_on='PUID', how='inner')
        # Perimeter of the selected planning units, in the units of the grid CRS.
        boundary_lengths.append(solution_grid.dissolve(by='SOLUTION')['geometry'].length[1])

    blm_df['cost'] = costs
    blm_df['boundary_length'] = boundary_lengths

    ###Curve with no fit
    y = blm_df['boundary_length']
    x = blm_df['cost']
    kn = kneed.KneeLocator(x, y, curve='convex', direction='decreasing')
    if kn.knee is None:
        raise ValueError('Could not locate a knee in the cost / boundary length curve')
    best_blm = round(blm_df.loc[blm_df['cost']==kn.knee,'blm'].values[0],4)

    print(f'The optimun BLM is {best_blm}')

    if Plot:
        plt.figure(figsize=(5,5))
        plt.xlabel('cost')
        plt.ylabel('boundary length')
        plt.plot(x, y, 'bo-')
        # Label every point with the BLM value it was produced with.
        for row in blm_df.itertuples(index=False):
            plt.text(row.cost, row.boundary_length, round(row.blm, 4))
        plt.vlines(kn.knee, plt.ylim()[0], plt.ylim()[1], linestyles='dashed')

    return best_blm

## Cluster 5 most different solutions

In [None]:
def clusterSolutions(MARXAN_FOLDER, MARXAN_INPUTDATA,k=5):
    """
    Returns the best solution of each of (up to) ``k`` clusters of solutions.

    The solutions matrix is clustered hierarchically (average linkage,
    Jaccard metric) and cut into at most ``k`` clusters. For every cluster
    that actually exists, the run with the lowest Score in the summary table
    is selected.

    NOTE(review): rows of the solutions matrix are matched to rows of the
    summary table by position — this assumes both files list the runs in the
    same order.

    Args:
        MARXAN_FOLDER (string): Marxan root folder.
        MARXAN_INPUTDATA (string): Name of the input file inside that folder.
        k (int): Maximum number of clusters.
    Returns:
        A list with the Run_Number of the best solution per cluster; it can be
        shorter than ``k`` when fewer clusters are found.
    """
    # Open solutions matrix file and expand the per-run dict into columns
    userSolMat_df = validateFile(MARXAN_FOLDER,MARXAN_INPUTDATA, OutputSolutionsMatrix)
    pu_column = userSolMat_df.columns[1]
    solmat = userSolMat_df.drop(columns=pu_column).join(userSolMat_df[pu_column].apply(pd.Series))
    solmat = solmat.loc[:,solmat.columns != 'SolutionsMatrix']

    # Hierarchical clustering on Jaccard distance
    linkage_matrix = linkage(solmat, method='average',metric='jaccard')

    # Cut into at most k clusters
    groups = fcluster(linkage_matrix, k, criterion='maxclust')

    # Get best solution per cluster (solution with the lowest Score) from sum table
    summary = validateFile(MARXAN_FOLDER,MARXAN_INPUTDATA, OutputSum)
    best = summary.loc[summary['Score'].idxmin(),'Run_Number']
    print(f'Overall best solution is {best}')

    bestlist = []
    # 'maxclust' may yield fewer than k clusters; iterate only the labels present.
    for label in np.unique(groups):
        members = np.where(groups == label)[0]
        sol = summary.loc[summary.loc[members]['Score'].idxmin(),'Run_Number']
        bestlist.append(sol)

    return bestlist

## Gap Analysis

In [None]:
def gapAnalysis(MARXAN_FOLDER,MARXAN_INPUTDATA,premarxan =True, postmarxan=True):
    """This function analyses the protected amount of each species
    and returns a DataFrame with the values needed to construct the Gap Analysis.
    Calculates for each feature
    - amount_total: Total amount of a feature in the planning area
    - amount_target: Target amount to achieve (same value as 'target')
    - amount_lock: Amount of the feature inside locked-in planning units
    - prop: proportion of the total amount to achieve
    - prop_lock: proportion of the total amount that is locked in
    - target_met_pre: 'yes'/'no', whether the locked-in share already meets the target
    - prop_best, prop_best_minus_lock: only with ``postmarxan``; proportion
      held in the best solution, and that proportion minus prop_lock

    Args:
        MARXAN_FOLDER (string): Marxan root folder.
        MARXAN_INPUTDATA (string): Name of the input file inside that folder.
        premarxan (bool): Currently unused.
        postmarxan (bool): Whether to add the best-solution columns.
    Returns:
        A DataFrame with one row per conservation feature.
    """
    ## Validate all input files
    userInputFile = readInput(MARXAN_FOLDER,MARXAN_INPUTDATA)
    pu = validateFile(MARXAN_FOLDER,MARXAN_INPUTDATA, planningUnits)
    spec = validateFile(MARXAN_FOLDER,MARXAN_INPUTDATA, conservationFeature)
    puvsp = validateFile(MARXAN_FOLDER,MARXAN_INPUTDATA, planningUnitVSConservationFeatureV)

    # Calculate total amount of the feature in the planning area and merge with spec.dat file
    total = puvsp.groupby(['species']).sum().reset_index()
    df = spec.merge(total[['species','amount']],
                    left_on = 'id',
                    right_on = 'species',
                    how='inner').drop(columns=['species']).rename(columns={'amount':'amount_total'})

    # Calculate the amount to achieve (target-as area or prop- as %) from spec.dat
    # if 'prop' is the available column: calculate 'target' as amount_total* prop
    # if 'target' is the available column: calculate 'prop' as target/amount_total
    if 'prop' in spec.columns:
        df['target'] = df['amount_total']*df['prop']
    else: # 'target' in spec.columns
        df['prop'] = df['target']/df['amount_total']
    # Both branches expose the target under 'amount_target' (the previous
    # rename() result was discarded, so the column was missing in one branch).
    df['amount_target'] = df['target']

    # Calculate the feature amount in locked in planning units
    # Get PUID that have been locked-in (status = 2 in pu.dat)
    pulock = pu[pu['status']==2].id
    lockin = puvsp[puvsp.pu.isin(pulock)].groupby(['species']).sum().reset_index()
    df = df.merge(lockin[['species','amount']],
                  left_on = 'id',
                  right_on = 'species',
                  how='left').drop(columns=['species']).rename(columns={'amount':'amount_lock'})

    # Features without locked-in amount come out of the left merge as NaN
    df = df.fillna(0)
    # Proportion of the total amount that is in locked in areas (PA)
    df['prop_lock'] = (df['amount_lock']/df['amount_total'])

    # Target met
    # NOTE(review): the ratio is scaled to a percentage before it is compared
    # with MISSLEVEL — confirm MISSLEVEL is expressed as a percentage here.
    df['target_met_pre'] = np.where((df['prop_lock']/df['prop'])*100 > userInputFile.MISSLEVEL, 'yes', 'no')

    if postmarxan:
        # Calculate amount protected in best solution
        mvbest = validateFile(MARXAN_FOLDER,MARXAN_INPUTDATA, OutputMV)

        df = df.merge(mvbest,
                      left_on = 'id',
                      right_on = 'Conservation_Feature',
                      how='left').drop(columns=['Conservation_Feature','Feature_Name'])

        # Add proportion best to table
        df['prop_best'] = (df['Amount_Held']/df['amount_total'])

        # Best solution - locked species amount (for additive plotting)
        df['prop_best_minus_lock'] = (df['prop_best']-df['prop_lock'])

    # errors='ignore': not every spec file carries all of these optional columns
    return df.drop(columns=['type','spf','target2','targetocc','sepnum','sepdistance'],
                   errors='ignore')


def plotGap(data, title,ax, post_marxan=True):
    """
    Draw the gap analysis as horizontal bars, one bar per feature.

    Features are sorted by 'prop_best' (``post_marxan``) or 'prop_lock'.
    The locked/protected proportion is always drawn; with ``post_marxan`` the
    additional proportion gained in the best solution is stacked on top of
    it. The target proportion ('prop') is drawn as a red line.
    Bars are labelled with the feature name when the 'name' column holds
    strings, otherwise with the feature id; tick labels are cut to 15
    characters.

    Args:
        data: DataFrame as returned by ``gapAnalysis``.
        title (string): Axes title.
        ax: Matplotlib axes to draw on.
        post_marxan (bool): Whether to include the best-solution bars.
    """
    width = 0.8

    ordered = data.sort_values("prop_best" if post_marxan else "prop_lock")
    if 'name' in ordered.columns and pd.api.types.is_string_dtype(ordered['name']):
        labels = ordered['name'].astype(str)
    else:
        labels = ordered['id'].apply(str)
    x_pos = list(range(len(labels)))

    ax.plot(ordered['prop'], labels, color='red')
    if post_marxan:
        ax.barh(labels, ordered['prop_best_minus_lock'], width, left=ordered['prop_lock'],
                label='marxan', color='#00cccc', edgecolor='#00cccc', align='center')
    ax.barh(labels, ordered['prop_lock'], width, label='locked/protected',
            color='#617f9d', edgecolor='#617f9d', align='center')

    ax.set_xlabel('Percentage protected')
    ax.set_ylabel('Species')
    ax.set_title(title)
    ax.legend(loc ='upper left')
    ax.set_xlim([0,1])
    # Labels follow the sorted frame, so they stay aligned with the bars.
    reduced_labels = [label[:15] for label in labels]
    plt.setp(ax, yticks=x_pos, yticklabels=reduced_labels)

## Unmet targets pipeline  

### Identify unmet targets: 
    File output_mvbest, columns 'Target Met' + 'MPM' (Minimum Proportion Met)

### Workflow for unmet solutions:
    If the feature is not met but by very little --> mark as met or decrease MISSLEVEL 
    Else:
        If the feature is in locked-out planning units --> Send warning/mark as met/rethink problem
        Else:
            If the feature is in a high cost area --> Increase SPF
            Else:
                The feature has low range and is isolated --> Increase SPF

In [None]:
def locatePU(MARXAN_FOLDER:str,MARXAN_INPUTDATA:str,pus_of_interest=None, feature=None) -> float:
    """ Check how much of a feature lies in an area of interest.

    Args:
        MARXAN_FOLDER (string): Marxan root folder.
        MARXAN_INPUTDATA (string): Name of the input file inside that folder.
        pus_of_interest: Collection of planning unit ids; None is treated as empty.
        feature: Feature (species) id to look up.
    Returns:
        The percentage (rounded to 2 decimals) of the feature's total amount
        found in ``pus_of_interest``; 0 when the feature does not occur there.
    """
    puvsp = validateFile(MARXAN_FOLDER,MARXAN_INPUTDATA, planningUnitVSConservationFeatureV)

    if pus_of_interest is None:
        pus_of_interest = []

    feature_rows = puvsp[puvsp['species']==feature]
    feature_in_pu = feature_rows[feature_rows.pu.isin(pus_of_interest)]

    if len(feature_in_pu) == 0:
        return 0

    amount_total = feature_rows['amount'].sum()
    amount_in_pu = feature_in_pu['amount'].sum()
    return round((amount_in_pu/amount_total)*100,2)

def unmetDecisionTree(MARXAN_FOLDER, MARXAN_INPUTDATA,
                      lock_out_limit=50, 
                      high_cost_quantile= 0.7, 
                      hcost_limit=20):
    """Give a likely reason for every conservation target that was not met.

    Each unmet feature is evaluated with the following decision flow (first match wins):
    1. The target is missed by little (MPM within 1% of the MISSLEVEL of the input file).
    2. The feature lies mostly in locked-out planning units (status 3).
    3. The feature lies mostly in high cost planning units.
    4. Otherwise: the feature has a small range or is isolated.

    Parameters:
    - MARXAN_FOLDER: folder of the Marxan project.
    - MARXAN_INPUTDATA: name of the input parameter file.
    - lock_out_limit: percentage of the feature amount in locked-out pu's above which
      step 2 is reported.
    - high_cost_quantile: cost quantile above which a planning unit counts as high cost.
    - hcost_limit: percentage of the feature amount in high cost pu's above which
      step 3 is reported.

    Returns:
    dict mapping each unmet feature id to its evaluation text, or
    {'None': 'All targets met'} when there is nothing to report.
    """
    # Validate and read files (helpers and models are defined elsewhere in this file)
    userInputFile = readInput(MARXAN_FOLDER,MARXAN_INPUTDATA)
    pu = validateFile(MARXAN_FOLDER,MARXAN_INPUTDATA, planningUnits)
    puvsp = validateFile(MARXAN_FOLDER,MARXAN_INPUTDATA, planningUnitVSConservationFeatureV)
    mvbest = validateFile(MARXAN_FOLDER,MARXAN_INPUTDATA, OutputMV)

    df = mvbest[['Conservation_Feature','Target_Met','MPM']].copy()
    is_unmet = df['Target_Met'] == 'no'
    if not is_unmet.any():
        return {'None':'All targets met'}

    # A target counts as "close" when its minimum proportion met (MPM) is within
    # 1% of the misslevel defined in the input file.
    threshold_met = userInputFile.MISSLEVEL
    close_cutoff = threshold_met-(threshold_met*1/100)

    # Planning-unit selections do not depend on the feature: compute them once.
    pu_excluded = list(pu[pu['status']==3].id)
    cost_cutoff = pu['cost'].quantile([high_cost_quantile], interpolation='nearest').values[0]
    pu_hcost = list(pu[pu['cost']>cost_cutoff].id)

    for feature in list(df.loc[is_unmet,'Conservation_Feature'].values):
        is_feature = df['Conservation_Feature']==feature

        # 1. Close to target
        # Solution:
        # - Mark as met: Modify output_mvbest.csv (Target Met)
        # - Decrease MISSLEVEL (input.dat)
        # .item() raises ValueError unless exactly one row matches the feature.
        if df.loc[is_feature,'MPM'].item() >= close_cutoff:
            df.loc[is_feature,'Eval'] = 'Close to target (1% away of misslevel)'
            continue

        # 2. In lock-out areas
        # Solution:
        # - Rethink problem (assume not met)
        # - Unblock locked-out pu's: Modify pu.dat (status)
        excluded_per = locatePU(MARXAN_FOLDER,MARXAN_INPUTDATA,pus_of_interest=pu_excluded, feature=feature)
        if excluded_per > lock_out_limit:
            df.loc[is_feature,'Eval'] = f'{excluded_per} % in locked-out areas'
            continue

        # 3. In high cost areas
        # Solution:
        # - Increase SPF: Modify spec.dat (spf)
        # The high cost planning units are tested here, not the locked-out ones.
        hcost_per = locatePU(MARXAN_FOLDER,MARXAN_INPUTDATA,pus_of_interest=pu_hcost, feature=feature)
        if hcost_per > hcost_limit:
            df.loc[is_feature,'Eval'] = f'{hcost_per} % in high cost areas'
            continue

        # 4. Small range or isolated
        # Solution:
        # - Increase SPF: Modify spec.dat (spf)
        feat_range = round(puvsp[puvsp['species']==feature].count().pu/len(pu)*100,2)
        df.loc[is_feature,'Eval'] = f'Small range ({feat_range} % of planning area) or isolated'

    # Build the result once, after every unmet feature has been evaluated.
    return dict(zip(df.loc[is_unmet,'Conservation_Feature'], df.loc[is_unmet,'Eval']))

### Difference Map

In [49]:
def diffMap(SCEN1_PATH: str,SCEN2_PATH: str, show_count :bool, MARXAN_INPUTDATA: str = 'input.dat')-> dict:
    """Compare the selection frequency of every planning unit between two scenarios.

    Args:
        SCEN1_PATH (str): Folder of the first Marxan scenario.
        SCEN2_PATH (str): Folder of the second Marxan scenario.
        show_count (bool): Print how many planning units fall in each category.
        MARXAN_INPUTDATA (str): Name of the input parameter file in both folders.

    Returns:
        ``DataFrame.to_dict()`` of the columns 'planning_unit', 'category', 'diff'
        and 'diff_abs', i.e. ``{column: {row index: value}}``.
    """
# 1. Read files (input.dat and the summed-solution outputs)
    input1 = readInput(SCEN1_PATH,MARXAN_INPUTDATA)
    input2 = readInput(SCEN2_PATH,MARXAN_INPUTDATA)

    ## Frequency of solutions output file
    s1 = validateFile(SCEN1_PATH,MARXAN_INPUTDATA, OutputSsoln)
    s2 = validateFile(SCEN2_PATH,MARXAN_INPUTDATA, OutputSsoln)

    ## Number of runs
    n1 = int(input1.NUMREPS)
    n2 = int(input2.NUMREPS)

# 2. Rename fields and pair both scenarios
    # freq = the total number of times a pu has been selected in all solutions
    diff = s1.rename(columns={'number':'freq1'})
    # Pair the scenarios by planning unit id, not by row position: the two output
    # files are not guaranteed to list the planning units in the same order.
    diff['freq2'] = diff['planning_unit'].map(s2.set_index('planning_unit')['number'])

    # rel_freq = freq/number of solutions (different scenarios may have different solution number)
    diff['rel_freq1'] = (diff['freq1']/n1).round(2)
    diff['rel_freq2'] = (diff['freq2']/n2).round(2)

    # diff = selection of scenario 1 - scenario 2, diff_abs = absolute difference.
    # Rounded again so float noise (0.8 - 0.7 = 0.10000000000000009) cannot push a
    # difference of exactly 0.1 over the category threshold.
    diff['diff'] = (diff['rel_freq1'] - diff['rel_freq2']).round(2)
    diff['diff_abs'] = diff['diff'].abs()

# 3. Assign categories
    r1 = diff['rel_freq1']
    r2 = diff['rel_freq2']
    always1 = r1 == 1
    always2 = r2 == 1
    # selected at least once but not always, in both scenarios
    partial_both = (r1 != 0) & (r1 < 1) & (r2 != 0) & (r2 < 1)

    # status 2 in pu.dat should give rel_freq=1, but that is not the case. TO DO: understand why
    categories = {'never': (r1 == 0) & (r2 == 0),                  # never selected in S1 & S2
         'always': always1 & always2,                               # always selected in both
         'S1_always': always1 & ~always2,                           # always selected in S1 only
         'S2_always': always2 & ~always1,                           # always selected in S2 only
         'S1_only': (r1 != 0) & (r2 == 0) & ~always1,               # only in S1, but below 1
         'S2_only': (r1 == 0) & (r2 != 0) & ~always2,               # only in S2, but below 1
         'both_comparable': partial_both & (diff['diff_abs'] <= 0.1),  # difference within 0.1
         'both_higherS1': partial_both & (diff['diff'] > 0.1),      # more frequent in S1
         'both_higherS2': partial_both & (diff['diff'] < -0.1)}     # more frequent in S2

    diff['category']= 'NotAssigned'
    for key, mask in categories.items():
        diff.loc[mask,'category'] = key

# 4. Sanity checks
# All pu's are assigned to one category
    if show_count:
        for key, mask in categories.items():
            print(f"there are {int(mask.sum())} pu's assigned to {key}")
        print(f"there are {int((diff['category'] == 'NotAssigned').sum())} pu's not assigned")

    # Convert to dict (this format can be more elaborate depending on FE requirements)
    diff_dict = diff[['planning_unit','category','diff','diff_abs']].to_dict()

    return diff_dict

def plotDiffMap(diff_dict: dict,GRID_PATH: str, solid: bool):
    """Plot the category of every planning unit on the planning-unit grid.

    Args:
        diff_dict (dict): Column-oriented dict with at least the keys 'planning_unit'
            and 'category' (the format returned by diffMap).
        GRID_PATH (str): Path of the planning-unit grid, readable by geopandas.
        solid (bool): Currently unused; the map is always coloured by category.

    Raises:
        ValueError: If the grid has no column that looks like a planning-unit id.
    """
    # grid file
    pu_grid = gpd.read_file(f'{GRID_PATH}')

    # Find the planning-unit id column by substring match. As before, the last
    # candidate that matches wins; 'PUID' is listed first so grids that already
    # matched another candidate keep resolving to the same column.
    puid_list = ['PUID','PIUD','PU_ID','puid','pu_id']
    pu_col = None
    for option in puid_list:
        for col_name in pu_grid.columns:
            if option in col_name:
                pu_col = col_name
    if pu_col is None:
        raise ValueError(f'No planning unit id column found in {GRID_PATH}; '
                         f'expected a column name containing one of {puid_list}')

    # convert dict to dataframe
    diff = pd.DataFrame.from_dict(diff_dict,orient='columns')

    # merge with pu grid (only planning units present in both are kept)
    diff = pu_grid[[pu_col,'geometry']].merge(diff,left_on=pu_col,right_on ='planning_unit',how='inner')

    # Plot one colour per category
    diff.plot(column = 'category',legend =True,figsize =(10,10))

### FPF Calibration

In [None]:
def _fpf_parent_dir(MARXAN_FOLDER: str) -> str:
    """Return the directory that holds the FPF_<n> folders (the parent of MARXAN_FOLDER).

    The path is normalised first so that a trailing slash does not make the project
    folder its own parent.
    """
    return os.path.dirname(os.path.normpath(MARXAN_FOLDER))


def createFpfProject(MARXAN_FOLDER: str, MARXAN_INPUTDATA:str, unmet_dict: dict,
                     fpf_range: Optional[List[int]] = None) -> list:
    """Run Marxan once per penalty-factor multiplier, each in its own copy of the project.

    For every multiplier a folder 'FPF_<multiplier>' is created next to MARXAN_FOLDER
    (replacing any previous one), the spf of the unmet features is multiplied and
    Marxan is executed with NUMREPS = 10 and VERBOSITY = 0.

    Args:
        MARXAN_FOLDER (str): Folder of the original Marxan project.
        MARXAN_INPUTDATA (str): Name of the input parameter file.
        unmet_dict (dict): Its keys are the ids of the features whose spf is scaled.
        fpf_range (list of int, optional): Multipliers to try. Defaults to
            [1, 2, 5, 10, 20, 50, 100]. Keep them integers: FPF_calibration parses
            the multiplier back from the folder name with int().

    Returns:
        list: Names (not paths) of the created folders, in run order.
    """
    multipliers = [1,2,5,10,20,50,100] if fpf_range is None else list(fpf_range)
    parent_dir = _fpf_parent_dir(MARXAN_FOLDER)
    unmet = list(unmet_dict.keys())
    fpf_folder=[]

    ### RUN MARXAN several times
    for fpf in multipliers:
        print(f'\033[1m --> Running FPF_{fpf}...\033[0m')
        run_name = f'FPF_{fpf}'
        # os.path.join keeps the folder relative when MARXAN_FOLDER has no parent part
        # (a plain f'{parent}/FPF_n' would point at the filesystem root).
        run_dir = os.path.join(parent_dir, run_name)

        ## Start from a clean copy of the project, without previous outputs
        if os.path.exists(run_dir):
            rmtree(run_dir)
        copytree(MARXAN_FOLDER, run_dir, ignore=ignore_patterns('output_*'))

        ## Short, silent runs are enough for the calibration
        userInputFile = readInput(run_dir, MARXAN_INPUTDATA)
        userInputFile.NUMREPS = 10
        userInputFile.VERBOSITY = 0
        saveInput(run_dir, MARXAN_INPUTDATA,userInputFile)

        ## Scale the penalty factor of the unmet features only
        spec = validateFile(run_dir,MARXAN_INPUTDATA, conservationFeature)
        for feature in unmet:
            is_feature = spec['id'] == feature
            spec.loc[is_feature,'spf'] = spec.loc[is_feature,'spf']*fpf
        CreateFileFromDF(f'{run_dir}/{userInputFile.INPUTDIR}/{userInputFile.SPECNAME}',spec, conservationFeature)

        # makedirs also copes with a nested OUTPUTDIR and with an existing folder
        os.makedirs(f'{run_dir}/{userInputFile.OUTPUTDIR}', exist_ok=True)

        fpf_folder.append(run_name)
        # the copied marxan binary must be executable
        os.chmod(f'{run_dir}/marxan', 0o755)
        execute_marxan(run_dir)

    return fpf_folder

def FPF_calibration(MARXAN_FOLDER: str, MARXAN_INPUTDATA :str, unmet_dict: dict, Plot: bool = True,
                    fpf_range: Optional[List[int]] = None)-> float:
    """Find the spf multiplier that leaves the fewest missed targets.

    Args:
        MARXAN_FOLDER (str): Folder of the original Marxan project.
        MARXAN_INPUTDATA (str): Name of the input parameter file.
        unmet_dict (dict): Its keys are the ids of the features whose spf is scaled.
        Plot (bool): Plot missed targets against the multiplier.
        fpf_range (list of int, optional): Multipliers to try (see createFpfProject).

    Returns:
        The multiplier with the lowest mean 'Missing_Values' over all runs; the first
        (lowest listed) one in case of a tie.
    """
    fpf_folder = createFpfProject(MARXAN_FOLDER, MARXAN_INPUTDATA, unmet_dict, fpf_range)
    parent_dir = _fpf_parent_dir(MARXAN_FOLDER)

    # Average over all the runs of each multiplier (more stable than the best run alone)
    missed_targets = []
    for fpf in fpf_folder:
        summary = validateFile(os.path.join(parent_dir, fpf),MARXAN_INPUTDATA, OutputSum)
        missed_targets.append(summary['Missing_Values'].mean())

    fpf_df = pd.DataFrame({'folder': fpf_folder,
                           'multiplier': [int(name.split('_')[1]) for name in fpf_folder],
                           'missed_targets': missed_targets})

    # idxmin returns the first row holding the minimum
    best_fpf = fpf_df.loc[fpf_df['missed_targets'].idxmin(),'multiplier']

    ### Curve with no fit
    if Plot:
        plt.figure(figsize=(5,5))
        plt.xlabel('FPF multiplier')
        plt.ylabel('Missed targets')
        plt.plot(fpf_df['multiplier'], fpf_df['missed_targets'], 'bo-')

    return best_fpf

## Input data model types

In [10]:
class inputDatFile(BaseModel):
    """
    Data model of the Marxan input parameter file (input.dat).

    Every field is one parameter of the file; `from_dat` builds the model from the
    lines of a file and `to_dat` serialises it back to 'KEY value' lines.
    """
    
    # General Parameters
    VERSION: str = Field('0.1', title='Version', 
                         description='Type of input file')
    BLM:  Optional[float] = Field(0., title='Boundary Length Modifier', 
                           description='Boundary Length Modifier')
    PROP: float = Field(0., title='Starting Proportion', 
                            description='Proportion of planning units in initial reserve system')
    RANDSEED: Optional[int] = Field(-1, title='Random Seed', 
                              description='Random seed number')
    NUMREPS: int = Field(1, title='Repeat Runs', 
                             description='The number of repeat runs you wish to do')
    BESTSCORE: Optional[int] = Field(0, title='Best Score Speedup', 
                               description='This variable tells Marxan not to keep track of the best score \
                                until it reaches a specified minimum level.')
    
    # Annealing Parameters
    NUMITNS: int = Field(0, title='Number of Iterations', 
                         description='Number of iterations for annealing')
    STARTTEMP: int = Field(1, title='Initial Temperature', 
                           description='Starting temperature for annealing')
    COOLFAC: int = Field(0, title='Cooling Factor', 
                         description='Cooling factor for annealing')
    NUMTEMP: int = Field(1, title='Temperature Decreases', 
                         description='Number of temperature decreases for annealing')
    
    # Cost Threshold
    COSTTHRESH: Optional[float] = Field(0, title='Threshold', 
                              description='Cost threshold')
    THRESHPEN1: Optional[float] = Field(0, title='Penalty Factor A', 
                              description='Size of cost threshold penalty')
    THRESHPEN2: Optional[float] = Field(0, title='Penalty Factor B', 
                              description='Shape of cost threshold penalty')
    
    # Input Files
    INPUTDIR: str = Field('input', title='Input Folder', 
                          description='User Defined Name of the folder containing input data files')
    SPECNAME: str = Field('spec.dat', title='Species File Name', 
                          description='Name of Conservation Feature File')
    PUNAME: str = Field('pu.dat', title='Planning Unit File Name', 
                        description='Name of Planning Unit File')
    PUVSPRNAME: str = Field('puvspr2.dat', title='Planning Unit versus Species', 
                            description='Name of Planning Unit versus Conservation Feature File')
    BOUNDNAME: str = Field('bound.dat', title='Boundary Length', 
                           description='Name of Boundary Length File')
    BLOCKDEFNAME: Optional[str] = Field('blockdef.dat', title='Block Definitions', 
                              description='Name of Block Definition File')
    
    # Output Files
    VERBOSITY: int =  Field(1, title='Screen Output', 
                            description='Amount of output displayed on the program screen')
    MISSLEVEL: Optional[float] =  Field(1, title='Species missing proportion', 
                              description='Amount or target below which it is counted as ‘missing’')
    OUTPUTDIR: str = Field('output', title='Output Folder', 
                           description='User Defined Name of the folder in which to save output files')
    SCENNAME: str = Field('Default_name', title='Scenario name', 
                          description='Scenario name for the saved output files')
    SAVERUN: Optional[int] = Field(3, title='Save each run', 
                         description='Save each run? (0 = no)')
    SAVEBEST: Optional[int] =  Field(3, title='Save the best run', 
                           description='Save the best run? (0 = no)')
    SAVESUMMARY: Optional[int] =  Field(3, title='Save summary', 
                          description='Save summary information? (0 = no)')
    SAVESCEN: Optional[int] =  Field(3, title='Save scenario', 
                           description='Save scenario information? (0 = no)')
    SAVETARGMET: Optional[int] =  Field(3, title='Save targets met', 
                              description='Save targets met information? (0 = no)')
    SAVESUMSOLN: Optional[int] =  Field(3, title='Save summed solution', 
                              description='Save summed solution information? (0 = no)')
    SAVELOG: Optional[int] =  Field(1, title='Save log', 
                          description='Save log files? (0 = no)')
    SAVESNAPSTEPS: Optional[int] =  Field(0, title='Save snapshots', 
                                description='Save snapshots each n steps (0 = no)')
    SAVESNAPCHANGES: Optional[int] =  Field(0, title='Save snapshots changes', 
                                  description='Save snapshots after every n changes (0 = no)')
    SAVESNAPFREQUENCY: Optional[int] =  Field(0, title='Frequency of snapshots', 
                                    description='Frequency of snapshots if they are being used')
    SAVESOLUTIONSMATRIX: Optional[int] =  Field(3, title='Save solutions matrix', 
                                    description='Save all solutions in a single matrix? (0 = no)')
    
    # Program control.
    RUNMODE: int = Field(1, title='Run Options', 
                         description='User Defined The method Marxan uses to find solutions')
    
    ITIMPTYPE: int =  Field(1, title='Iterative Improvement', 
                            description='Iterative improvement type')
    HEURTYPE: int =  Field(1, title='Heuristic', 
                           description='Heuristic type')
    CLUMPTYPE: Optional[int] =  Field(0, title='Clumping Rule', 
                            description='Clumping penalty type')
    
    # All the SAVE* options below share the same set of file formats, so a single
    # validator covers them.
    @validator('SAVERUN', 'SAVEBEST', 'SAVESUMMARY', 'SAVESCEN', 'SAVETARGMET',
               'SAVESUMSOLN', 'SAVELOG', 'SAVESOLUTIONSMATRIX')
    def save_option_is_valid(cls, method: int) -> int:
        """Check that a SAVE* option is one of the known output formats."""
        allowed_set = {0: 'No file generated',
                      1: 'save a file as .dat',
                      2: 'save a file as .txt',
                      3: 'save a file as .csv'}
        
        if method not in allowed_set:
            raise ValueError(f"must be in {allowed_set}, got '{method}'")
        
        return method
    
    @validator('RUNMODE')
    def RUNMODE_is_valid(cls, method: int) -> int:
        """Check that RUNMODE is a known run option."""
        allowed_set = {0: 'Apply Simulated Annealing followed by a Heuristic',
                      1: 'Apply Simulated Annealing followed by Iterative Improvement',
                      2: 'Apply Simulated Annealing followed by a Heuristic, followed by Iterative',
                      3: 'Use only a Heuristic',
                      4: 'Use only Iterative Improvement',
                      5: 'Use a Heuristic followed by Iterative Improvement',
                      6: 'Use only Simulated Annealing'}
        
        if method not in allowed_set:
            raise ValueError(f"must be in {allowed_set}, got '{method}'")
        
        return method
    
    @validator('ITIMPTYPE')
    def ITIMPTYPE_is_valid(cls, method: int) -> int:
        """Check that ITIMPTYPE is a known iterative improvement type."""
        allowed_set = {0: 'Normal Iterative Improvement',
                      1: 'Two Step Iterative Improvement',
                      2: '‘Swap’ Iterative Improvement',
                      3: 'Normal Improvement followed by Two Step Iterative Improvement'}
        
        if method not in allowed_set:
            raise ValueError(f"must be in {allowed_set}, got '{method}'")
        
        return method
    
    @validator('HEURTYPE')
    def HEURTYPE_is_valid(cls, method: int) -> int:
        """Check that HEURTYPE is a known heuristic type."""
        allowed_set = {-1:'Ignored',
                      0: 'Richness',
                      1: 'Greedy',
                      2: 'Max Rarity',
                      3: 'Best Rarity',
                      4: 'Average Rarity',
                      5: 'Sum Rarity',
                      6: 'Product Irreplaceability',
                      7: 'Summation Irreplaceability'}
        
        if method not in allowed_set:
            raise ValueError(f"must be in {allowed_set}, got '{method}'")
        
        return method
    
    @validator('VERBOSITY')
    def VERBOSITY_is_valid(cls, method: int) -> int:
        """Check that VERBOSITY is a known screen output level."""
        allowed_set = {-1:'Ignored',
                      0: 'Silent Running',
                      1: 'Results Only',
                      2: 'General Progress',
                      3: 'Detailed Progress'}
        
        if method not in allowed_set:
            raise ValueError(f"must be in {allowed_set}, got '{method}'")
        
        return method
    
    @validator('CLUMPTYPE')
    def CLUMPTYPE_is_valid(cls, method: int) -> int:
        """Check that CLUMPTYPE is a known clumping rule."""
        # The Marxan manual numbers the graduated penalty as 2.
        # NOTE(review): 3 was the only value accepted for it before; it is kept so that
        # files which used to validate still do -- confirm whether it can be dropped.
        allowed_set = {-1:'Ignored',
                       0: 'Partial clumps do not count',
                       1: 'Partial clumps count half',
                       2: 'Graduated penalty',
                       3: 'Graduated penalty'}
        
        if method not in allowed_set:
            raise ValueError(f"must be in {allowed_set}, got '{method}'")
        
        return method
    
    def to_dat(self):
        """
        Serialises the model to the text of an input.dat file.

        Args:
            
        Returns:
            One 'KEY value' line per parameter. Parameters whose value is None are
            left out instead of being written as the literal text 'None'.
        """
        return ''.join(f'{key} {value}\n'
                       for key, value in self.dict().items()
                       if value is not None)
    
    @classmethod
    def from_dat(cls: Type['inputDatFile'], dat: str)-> 'inputDatFile':
        """
        Builds the model from the content of an input.dat file.

        Args:
            dat: The content of the file, either as a single string or as an
                iterable of lines (a list of lines or an open text file).
        Returns:
            The validated model; parameters missing in `dat` take their default.
        """
        # Iterating a plain string would yield characters, never matching a
        # parameter line and silently returning an all-defaults model.
        lines = dat.splitlines() if isinstance(dat, str) else dat
        obj = {}
        
        for line in lines:
            # Only lines containing an upper-case parameter name are considered
            if not re.search('[A-Z1-9_]{2,}', line, re.DOTALL):
                continue
            # split() handles spaces and tabs, and drops '\r\n' line endings
            pair = line.split()
            if len(pair)>2: # free-text lines (comments, titles) have more than two words
                continue
            assert len(pair) == 2, f"expected 'KEY value', got {line!r}"
            # num is defined elsewhere in this file; presumably converts numeric text
            obj[pair[0]] = num(pair[1])
                
        return cls.parse_obj(obj)

In [11]:
class conservationFeature(BaseModel):
    """
    The Conservation Feature File contains information about each of the conservation
    features being considered, such as their name, target representation, and the penalty
    if the representation target is not met. It has the default name ‘spec.dat’. Because of
    this name it is sometimes referred to as the Species File, although conservation
    features will often be surrogates such as habitat type rather than actual species.
    """
   
    # Required: identifier of the feature (one row of the file per feature).
    id: int = Field(..., title='Conservation Feature ID', 
                    description='A unique numerical identifier for each conservation feature. \
                                Be careful not to duplicate id numbers as Marxan will ignore all but the last one.')
    
    # All variables you wish to take on Block Definition attributes should
    # have their value entered as -1 in the Conservation Feature File.
    # Optional: no default is given, so the field is None when absent.
    type: Optional[int] = Field(title='Conservation Feature Type', 
                                description='Used to define groups of conservation features for which a number of \
                                umbrella attributes can be set for all features within the specified group (or “type”). \
                                Each group of features must have a unique numerical identifier. This variable is used \
                                in conjunction with the Block Definition File (see Section 3.3.2) which will contain \
                                the attributes to be assigned to a particular group of conservation features.')
    
    # If the Block Definition File is being used for this feature, the value
    # should be set to -1 here.
    # `target` and `prop` are alternatives: the original note says they exclude each
    # other. Nothing in this model enforces that (see the TODO at the end of the class).
    target: Optional[float] = Field(title=' Feature Representation Target', 
                          description='The target amount of each conservation feature to be included \
                                        in the solutions. These values represent constraints on potential solutions \
                                        to the reserve selection problem. That is, for a reserve solution to be \
                                        feasible it must include at least this amount of each feature. The target \
                                        value is expressed in the same units used to define the amount of each feature in \
                                        each planning unit, contained in the Planning Unit versus Conservation Feature \
                                        File (see Section 3.2.4). However, units from different conservation features can vary \
                                        (e.g. hectares of habitat for one feature and number of occurrences for another, nests \
                                        for a third and length of stream for a fourth).')
    
    # Proportion of the feature to protect, constrained to the range [0, 1].
    # NOTE(review): the original comment asked for -1 when the Block Definition File is
    # used, but ge=0 rejects -1 -- confirm which of the two is intended.
    prop: Optional[float] = Field(title='Proportion Target for Feature Representation', 
                                description='The variable ‘prop’, is short for proportion and can \
                                            be used to set the proportion (i.e. percentage) of a \
                                            conservation feature to be included in the reserve system.',
                               ge =0, le =1)
    
    # Required: penalty factor of the feature.
    # NOTE(review): the original comment repeated the "-1 when the Block Definition
    # File is used" convention here -- confirm it applies to spf.
    
    spf: float = Field(..., title='Conservation Feature Penalty Factor', 
                       description='The letters ‘spf’ stands for Species Penalty Factor. This \
                                    variable is more correctly referred to as the Conservation Feature Penalty \
                                    Factor.')
    # Optional: minimum clump size.
    target2: Optional[float] = Field(title='Minimum Clump Size', 
                                     description='This variable specifies a minimum clump size for the\
                                                representation of conservation features in the reserve system. If the amount\
                                                of a conservation feature found in a clump is less that this value, then it does\
                                                not count towards meeting the conservation target')
    
    # If the Block Definition File is being used for this feature, then the Target for
    # Feature Occurrences should be set to -1 here.
    targetocc: Optional[float] = Field(title='Target for Feature Occurrences', 
                                       description='This variable specifies the minimum number of occurrences of a\
                                                    conservation feature required in a reserve system. This value can be used in\
                                                    situations where even though your conservation target may be met in one planning\
                                                    unit, you would like it to be represented in a greater number of planning units,\
                                                    possibly for risk spreading')
    
    # Optional: human readable name of the feature.
    name: Optional[str] = Field(title='Conservation Feature Name', 
                                description='The alphabetical (no numbers!) name of each conservation feature')
    
    # If the Block Definition File is being used for this feature, the value
    # should be set to -1 here.
    sepnum: Optional[float] = Field(title='Target for Separated Feature Occurrences', 
                                    description='The number of mutually separated occurrences of a feature \
                                                required in the reserve system')
    
    # If the Block Definition File is being used for this feature, the value
    # should be set to -1 here.
    sepdistance: Optional[float] = Field(title='Minimum Separation Distance', 
                                      description=' Used in conjunction with ‘sepnum’ (above), this variable specifies\
                                                    the minimum distance at which planning units holding a conservation feature\
                                                    are considered to be separate.')

     
#     TODO: add a validator that checks that either target or prop is present.
#     The draft below was disabled because it does not trigger; note that it also
#     refers to an undefined name `prop` where the validated value `v` is meant.
#     @validator('prop')
#     def check_prop_or_target(cls, v, values):
#         if 'target' not in values and not prop:
#             raise ValueError('either field target or prop is required')
#         return prop
    
                 
class planningUnits(BaseModel):
    """
    One row of the Planning Unit File (default name ‘pu.dat’).

    The file holds all the information related to planning units, except for
    the distribution of conservation features across planning units, which is
    kept in the Planning Unit versus Conservation Feature File.
    """
    id: int = Field(
        ...,
        title=' Planning Unit ID',
        description='A unique numerical identifier for each planning unit',
    )
    cost: Optional[float] = Field(
        1,
        title='Planning Unit Cost',
        description='The cost of including each planning unit in the reserve system. ',
    )
    status: Optional[int] = Field(
        0,
        title='Planning Unit Status',
        description='This variable defines whether a planning unit (PU) is locked in or out of\
                                      the initial and final reserve systems. It can take one of four values:',
    )

    # xloc and yloc are only required if a minimum separation between feature
    # occurrences has been specified in the ‘sepdistance’ column of the
    # Conservation Feature File.
    xloc: Optional[float] = Field(
        title='X Planning Unit Location',
        description='The x-axis coordinate of the planning unit',
    )
    yloc: Optional[float] = Field(
        title='Y Planning Unit Location',
        description='The y-axis coordinate of the planning unit',
    )

    @validator('status')
    def status_is_valid(cls, method: int) -> int:
        """
        Return the status code unchanged when it is one of the known codes.

        Raises:
            ValueError: if the code is not a key of the lookup table below.
        """
        # Status code -> meaning. The whole table is echoed in the error message.
        allowed_set = {
            0: 'The PU is not guaranteed to be in the initial reserve',
            1: 'The PU will be included in the initial reserve',
            2: 'The PU is fixed in the reserve system (“locked in”).\
                           It starts in the initial reserve system and cannot be removed.',
            3: 'The PU is fixed outside the reserve system (“locked out”).\
                       It is not included in the initial reserve system and cannot be added.',
        }

        if method in allowed_set:
            return method

        raise ValueError(f"must be in {allowed_set}, got '{method}'")


class planningUnitVSConservationFeatureV(BaseModel):
    """
    One row of the vertical Planning Unit versus Conservation Feature File.

    The file (default name ‘puvspr2.dat’) describes how conservation features
    are distributed across planning units. Marxan accepts a vertical and a
    horizontal layout and tests the header line to determine which one is in
    use; this model represents the vertical layout.
    """
    species: int = Field(
        ...,
        title='Conservation Feature ID',
        description='The unique id number of each conservation feature. This must \
                                    correspond to the id numbers used in the Conservation Feature File.',
    )
    pu: int = Field(
        ...,
        title='Planning Unit ID',
        description='The id of a planning unit where the conservation feature listed on \
                                    the same row occurs. The planning unit id numbers must correspond \
                                    to the numbers used in the Planning Unit File',
    )
    # No `gt=0` constraint is applied here on purpose: marxan will ignore 0 amounts.
    amount: float = Field(
        ...,
        title='Conservation Feature Amount',
        description='The amount of the conservation feature occurring in the planning unit \
                                      listed on the same row. This amount may be related to the abundance \
                                      of a species or the extent of a certain habitat type.',
    )

class planningUnitVSConservationFeatureH(BaseModel):
    """
    One row of the horizontal Planning Unit versus Conservation Feature File.

    The file (default name ‘puvspr2.dat’) describes how conservation features
    are distributed across planning units. Marxan accepts a vertical and a
    horizontal layout and tests the header line to determine which one is in
    use. This model represents the horizontal layout, where the file is simply
    a matrix of planning units versus conservation features.
    """
    # The previous description ('Amount of output displayed on the program
    # screen') was copy-pasted from an unrelated setting and ended up in the
    # generated schema; it now describes the planning unit id.
    pu: int = Field(..., title='Planning Unit ID',
                    description='The id of the planning unit described by this row. The planning unit \
id numbers must correspond to the numbers used in the Planning Unit File')
    species: List[int] = Field(..., title='Conservation Feature IDs', 
                               description='The unique id number of each conservation feature.')
    # NOTE(review): `gt=0` rejects zero amounts, whereas the vertical model
    # deliberately does not apply this constraint. A planning-unit x feature
    # matrix presumably contains zeros for absent features - TODO confirm
    # whether this should be `ge=0`.
    amount: List[float] = Field(..., title='Conservation Feature Amount', 
                                 description='The amount of the conservation feature occurring in the planning unit \
                                             listed on the same row',
                                 gt=0)


### Optional data file structures

class boundaryLength(BaseModel):
    """
    One row of the Boundary Length File (bound.dat).

    The file stores the length, or ‘effective length’, of the boundary shared
    by two planning units. It is needed when the Boundary Length Modifier is
    used to improve the compactness of reserve solutions.

    Warning: any missing value in the file will prevent Marxan from running,
    for instance when ‘id1’ and ‘id2’ are set but ‘boundary’ is left empty.
    """
    # Important: a boundary must not be listed more than once.
    id1: int = Field(
        ...,
        title='Planning Unit ID',
        description='‘id1’ and ‘id2’ contain the id number of the two \
                     planning units that share a boundary.',
    )
    id2: int = Field(
        ...,
        title=' Planning Unit ID',
        description='id1’ and ‘id2’ contain the id number of the two \
                     planning units that share a boundary.',
    )
    boundary: float = Field(
        ...,
        title='Boundary Length',
        description='Boundary Length or boundary cost is relative \
                           measure of how important it is to include one planning unit \
                           in the reserve system, given the inclusion of the other.',
    )
        
class blockDefinition(BaseModel):
    """
    One row of the Block Definition File.

    The file closely mirrors the Conservation Feature File (see Section 3.2.2)
    and sets default variable values for groups of conservation features. It is
    always used together with the Conservation Feature File.
    """
    type: int = Field(
        ...,
        title='Conservation Feature Type',
        description='A unique numerical identifier for groups of conservation features. \
                                   Each ‘type’ must correspond exactly with the types identified \
                                   in the Conservation Feature File ',
    )

    # Author's note: when ‘prop’ is set, ‘target’ should be set to ‘-1’.
    # NOTE(review): the original note mentions the Planning Unit versus
    # Conservation Feature File here - confirm which file's ‘target’ is meant.
    prop: Optional[float] = Field(
        default=...,
        title='Proportion Target for Feature Representation',
        description='The variable ‘prop’, is short for proportion and can \
                                            be used to set the proportion (i.e. percentage) of a \
                                            conservation feature to be included in the reserve system.',
        ge=0,
        le=1,
    )

    target: float = Field(
        ...,
        title=' Feature Representation Target',
        description='The target amount of each conservation feature to be included \
                                        in the solutions. These values represent constraints on potential solutions \
                                        to the reserve selection problem. That is, for a reserve solution to be \
                                        feasible it must include at least this amount of each feature. The target \
                                        value is expressed in the same units used to define the amount of each feature in \
                                        each planning unit, contained in the Planning Unit versus Conservation Feature \
                                        File (see Section 3.2.4). However, units from different conservation features can vary \
                                        (e.g. hectares of habitat for one feature and number of occurrences for another, nests \
                                        for a third and length of stream for a fourth).',
    )
    spf: float = Field(
        -1,
        title='Conservation Feature Penalty Factor',
        description='The letters ‘spf’ stands for Species Penalty Factor. This \
                                    variable is more correctly referred to as the Conservation Feature Penalty \
                                    Factor.',
    )
    target2: Optional[float] = Field(
        -1,
        title='Minimum Clump Size',
        description='This variable specifies a minimum clump size for the \
                                                representation of conservation features in the reserve system. If the amount\
                                                of a conservation feature found in a clump is less that this value, then it does\
                                                not count towards meeting the conservation target',
    )

    targetocc: Optional[float] = Field(
        -1,
        title='Target for Feature Occurrences',
        description='This variable specifies the minimum number of occurrences of a\
                                                    conservation feature required in a reserve system. This value can be used in\
                                                    situations where even though your conservation target may be met in one planning\
                                                    unit, you would like it to be represented in a greater number of planning units,\
                                                    possibly for risk spreading',
    )

    sepnum: Optional[float] = Field(
        -1,
        title='Target for Separated Feature Occurrences',
        description='The number of mutually separated occurrences of a feature \
                                                required in the reserve system',
    )

    sepdistance: Optional[float] = Field(
        -1,
        title='Minimum Separation Distance',
        description=' Used in conjunction with ‘sepnum’ (above), this variable specifies\
                                                    the minimum distance at which planning units holding a conservation feature\
                                                    are considered to be separate.',
    )

### Output dat model types

In [14]:
class OutputRun(BaseModel):
    """
    One row of a per-run solution file (_r00001.csv) or of _best.csv.

    Marxan writes one such file for each repeat run, listing the planning
    units selected in the solution for that run.
    """
    # General parameters
    PUID: int = Field(
        title='Planning Unit id',
        description='List of planning units',
    )
    SOLUTION: int = Field(
        title='Solution',
        description='Planning unit included in this solution if 1, not included if 0',
        ge=0,
        le=1,
    )
        
class OutputMV(BaseModel):
    """
    One row of a missing-values file (_mv00001.csv or _mvbest.csv).

    The file reports, for each run, how well every conservation feature is
    represented in the solution relative to its targets. Some columns simply
    repeat information supplied in the Conservation Feature File.
    """
    # General parameters
    Conservation_Feature: int = Field(
        title='Conservation feature id',
        description='The unique ID number of the conservation feature',
    )
    Feature_Name: str = Field(
        title='Feature Name',
        description='The optional alphabetic name of the conservation feature.\
                               If no name has been specified then nothing will appear in this column.',
    )
    Target: float = Field(
        title='target',
        description='The target level of representation (if any) for that conservation feature',
    )
    Amount_Held: float = Field(
        title='Amount held',
        description='The amount of that conservation feature captured in the reserve system',
    )
    Occurrence_Target: float = Field(
        title='Occurrence target',
        description='The target number of occurrences in the reserve system for that conservation feature',
    )
    Occurrences_Held: float = Field(
        title='Ocurrence s held',
        description='The number of occurrences of the conservation feature captured in the solution.\
                                    Again, only occurrences in valid clumps are included',
    )
    Separation_Target: float = Field(
        title='Separation target',
        description='The number of mutually and adequately separated occurrences of that\
                                     conservation feature required in the reserve system',
    )
    Separation_Achieved: float = Field(
        title='Separation Achieved',
        description='The number reported here will be the lowest of either: the number of \
                                      separate occurrences that are actually achieved in the reserve system ; or the target \
                                      number of separate occurrences. The separation count (see Appendix B-1.3.1) never exceeds \
                                      the separation target for that feature. This is a convention which speeds up the execution \
                                      of the software but it means that no information is given about how far this target is exceeded.',
    )
    Target_Met: str = Field(
        title='Target met',
        description='An alphabetic variable that returns ‘yes’ if all the targets set for that feature are met,\
                           otherwise it returns ‘no’',
    )
    # Constrained to the closed interval [0, 1].
    MPM: float = Field(
        title='Minimum Proportion Met',
        description='Propotion of target met, min in 0 max i 1',
        ge=0,
        le=1,
    )
        
class OutputSum(BaseModel):
    """
    One row of the run summary file (_sum.csv).

    Each row summarises one repeat run: its score, cost, connectivity figures
    and how the solution performed relative to the targets.
    """
    Run_Number: int = Field(
        title='Run number',
        description='Which of the repeat runs (or solutions) the output pertains to',
    )
    Score: float = Field(
        title='Score',
        description='This is the overall objective function value for the solution from that run.\
                       This includes not only the cost of the planning units and the boundary length but also the penalties\
                       for failing to adequately represent all conservation features or exceeding the cost threshold.\
                       It is useful to know this value because it is how Marxan chooses the ‘best’ solution out of your repeat runs.',
    )
    Cost: float = Field(
        title='Cost',
        description='This is the total cost of the reserve system as determined solely by the costs given to each planning unit.',
    )
    Planning_Units: int = Field(
        title='Planning Units',
        description='The number of planning units contained in the solution for that run',
    )
    Connectivity: float = Field(
        title='Connectivity',
        description='The total boundary length of the reserve system.\
                                If boundary length is not being considered in the analyses (i.e. no Boundary Length File is provided),\
                                then this value will read ‘0.0’.',
    )
    Connectivity_Total: float = Field(
        title='Connectivity Total',
        description='Total boundary of planning units in study area.',
    )
    Connectivity_In: float = Field(
        title='Connectivity In',
        description='Sum of shared boundary between selected planning units.',
    )
    Connectivity_Edge: float = Field(
        title='Connectivity Edge',
        description='Same as Connectivity boundary length',
    )
    Connectivity_Out: float = Field(
        title='Connectivity Out',
        description='Sum of the outer boundaries of unselected planning units.',
    )
    Connectivity_In_Fraction: float = Field(
        title='Connectivity In Fraction',
        description='Connectivity_In/Connectivity_Total - the larger this fraction,\
                                the more spatially compact the solution.',
    )
    Penalty: float = Field(
        title='Penalty',
        description='The penalty that was added to the objective function because the reserve system\
                           failed to meet the representation targets for all features. If all features are adequately represented\
                           then the penalty value will be either 0.0 or “-0.0”. (Because of round-off error it is not likely to be\
                           exactly equal to 0, but with only one decimal place presented the round-off error will probably be hidden).\
                           The penalty is useful to know because it can give you an idea of the cost required to meet the remaining targets,\
                           this is something that is not captured simply by looking at the shortfall. It is also another way to rank\
                           the success of runs, looking only at those solutions that have a low penalty.',
    )
    Shortfall: float = Field(
        title='Shortfall',
        description='The amount by which the targets for conservation features have not been met\
                            in the solution for that run. The shortfall reported here is the total shortfall summed across\
                            all conservation features. The shortfall is a good indication of whether missing conservation\
                            features are very close or very far from their targets. If there are a number of conservation\
                            features which have missed their targets but the combined shortfall is very sma ll then a planner\
                            might not be too concerned.',
    )
    Missing_Values: int = Field(
        title='Missing Values',
        description='The number of features that did not achieve their targets in the final solution for that run.\
                                This is screened according to the ‘misslevel’, which has been set in the Input Parameter File.\
                                If the miss level is set to 1 then every conservation feature which falls below its target level\
                                is counted as missing. If the miss level is set lower than 1 (e.g. 0.98), Marxan may not report a\
                                feature as missing even if the reserve system contains slightly less than the target amount.',
    )
    MPM: float = Field(
        title='Minimum Proportion Met',
        description='The Minimum Proportion Met for the worst performing feature.\
                    That is, this value corresponds to the lowest MPM value in the missing value file.',
    )
        

class OutputSsoln(BaseModel):
    """
    One row of the summed solution file (_ssoln.csv).

    The summed solution gives the selection frequency of each planning unit
    across all runs: every row pairs a planning unit ID with the number of
    times that unit was selected in the final solution across all repeat runs.
    """
    planning_unit: int = Field(
        title='Planning Unit',
        description='ID number of a planning unit',
    )
    number: int = Field(
        title='Number',
        description='Number of times a planning unit was selected in the final solution across all repeat runs',
    )
        
        
# OutputSolMat
### Depends on the number of Planning Units:
### Create a class dynamically depending on the number of planning units of the file
### https://stackoverflow.com/questions/62267544/pydantic-generate-model-from-dict
#OutputSolMat = createDynamicModel(filename= filename, name= 'OutputSolMat', dict_def= d)

class OutputSolutionsMatrix(BaseModel):
    """
    One row of the solutions matrix output.

    A row pairs a solution number with the selection value of every planning
    unit in that solution.
    """
    SolutionsMatrix: str = Field(
        ...,
        title='Solution number',
        description='Solution number',
    )
    # Maps a planning unit label to its selection value in this solution.
    PU: Dict[str, float] = Field(
        ...,
        title='Planning unit selection',
        description='Planning units selected in each solution. The dictionary parameters represent:\
                               key = Planning unit number (P1, P2, P3...), value= selection in this solution (0= False, 1 = True)',
    )
    
#     @validator('SolutionsMatrix')
#     # Check that the number of output solutions is the same as in the input.dat file
#     def SolutionsMatrix_is_valid(cls, method:str) -> str:
#         InputFile = DatFile(f'{MARXAN_FOLDER}/input.dat')
#         InputFile.read()
#         userInputFile = inputDatFile.from_dat(InputFile.data)
#         solNum = userInputFile.NUMREPS
#         if len('SolutionsMatrix') != solNum:
#             raise ValueError(f"Solutions in input file is {solNum} but got {len(SolutionsMatrix)}")
#         return method
        
    
#     @validator('PU')
#     # Check that the number of output planning units is the same as in the pu.dat file
#     def PU_is_valid(cls, method:dict) -> dict:
#         InputFile = DatFile(f'{MARXAN_FOLDER}/input.dat')
#         InputFile.read()
#         userInputFile = inputDatFile.from_dat(InputFile.data)
#         filename = f'{MARXAN_FOLDER}/{userInputFile.INPUTDIR}/{userInputFile.PUNAME}'
#         userPlanningUnits = CreateListModelFromFile(f'{MARXAN_FOLDER}/{userInputFile.INPUTDIR}/{userInputFile.PUNAME}', planningUnits)
#         puNum =len(userPlanningUnits)
#         if len(PU) != puNum:
#             raise ValueError(f"PU in input file is {puNum} but got {len(PU)}")
#         return method