# Import libraries

In [None]:
import warnings

# Silence every warning category for the rest of the session so that the
# notebook output only shows the log messages produced below.
warnings.simplefilter( 'ignore' )

In [None]:
# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# Basic libraries
#
import math
import json
import pickle
import numpy    as np
import pandas   as pd
from   datetime import datetime

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# Sklearn
#
from sklearn                 import metrics
from sklearn                 import preprocessing


# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# XGBoost
#
import xgboost

# =-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
# User libraries
#
from utils.Logger   import *

# Parameters

In [None]:
# Other parameters
#
# VERBOSE switches on the progress messages that the cells below emit inside
# their `if VERBOSE:` blocks.
#
VERBOSE = True

In [None]:
# Initiate logger
#
# The logger is created unconditionally: the evaluation cells further down
# call `logger.info(...)` without checking VERBOSE, so binding `logger` only
# when VERBOSE is True would make them fail with a NameError as soon as
# VERBOSE is switched off. VERBOSE keeps gating the optional progress messages.
#
logger = init_logger( log_file = 'logs.log' )

# Import data

**Dataset**

- Irrigation 


**Context**

The goal is to predict whether a region is 'irrigated' or 'drainaged', based on multi-temporal satellite data (indices).

# Loading data

## Training data

In [None]:
# Read the training split from disk
#
df_train = pd.read_csv( 'Data/Irrigation_train.csv' )

# Report the size of the loaded table
# (the column count covers every column of the file, target included)
#
if VERBOSE:
    n_instances, n_features = df_train.shape
    logger.info('Training data were loaded')
    logger.info(f'Number of instances:  {n_instances}')
    logger.info(f'Number of features:   {n_features}')

# Preview the first three rows
#
df_train.head( n = 3 )

## Testing data

In [None]:
# Read the testing split from disk
#
df_test = pd.read_csv( 'Data/Irrigation_test.csv' )

# Report the size of the loaded table
# (the column count covers every column of the file, target included)
#
if VERBOSE:
    n_instances, n_features = df_test.shape
    logger.info('Testing data were loaded')
    logger.info(f'Number of instances:  {n_instances}')
    logger.info(f'Number of features:   {n_features}')

# Preview the first three rows
#
df_test.head( n = 3 )

## Pre-processing data

In [None]:
# Encode the target column 'Irrigation' as integers
#
# The encoder is fitted on the training labels only; the mapping it learned
# is then applied to the training labels first and to the testing labels second.
#
LabelEncoding = preprocessing.LabelEncoder().fit( df_train[ 'Irrigation' ] )

for frame in ( df_train, df_test ):
    frame[ 'Irrigation' ] = LabelEncoding.transform( frame[ 'Irrigation' ] )

if VERBOSE:
    logger.info('Target class was transformed using Label-Encoding')

# Training/Testing sets

In [None]:
# Separate predictors (X) from the target (Y)
#
# The split is purely positional: every column but the last is a predictor,
# the last column is the target.
# NOTE(review): this presumes 'Irrigation' is the last column of both CSV files - confirm.
#
trainX, trainY = df_train.iloc[:, :-1], df_train.iloc[:, -1]
testX,  testY  = df_test.iloc[:, :-1],  df_test.iloc[:, -1]

## Connect to the MLflow server

In [None]:
# Connect to the MLflow tracking server
# The server must already be running; start it with:
#   mlflow server --backend-store-uri sqlite:///mlflow.db --default-artifact-root ./artifacts --host 0.0.0.0 --port 5000
#
import mlflow
from   mlflow.models.signature import infer_signature

# 0.0.0.0 is the wildcard address the server *binds* to (listen on every
# interface); it is not a portable destination for a client and fails on some
# platforms. A server started with the command above is reachable on the same
# machine through the loopback address.
mlflow.set_tracking_uri("http://127.0.0.1:5000/")

# Select the experiment used by this notebook
mlflow.set_experiment("Irrigation-Experiment")

if VERBOSE:
    logger.info('MLFlow server is connected')

# Provide Model's Inference

## Approach 1: Load model using Model-ID

### Load model

In [None]:
# Identifier placed in the 'runs:/' URI below (hard-coded for this notebook)
#
ModelID = '24a8d9e18b4d4b28b39799e55b160013'

# Build the model URI and load the model through the generic pyfunc interface
#
logged_model = f'runs:/{ModelID}/models'
loaded_model = mlflow.pyfunc.load_model( model_uri = logged_model )


if VERBOSE:
    logger.info('Model loaded using Model-ID')

### Training set evaluation

In [None]:
# Get predictions
#
pred = loaded_model.predict( trainX )

# Calculate Confusion Matrix (CM): rows follow the true classes, columns the predictions
#
CM  = metrics.confusion_matrix(trainY, pred)

# Report the evaluation metrics
#
# Every sklearn metric takes the ground truth first and the prediction second.
# The AUC call previously had the two swapped, which treated the predictions
# as ground truth and reported a different (wrong) score.
# NOTE(review): `pred` is fed to confusion_matrix, so it is presumably hard
# class labels; the AUC is then computed from labels rather than scores - confirm.
#
# GM is the geometric mean of the per-class recalls:
# sqrt( product of the CM diagonal ) / sqrt( product of the two CM row sums ).
#
logger.info( 30*"-" )
logger.info( "*** Training set - Evaluation ***")
logger.info( "> Accuracy:  %.2f%%" % (100.0*metrics.accuracy_score( trainY, pred )) )
logger.info( "> AUC:       %.3f"   % metrics.roc_auc_score(trainY, pred) )
logger.info( "> Recall:    %.3f"   % metrics.recall_score(trainY, pred) )
logger.info( "> Precision: %.3f"   % metrics.precision_score(trainY, pred) )
logger.info( "> GM:        %.3f\n" % (math.sqrt( np.diag( CM ).prod() ) / math.sqrt( CM[0, :].sum() * CM[1, :].sum() )) )

### Testing set evaluation

In [None]:
# Get predictions
#
pred = loaded_model.predict( testX )

# Calculate Confusion Matrix (CM): rows follow the true classes, columns the predictions
#
CM  = metrics.confusion_matrix(testY, pred)

# Report the evaluation metrics
#
# Every sklearn metric takes the ground truth first and the prediction second.
# The AUC call previously had the two swapped, which treated the predictions
# as ground truth and reported a different (wrong) score.
# NOTE(review): `pred` is fed to confusion_matrix, so it is presumably hard
# class labels; the AUC is then computed from labels rather than scores - confirm.
#
# GM is the geometric mean of the per-class recalls:
# sqrt( product of the CM diagonal ) / sqrt( product of the two CM row sums ).
#
logger.info( 30*"-" )
logger.info( "*** Testing set - Evaluation ***")
logger.info( "> Accuracy:  %.2f%%" % (100.0*metrics.accuracy_score( testY, pred )) )
logger.info( "> AUC:       %.3f"   % metrics.roc_auc_score(testY, pred) )
logger.info( "> Recall:    %.3f"   % metrics.recall_score(testY, pred) )
logger.info( "> Precision: %.3f"   % metrics.precision_score(testY, pred) )
logger.info( "> GM:        %.3f\n" % (math.sqrt( np.diag( CM ).prod() ) / math.sqrt( CM[0, :].sum() * CM[1, :].sum() )) )

## Approach 2: Load registered model

In [None]:
# Fetch an MLflow model from the Model Registry by name and version number
# NOTE(review): an earlier note here said the model status should be 'Staging',
# but the URI below selects the model by version number, not by stage - confirm
# whether a stage requirement is still intended.
#
import mlflow.pyfunc

model_name = "Irrigation_model"
model_version = 1

loaded_model = mlflow.pyfunc.load_model(
    model_uri = "models:/{}/{}".format( model_name, model_version )
)


# Alternative (kept disabled): fetch the latest model version in a given stage
# > Put the stage name in the model URI instead of the version number to get
# > the most recent version of the model in that stage.
#
# model_name = "Irrigation_model"
# stage = 'Production'

# loaded_model = mlflow.pyfunc.load_model(
#     model_uri=f"models:/{model_name}/{stage}"
# )


### Training set evaluation

In [None]:
# Get predictions
#
pred = loaded_model.predict( trainX )

# Calculate Confusion Matrix (CM): rows follow the true classes, columns the predictions
#
CM  = metrics.confusion_matrix(trainY, pred)

# Report the evaluation metrics
#
# Every sklearn metric takes the ground truth first and the prediction second.
# The AUC call previously had the two swapped, which treated the predictions
# as ground truth and reported a different (wrong) score.
# NOTE(review): `pred` is fed to confusion_matrix, so it is presumably hard
# class labels; the AUC is then computed from labels rather than scores - confirm.
#
# GM is the geometric mean of the per-class recalls:
# sqrt( product of the CM diagonal ) / sqrt( product of the two CM row sums ).
#
logger.info( 30*"-" )
logger.info( "*** Training set - Evaluation ***")
logger.info( "> Accuracy:  %.2f%%" % (100.0*metrics.accuracy_score( trainY, pred )) )
logger.info( "> AUC:       %.3f"   % metrics.roc_auc_score(trainY, pred) )
logger.info( "> Recall:    %.3f"   % metrics.recall_score(trainY, pred) )
logger.info( "> Precision: %.3f"   % metrics.precision_score(trainY, pred) )
logger.info( "> GM:        %.3f\n" % (math.sqrt( np.diag( CM ).prod() ) / math.sqrt( CM[0, :].sum() * CM[1, :].sum() )) )

### Testing set evaluation

In [None]:
# Get predictions
#
pred = loaded_model.predict( testX )

# Calculate Confusion Matrix (CM): rows follow the true classes, columns the predictions
#
CM  = metrics.confusion_matrix(testY, pred)

# Report the evaluation metrics
#
# Every sklearn metric takes the ground truth first and the prediction second.
# The AUC call previously had the two swapped, which treated the predictions
# as ground truth and reported a different (wrong) score.
# NOTE(review): `pred` is fed to confusion_matrix, so it is presumably hard
# class labels; the AUC is then computed from labels rather than scores - confirm.
#
# GM is the geometric mean of the per-class recalls:
# sqrt( product of the CM diagonal ) / sqrt( product of the two CM row sums ).
#
logger.info( 30*"-" )
logger.info( "*** Testing set - Evaluation ***")
logger.info( "> Accuracy:  %.2f%%" % (100.0*metrics.accuracy_score( testY, pred )) )
logger.info( "> AUC:       %.3f"   % metrics.roc_auc_score(testY, pred) )
logger.info( "> Recall:    %.3f"   % metrics.recall_score(testY, pred) )
logger.info( "> Precision: %.3f"   % metrics.precision_score(testY, pred) )
logger.info( "> GM:        %.3f\n" % (math.sqrt( np.diag( CM ).prod() ) / math.sqrt( CM[0, :].sum() * CM[1, :].sum() )) )