# Using Maxent as base model
Yangkang Chen<br>
Sep 5, 2023

In [1]:
import pandas as pd
import numpy as np
import random
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import matplotlib
import warnings
import pickle
import geopandas as gpd
import os
import json

# matplotlib.style.use('ggplot')
# plt.rcParams['axes.facecolor']='w'
# warnings.filterwarnings('ignore')
%matplotlib inline

In [2]:
# Load IPython's autoreload extension and set it to mode 2 so that edits to
# imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
# Read the sample Mallard dataset that the rest of the notebook works on.
sample_csv_path = './BirdSTEM/dataset/test_data/Sample_data_Mallard.csv'
data = pd.read_csv(sample_csv_path)

# Get X and y

In [4]:
# Separate the response ('count') from the remaining columns, which are
# used as predictors.
target_column = 'count'
X = data.drop(columns=target_column)
y = data[target_column].values


# First things first: spatio-temporal train-test split

In [5]:
from BirdSTEM.model_selection import ST_train_test_split

# Split into train/test sets with the project's spatio-temporal splitter.
# NOTE(review): the block counts presumably control how finely space and time
# are partitioned before assignment to train/test - confirm against
# ST_train_test_split.
split_options = dict(
    Spatio_blocks_count=50,
    Temporal_blocks_count=50,
    random_state=42,
    test_size=0.3,
)
X_train, X_test, y_train, y_test = ST_train_test_split(X, y, **split_options)

# Train an AdaSTEM classifier with Maxent as the base model

In [65]:
from BirdSTEM.model.AdaSTEM import AdaSTEM, AdaSTEMClassifier, AdaSTEMRegressor
from xgboost import XGBClassifier, XGBRegressor
from BirdSTEM.model.Hurdle import Hurdle_for_AdaSTEM, Hurdle
import elapid as ela


In [66]:
# AdaSTEM classifier whose base model is an elapid Maxent model.
# NOTE(review): the grid/temporal values are passed through unchanged; their
# units and exact semantics are defined by AdaSTEMClassifier - confirm there.
maxent_base_model = ela.MaxentModel(transform='cloglog', beta_multiplier=2.0)

adastem_options = dict(
    save_gridding_plot=True,
    ensemble_fold=10,
    min_ensemble_required=7,
    grid_len_lon_upper_threshold=50,
    grid_len_lon_lower_threshold=5,
    grid_len_lat_upper_threshold=50,
    grid_len_lat_lower_threshold=5,
    temporal_step=50,
    temporal_bin_interval=100,
    points_lower_threshold=500,
)

model = AdaSTEMClassifier(base_model=maxent_base_model, **adastem_options)


In [50]:
# Disabled experiment (kept for reference): attach the labels to a copy of
# X_train, draw a 5000-row random sample, and produce the feature columns
# together with the labels binarized as 1 where > 0, else 0.
# new_X_train = X_train.copy()
# new_X_train['y_'] = y_train
# new_X_train = new_X_train.sample(5000)
# new_X_train.iloc[:,:-1], np.where(new_X_train.iloc[:,-1].values>0,1,0)

In [67]:
# Binarize the counts (1 where count > 0, else 0) and fit the AdaSTEM
# classifier on that target.
y_train_binary = np.where(y_train > 0, 1, 0)
model.fit(X_train, y_train_binary)

Generating Ensemble: 100%|██████████| 10/10 [00:42<00:00,  4.26s/it]
training:   2%|▏         | 175/9502 [04:17<31:20,  4.96it/s]  

In [None]:
# Persist the fitted AdaSTEM model. This is the first write into
# ./test_output, so create the directory first; otherwise open() raises
# FileNotFoundError when the folder does not exist yet.
os.makedirs('./test_output', exist_ok=True)
with open('./test_output/maxent_adastem.pkl', 'wb') as f:
    pickle.dump(model, f)
    

42        0
65        1
1374      0
1529      0
1797      1
         ..
282358    1
282559    0
282642    0
282695    0
283399    0
Name: true_y, Length: 851, dtype: int64

In [None]:
# Predict on the held-out test set with the fitted AdaSTEM model.
# NOTE(review): the later .dropna() on the prediction frame suggests the
# output may contain NaN for some rows - confirm against AdaSTEM.predict.
pred_adastem = model.predict(X_test)


In [None]:
# Pair ground truth with AdaSTEM predictions. Negative predictions are
# floored at 0 and rows containing NaN are dropped.
adastem_pred_flat = pred_adastem.flatten()
adastem_pred_floored = np.where(adastem_pred_flat < 0, 0, adastem_pred_flat)

pred_df = pd.DataFrame(
    {
        'y_true': y_test.flatten(),
        'y_pred_adastem': adastem_pred_floored,
    }
).dropna()


In [None]:
# Score the AdaSTEM predictions with the project's evaluation helper.
# NOTE(review): the task is passed as 'hurdle' although `model` is an
# AdaSTEMClassifier - confirm this is the intended task for eval_STEM_res.
eval_y_true = np.array(pred_df.y_true).flatten()
eval_y_pred = np.array(pred_df.y_pred_adastem).flatten()
# Floor negatives at 0 again (a no-op when pred_df was built with the same
# flooring, but kept to preserve the original computation).
eval_y_pred = np.where(eval_y_pred < 0, 0, eval_y_pred)

metric_dict = AdaSTEM.eval_STEM_res('hurdle', eval_y_true, eval_y_pred)


In [None]:
# Write the AdaSTEM evaluation metrics to disk as JSON.
adastem_metrics_path = './test_output/Maxent_AdasTEM_metrics.json'
with open(adastem_metrics_path, 'w') as metrics_file:
    json.dump(metric_dict, metrics_file)
    

# Compare to a simple Maxent model

In [None]:
# Stand-alone Maxent model with the same settings as the AdaSTEM base model.
model_me = ela.MaxentModel(
    transform='cloglog',
    beta_multiplier=2.0,
)

In [None]:
# Fit the plain Maxent model on the predictors without the coordinate
# columns, using the binarized target (1 where count > 0, else 0).
coordinate_columns = ['longitude', 'latitude']
X_train_no_coords = X_train.drop(coordinate_columns, axis=1)
y_train_presence = np.where(y_train > 0, 1, 0)
model_me.fit(X_train_no_coords, y_train_presence)

Generating Ensemble: 100%|██████████| 10/10 [00:38<00:00,  3.90s/it]
training: 100%|██████████| 16179/16179 [04:41<00:00, 57.54it/s] 


In [None]:
# Persist the fitted plain Maxent model.
simple_maxent_path = './test_output/simple_maxent.pkl'
with open(simple_maxent_path, 'wb') as model_file:
    pickle.dump(model_me, model_file)
    

In [None]:
# Predict with the plain Maxent model (`model_me`), not the AdaSTEM ensemble
# (`model`). The coordinate columns are dropped to match the features
# `model_me` was fitted on.
pred_me = model_me.predict(X_test.drop(['longitude','latitude'], axis=1))


In [None]:
# Pair ground truth with plain-Maxent predictions, thresholded at 0.5 into
# 0/1 labels; rows containing NaN are dropped.
# Note: this rebinds `pred_df`, replacing the AdaSTEM prediction frame.
maxent_pred_flat = pred_me.flatten()
maxent_pred_label = np.where(maxent_pred_flat > 0.5, 1, 0)

pred_df = pd.DataFrame(
    {
        'y_true': y_test.flatten(),
        'y_pred': maxent_pred_label,
    }
).dropna()


In [None]:
# Score the plain Maxent predictions with the same helper and task string
# used for the AdaSTEM model.
# NOTE(review): y_true here holds the raw test values while y_pred holds 0/1
# labels - confirm eval_STEM_res binarizes y_true for the 'hurdle' task.
metrics_me = AdaSTEM.eval_STEM_res('hurdle', pred_df.y_true, pred_df.y_pred)


{'AUC': nan,
 'kappa': nan,
 'f1': nan,
 'precision': nan,
 'recall': nan,
 'average_precision': nan,
 'Spearman_r': nan,
 'Pearson_r': nan,
 'R2': 1.0,
 'MAE': 0.0,
 'MSE': 0.0,
 'poisson_deviance_explained': nan}

In [None]:
# Write the plain Maxent evaluation metrics to disk as JSON.
# NOTE(review): 'Smple' in the filename looks like a typo for 'Simple'; it is
# left unchanged here in case other code reads this exact path.
simple_metrics_path = './test_output/Smple_Maxent_metrics.json'
with open(simple_metrics_path, 'w') as metrics_file:
    json.dump(metrics_me, metrics_file)
    

Unnamed: 0,y_true,y_pred
16,0.0,0.0
26,0.0,0.0
39,0.0,0.0
47,0.0,0.0
58,0.0,0.0
...,...,...
116158,0.0,0.0
116167,0.0,0.0
116193,0.0,0.0
116199,0.0,0.0
