In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from psmpy import PsmPy
from psmpy.functions import cohenD
from psmpy.plotting import *


Read in data

In [4]:
history = pd.read_csv('../data/outputs/on/on_defoliation_history_wx.csv')

View dataset

In [5]:
history.head()

Unnamed: 0,Fire_ID,history,rbr,rbr_w_offset,Fire_Year,host_species,total_pixels,host_percentage,Average Recovery,Time_Since_Defoliation,Cumulative_Years,Max_Overlap_Area,Max_Overlap_Percent,fire_area,isi_90,dc_90,dmc_90,ffmc_90,bui_90,fwi_90
0,CHA10_2010_1589,1,562.55,532.81,2010,279.0,364.0,76.6,104.52,22,16,764973.4,100.0,1.092,13.6,133.5,52.1,93.4,52.9,24.7
1,CHA18_2005_1513,1,606.9,283.7,2005,206.0,226.0,91.2,74.98,17,17,954810.2,100.0,0.678,8.8,384.4,70.7,90.9,94.2,27.5
2,CHA29_1995_249,1,504.62,238.17,1995,43381.0,54481.0,79.6,118.38,7,17,129459400.0,100.0,163.443,6.8,483.2,72.2,88.9,102.6,21.3
3,CHA52_1991_1021,1,273.85,225.15,1991,1786.0,2786.0,64.1,226.04,3,17,14118340.0,100.0,8.358,7.5,479.9,67.1,90.8,95.5,24.8
4,CHA7_1992_829,1,275.55,336.29,1992,2428.0,3018.0,80.5,225.63,4,17,9124350.0,100.0,9.054,8.1,42.9,19.4,92.1,19.9,10.2


Initialize the PsmPy class

In [6]:
psm = PsmPy(history, treatment='history', indx='Fire_ID',
             exclude = ['rbr', 'rbr_w_offset', 'Fire_Year', 
                        'host_species', 'total_pixels', 'Time_Since_Defoliation',
                        'Cumulative_Years', 'Max_Overlap_Area',
                        'fire_area', 'isi_90', 'dc_90', 'dmc_90', 'ffmc_90', 'bui_90', 'fwi_90'])


Predict scores 
- Calculate logistic propensity scores/logits:



In [7]:
psm.logistic_ps(balance = True)

Review values in dataframe

In [8]:
psm.predicted_data

Unnamed: 0,Fire_ID,host_percentage,Average Recovery,Max_Overlap_Percent,propensity_score,propensity_logit,history
0,NIP73_2001_41,99.9,74.25,0.0,0.000277,-8.190791,0
1,RED100_2011_1783,94.9,77.45,0.0,0.000329,-8.020344,0
2,RED35_1993_1147,98.8,75.43,0.0,0.000283,-8.170186,0
3,CHA10_2010_1589,76.6,104.52,100.0,1.000000,inf,1
4,CHA18_2005_1513,91.2,74.98,100.0,1.000000,inf,1
...,...,...,...,...,...,...,...
813,THU80_2011_1830,99.9,102.89,0.0,0.000092,-9.298941,0
814,THU83_1996_292,99.1,70.50,0.0,0.000158,-8.749764,0
815,THU86_1996_1314,99.9,61.19,0.0,0.000171,-8.671822,0
816,THU89_1996_447,96.1,71.42,0.0,0.000197,-8.530837,0


In [11]:
psm.predicted_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 818 entries, 0 to 817
Data columns (total 7 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   Fire_ID              818 non-null    object 
 1   host_percentage      818 non-null    float64
 2   Average Recovery     818 non-null    float64
 3   Max_Overlap_Percent  818 non-null    float64
 4   propensity_score     818 non-null    float64
 5   propensity_logit     818 non-null    float64
 6   history              818 non-null    int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 44.9+ KB


In [14]:
# Check for inf values in the propensity_logit column
inf_values_count = np.isinf(psm.predicted_data['propensity_logit']).sum()
print(inf_values_count)

240


Preform KNN matching

In [15]:
psm.knn_matched(matcher='propensity_score', replacement=False, caliper=None, drop_unmatched=True)


In [18]:
psm.matched_ids


Unnamed: 0,Fire_ID,matched_ID
0,CHA10_2010_1589,RED38_2012_1913
1,CHA18_2005_1513,RED183_2006_1627
2,CHA29_1995_249,RED123_2011_1790
3,CHA52_1991_1021,RED94_2012_1854
4,CHA7_1992_829,RED114_2011_1778
...,...,...
262,WAW4_2012_1916,COC17_1999_738
263,WAW5_2012_1904,SLK74_1996_261
264,WAW69_1995_946,NIP37_1996_565
265,WAW86_1995_724,SLK30_2006_1581


Unnamed: 0,Fire_ID,host_percentage,Average Recovery,Max_Overlap_Percent,propensity_score,propensity_logit,history,matched_ID
0,CHA10_2010_1589,76.6,104.52,100.0,1.000000,inf,1,RED38_2012_1913
1,CHA18_2005_1513,91.2,74.98,100.0,1.000000,inf,1,RED183_2006_1627
2,CHA29_1995_249,79.6,118.38,100.0,1.000000,inf,1,RED123_2011_1790
3,CHA52_1991_1021,64.1,226.04,100.0,1.000000,inf,1,RED94_2012_1854
4,CHA7_1992_829,80.5,225.63,100.0,1.000000,inf,1,RED114_2011_1778
...,...,...,...,...,...,...,...,...
529,COC17_1999_738,100.0,66.64,0.0,0.000559,-7.489272,0,
530,SLK74_1996_261,99.6,83.36,0.0,0.000555,-7.495964,0,
531,NIP37_1996_565,99.9,67.04,0.0,0.000552,-7.500669,0,
532,SLK30_2006_1581,100.0,83.13,0.0,0.000548,-7.508649,0,
