In [5]:
import sys
sys.path.append('/Users/darryl/proj/carbonplan/retro/')

from retrospective.load.issuance import load_issuance_table, ifm_opr_ids

In [97]:
import numpy as np

In [43]:
from retrospective.load.project_db import load_project_db

In [3]:
# Hard-coded absolute path to the ARBOC issuance workbook (2020-09-09 snapshot).
issuance_xlsx = '/Users/darryl/forest-retro/documents-of-interest/arb/issuance/arboc_issuance_2020-09-09.xlsx'
# forest_only=False is passed on purpose: later cells filter on
# project_type == 'forest' themselves and also need the non-forest rows
# for the program-wide totals.
df = load_issuance_table(issuance_xlsx, forest_only=False)

In [4]:
# Denominator for every "fraction of all issued ARBOCs" figure below:
# the allocation summed over every row of the issuance table.
total_issued_arbocs = df.allocation.sum()

In [6]:
# Issuance rows whose opr_id appears in the imported ifm_opr_ids collection.
is_ifm_row = df['opr_id'].isin(ifm_opr_ids)
ifms = df.loc[is_ifm_row]

In [14]:
# Number of distinct opr_ids among rows flagged as forest projects.
df.loc[df['project_type'] == 'forest', 'opr_id'].nunique()

130

In [9]:
# Size of the ifm_opr_ids collection imported from retrospective.load.issuance.
len(ifm_opr_ids)

93

Early Action (EA) ARBOCs are actually a fairly sizable fraction of the overall program -- weighing in at 12.5 percent. 
I actually find this a little surprising...
EA projects also have a similar skew toward forests. 

In [67]:
# Row masks shared by the three ratios computed in this cell.
is_ea_row = df['is_ea'] == 1
is_forest_row = df['project_type'] == 'forest'

# 1) Early-action ARBOCs as a share of everything issued.
total_ea_arbocs = df.loc[is_ea_row, 'allocation'].sum()
fraction_arboc_ea = total_ea_arbocs / total_issued_arbocs
display(f"{fraction_arboc_ea* 100:0.2f} percent ARBOCs in the issuance table come from EA period")

# 2) Early-action *forest* ARBOCs as a share of everything issued.
forest_ea_arbocs = df.loc[is_ea_row & is_forest_row, 'allocation'].sum()
fraction_arboc_forest_ea = forest_ea_arbocs / total_issued_arbocs
display(f"And EA forest projects represent {fraction_arboc_forest_ea*100:.2f} percent of all issued ARBOCs")

# 3) Early-action forest ARBOCs as a share of early-action ARBOCs only.
forest_ea_as_frac_of_ea = forest_ea_arbocs / total_ea_arbocs
display(f"This means EA forests represent a little over half of all EA ARBOCs ({forest_ea_as_frac_of_ea * 100:.2f} percent to be exact)")

'12.53 percent ARBOCs in the issuance table come from EA period'

'And EA forest projects represent 6.88 percent of all issued ARBOCs'

'This means EA forests represent a little over half of all EA ARBOCs (54.89 percent to be exact)'

## Graduated projects

In [142]:
project_db = load_project_db('Forest-Offset-Projects-v0.3', use_cache=True)

# Map project_db index label -> early-action ID, keeping only the rows whose
# early-action ID starts with 'CAR'.
early_action_ids = project_db['project']['early_action']
graduated_projects = early_action_ids[early_action_ids.str.startswith('CAR')].to_dict()

# Manual addition: one AC project converted over as well.
graduated_projects['CAR1109'] = 'CAR749'
graduated_projects

loading load Forest-Offset-Projects-v0.3 from /Users/darryl/proj/carbonplan/retro/data


{'CAR1063': 'CAR657',
 'CAR1161': 'CAR681',
 'CAR1162': 'CAR655',
 'CAR1159': 'CAR696',
 'CAR1134': 'CAR658',
 'CAR1140': 'CAR661',
 'CAR1099': 'CAR660',
 'CAR1067': 'CAR1004',
 'CAR1086': 'CAR648',
 'CAR1147': 'CAR697',
 'CAR1100': 'CAR408',
 'CAR1070': 'CAR777',
 'CAR1130': 'CAR582',
 'CAR1088': 'CAR645',
 'CAR1141': 'CAR101',
 'CAR1139': 'CAR730',
 'CAR1062': 'CAR780',
 'CAR1098': 'CAR102',
 'CAR1160': 'CAR686',
 'CAR1109': 'CAR749'}

In [94]:
# opr_ids of early-action forest projects that are NOT the early-action ID of
# any graduated project.
# `is_ea` is compared with 1 explicitly, as the other EA cells in this notebook
# do, so the mask is boolean whatever dtype the column was loaded with.
non_grads = df[
    (df['project_type'] == 'forest')
    & (df['is_ea'] == 1)
    & (~df['opr_id'].isin(graduated_projects.values()))
]['opr_id'].unique().tolist()

In [95]:
# Every ID tied to a graduated project: the mapping's values first, then its keys.
graduated_list = [*graduated_projects.values(), *graduated_projects.keys()]

In [98]:
# Start every row as missing, but in an object-dtype column: the labels assigned
# later are strings, and writing strings into a float64 NaN column is deprecated
# in recent pandas (silent upcasting on setitem).
df['project_category'] = np.full(len(df), np.nan, dtype=object)

In [103]:
# Hand-entered project counts per project type for each category.
# NOTE(review): these are literals, not derived from `df`; they total 9, 20 and
# 81 respectively and must be updated by hand if the issuance table changes.
non_graduated_counts = dict(ifm=3, ac=6)
graduated_counts = dict(ifm=19, ac=1)
compliance_counts = dict(ifm=74, ac=5, refor=2)

In [99]:
# Label forest rows only. Order matters: every forest row starts as
# 'compliance', then the more specific labels overwrite it.
is_forest = df['project_type'] == 'forest'
df.loc[is_forest, 'project_category'] = 'compliance'
df.loc[is_forest & df['opr_id'].isin(non_grads), 'project_category'] = 'non-graduated'
df.loc[is_forest & df['opr_id'].isin(graduated_list), 'project_category'] = 'graduated'

In [104]:
# Project-level identifier: rows whose opr_id is a key of graduated_projects take
# the mapped early-action ID; every other row keeps its own opr_id.
df['guid'] = df['opr_id'].map(graduated_projects).fillna(df['opr_id'])

In [115]:
# Share of all issued ARBOCs going to forest (True) vs. non-forest (False) rows.
is_forest = df['project_type'] == 'forest'
allocation_by_forest_flag = df.groupby(is_forest)['allocation'].sum()
forest_share_total = (allocation_by_forest_flag / df['allocation'].sum()).rename('forest_share_total')

## Total forests

In [121]:
# Forest rows of the issuance table. Bound here because no visible cell defines
# `forests`, so on a fresh kernel this cell would raise a NameError.
forests = df[df['project_type'] == 'forest']
# Counting `guid` rather than `opr_id` counts a graduated project once across
# its early-action and compliance IDs.
display(f"Across EA and COP there are {forests['guid'].nunique()} unique forest projects")

'Across EA and COP there are 110 unique forest projects'

We can break those 110 projects down into three categories: compliance, graduated, and non-graduated projects.

In [183]:
def danny_table_stats(subset, total_arbocs):
    """Summarise one slice of the issuance table.

    Parameters
    ----------
    subset : DataFrame with at least the columns 'guid' and 'allocation'.
    total_arbocs : number that the subset's allocation is divided by.

    Returns
    -------
    dict with keys 'n_proj' (distinct 'guid' values), 'allocation' (sum of the
    'allocation' column) and 'frac_total_arbocs' (allocation / total_arbocs,
    rounded to 3 decimals).
    """
    subset_allocation = subset['allocation'].sum()
    stats = dict(n_proj=subset['guid'].nunique())
    stats['allocation'] = subset_allocation
    stats['frac_total_arbocs'] = round(subset_allocation / total_arbocs, 3)
    return stats

In [184]:
# Forest rows from the first reporting period.
# '(' catches the two reforestation projects
is_forest = df['project_type'] == 'forest'
is_first_rp = df['arb_rp_id'].isin(['A', '('])
initial_forests = df[is_forest & is_first_rp]

In [185]:
# project_db index labels whose baseline initial carbon stock exceeds common
# practice, minus any label that is a key of graduated_projects.
baseline = project_db['baseline']
above_common_practice = baseline['initial_carbon_stock'] > baseline['common_practice']
ics_gt_cp_lst = [
    label
    for label in project_db[above_common_practice].index.tolist()
    if label not in graduated_projects.keys()
]

len(ics_gt_cp_lst)

72

In [201]:
# Row masks over initial_forests for the two narrower slices.
is_compliance_ifm = (initial_forests['project_category'] == 'compliance') & (initial_forests['opr_id'].isin(ifm_opr_ids))
# Selected purely by opr_id membership in ics_gt_cp_lst.
has_ics_above_cp = initial_forests['opr_id'].isin(ics_gt_cp_lst)

subsets = {
    'all_forest': initial_forests,
    'compliance_ifm': initial_forests[is_compliance_ifm],
    'ifm_ics_gt_cp': initial_forests[has_ics_above_cp],
}

In [202]:
import pandas as pd

In [203]:
# One row per subset, one column per statistic. The dict-of-dicts constructor
# puts subsets in columns, so the frame is transposed afterwards.
stats_by_subset = {
    subset_name: danny_table_stats(subset_rows, total_issued_arbocs)
    for subset_name, subset_rows in subsets.items()
}
initial_forest_table = pd.DataFrame(stats_by_subset).transpose()

In [204]:
# Reader-friendly column headers, for display only: rename returns a new frame
# and initial_forest_table itself is left unchanged.
display_headers = {
    'n_proj': 'Project Count',
    'allocation': "First RP ARBOCs",
    'frac_total_arbocs': "Fraction of Total Issued ARBOCs",
}
initial_forest_table.rename(columns=display_headers)

Unnamed: 0,Project Count,First RP ARBOCs,Fraction of Total Issued ARBOCs
all_forest,110.0,131853870.0,0.683
compliance_ifm,74.0,121198020.0,0.628
ifm_ics_gt_cp,72.0,120954134.0,0.627


In [123]:
# Distinct project identifiers (guid) per category. Rows whose project_category
# is still missing are left out, because groupby drops NaN keys by default.
df.groupby('project_category')['guid'].nunique()

project_category
compliance       81
graduated        20
non-graduated     9
Name: guid, dtype: int64

In [125]:
# Derive both counts from the data instead of hard-coding 81/110, so the message
# stays correct if the issuance table or the category assignment changes.
n_compliance_projects = df.loc[df['project_category'] == 'compliance', 'guid'].nunique()
n_forest_projects = df.loc[df['project_type'] == 'forest', 'guid'].nunique()
display(f"Compliance Forest projects represent {n_compliance_projects / n_forest_projects * 100:.2f} percent of all forest projects")

'Compliance Forest projects represent 73.64 percent of all forest projects'

In [127]:

# Compliance IFM count as a fraction of all IFM opr_ids. The denominator is
# taken from ifm_opr_ids itself rather than a hard-coded 93.
display(f"{compliance_counts['ifm']/len(ifm_opr_ids)}")

'0.7956989247311828'

In [49]:
# NOTE(review): `graduated_projects` is bound to a dict in one cell of this
# notebook and to a list in another, so this count depends on which of those
# cells ran last. Re-run after the intended definition before trusting it.
display(f"There are {len(graduated_projects)} graduated projects")

'There are 19 graduated projects'

In [58]:
# ARBOCs issued under opr_ids found in graduated_projects. For a dict, `isin`
# tests membership against its keys.
is_graduated_row = df['opr_id'].isin(graduated_projects)
graduated_arbocs = df.loc[is_graduated_row, 'allocation'].sum()
fraction_arbocs_from_graduated = graduated_arbocs / total_issued_arbocs

display(f"Graudated projects represent just {fraction_arbocs_from_graduated *100:0.2f} percent of all issued arbocs")

'Graudated projects represent just 2.92 percent of all issued arbocs'

So if graduated projects are 2.92 percent of all ARBOCs and EA forests are 6.88 percent of all issued ARBOCs, EA+graduated projects represent less than 10 percent of all ARBOCs issued to date. 

In [15]:
# All ARBOCs issued to the rows selected into `ifms`, across reporting periods.
total_ifm_issued_arbocs = ifms.allocation.sum()

In [16]:
# ARBOCs issued to `ifms` rows in reporting period 'A' only.
is_initial_rp = ifms['arb_rp_id'] == 'A'
initial_ifm_arbocs = ifms.loc[is_initial_rp, 'allocation'].sum()

In [17]:
# The denominator is total_issued_arbocs, i.e. every row of the issuance table,
# early-action rows included. The message therefore describes the share of *all
# issued* ARBOCs; the "(EA excluded)" qualifier belongs to the IFM numerator.
# NOTE(review): that ifm_opr_ids contains no early-action IDs is taken from the
# original wording -- confirm against retrospective.load.issuance.
display(f"Compliance IFMs (EA excluded) represent {total_ifm_issued_arbocs/total_issued_arbocs * 100:.2f} percent of all issued ARBOCS")

'IFMs repsent 71.04 percent of all compliance (EA excluded) ARBOCS'

In [18]:
# Initial-reporting-period IFM ARBOCs relative to (a) all ARBOCs issued to the
# `ifms` rows and (b) every ARBOC in the issuance table.
initial_frac_total_ifm, initial_frac_total_issued = (
    initial_ifm_arbocs / total_ifm_issued_arbocs,
    initial_ifm_arbocs / total_issued_arbocs,
)

In [68]:
# Denominator here is total_ifm_issued_arbocs (ARBOCs issued to the `ifms` rows).
display(f"Initial IFM ARBOCs represent {initial_frac_total_ifm * 100:.2f} percent of all ARBOCs issued to compliance IFM projects")

'Initial IFM ARBOCs represent 90.33 percent of all ARBOCs issued to compliance IFM projects'

In [69]:
# Denominator here is total_issued_arbocs (every row of the issuance table).
display(f"Initial IFMs ARBOCs represent {initial_frac_total_issued * 100:.2f} percent of all issued ARBOCS")

'Initial IFMs ARBOCs represent 64.17 percent of all issued ARBOCS'

## Some other stuff: initial vs. subsequent reporting-period issuance

In [90]:
from retrospective.load.retro import retro


retro_db = retro("Forest-Offset-Projects-v0.3", use_cache=True)

# opr_ids whose early-action ID starts with "CAR".
# NOTE(review): this rebinds `graduated_projects` to a *list*, while another cell
# of this notebook binds the same name to a dict and calls .keys()/.values() on
# it. Cells that need the dict will fail if they are re-run after this one.
graduated_projects = [
    opr_id
    for opr_id, early_action_id in retro_db["project"]
    .set_index("opr_id")["early_action"]
    .to_dict()
    .items()
    if early_action_id.startswith("CAR")
]

loading load Forest-Offset-Projects-v0.3 from /Users/darryl/proj/carbonplan/retro/data


In [86]:
def initial_delta(data):
    """Ratio of initial issuance to the average later issuance for one project.

    `data` is a frame with 'arb_rp_id' and 'allocation' columns. If no row has
    reporting period 'B' the project has nothing to compare against and None is
    returned. Otherwise the result is the summed 'A' allocation divided by the
    mean of the per-reporting-period totals of all non-'A' rows.
    """
    rp_ids = data['arb_rp_id']
    if 'B' not in rp_ids.unique().tolist():
        return None

    is_initial = rp_ids == 'A'
    initial = data.loc[is_initial, 'allocation'].sum()
    # Total per reporting period before averaging: issuances can be rolling, so
    # one reporting period may span several rows (rare -- one known case).
    later_totals = data.loc[~is_initial].groupby('arb_rp_id')['allocation'].sum()
    return initial / later_totals.mean()

In [94]:
# Distribution of the initial-vs-subsequent ratio over IFM projects that are not
# in graduated_projects. Projects for which initial_delta returns None are
# dropped before describing.
non_graduated_ifms = ifms[~ifms.opr_id.isin(graduated_projects)]
(
    non_graduated_ifms
    .groupby(['opr_id'])
    .apply(initial_delta)
    .sort_values()
    .dropna()
    .describe()
)

count    41.000000
mean     20.416665
std      15.040035
min       0.537271
25%       9.786371
50%      15.499460
75%      28.218495
max      70.459507
dtype: float64