## Credit Remaining by Vintage with Toucan

In [2]:
import numpy as np
import pandas as pd
import sklearn.linear_model as lm
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
import seaborn as sns

from IPython.display import Markdown

import scipy.stats

import warnings
warnings.filterwarnings("ignore")

In [6]:
# Load the wide table of credits remaining: one row per project, with the
# project identifiers followed by one column per vintage year.
vintage_csv_path = 'data/df_credits_remaining_by_vintage.csv'
df_credit_remaining_by_vintage = pd.read_csv(vintage_csv_path)
# Preview the first rows only (head() defaults to five).
df_credit_remaining_by_vintage.head()

Unnamed: 0,Project ID,Project Name,1996,1997,1998,1999,2000,2001,2002,2003,...,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022
0,ACR101,AFOVERT Energy,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,ACR102,Air Bag Gas Substitution,0,0,0,0,0,0,0,1890716,...,0,0,0,0,0,0,0,0,0,0
2,ACR103,Inland Empire Anaerobic Ag Digester,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,ACR104,Ankotrofotsy Community-based Reforestation and...,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,ACR105,Boa Vista A/R,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [12]:
# Build a Year x Project ID table of credits remaining in a single pass.
#
# The source frame is wide: one row per project, one column per vintage year.
# Melting the whole frame once and grouping on (Year, Project ID) produces the
# same per-project yearly sums as filtering and melting each project on its
# own, without rescanning the frame for every project and without inserting
# thousands of columns one at a time into an initially empty DataFrame.
vintage_long_df = pd.melt(
    df_credit_remaining_by_vintage,
    id_vars=['Project ID', 'Project Name'],
    var_name='Year',
    value_name='Value',
)

# Column labels that are not numeric become NaN here; groupby then drops them,
# so only real vintage-year columns contribute to the sums.
vintage_long_df['Year'] = pd.to_numeric(vintage_long_df['Year'], errors='coerce')

# unstack() sorts the new column labels; remember the order in which projects
# first appear so the resulting columns keep the source ordering.
project_ids_in_order = df_credit_remaining_by_vintage['Project ID'].unique()

combined_df = (
    vintage_long_df
    .groupby(['Year', 'Project ID'])['Value']
    .sum()
    .unstack('Project ID')
    .reindex(columns=project_ids_in_order)
    .rename_axis(columns=None)  # drop the 'Project ID' label left on the columns axis
    .reset_index()              # turn the Year index into a regular 'Year' column
)
combined_df

Unnamed: 0,Year,ACR101,ACR102,ACR103,ACR104,ACR105,ACR106,ACR107,ACR108,ACR109,...,VCS992,VCS993,VCS994,VCS995,VCS996,VCS997,VCS998,VCS999,VCSOPR10,VCSOPR2
0,1996,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1997,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,1998,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,1999,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,2000,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,2001,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
6,2002,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
7,2003,0,1890716,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
8,2004,0,3004613,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
9,2005,0,3071011,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [13]:
# Go back to long format: one row per (Year, Project ID) pair, with the
# summed credits for that pair in 'Value'.
credits_remaining_reshaped_df = pd.melt(
    combined_df,
    id_vars=['Year'],
    var_name='Project ID',
    value_name='Value',
)
credits_remaining_reshaped_df

Unnamed: 0,Year,Project ID,Value
0,1996,ACR101,0
1,1997,ACR101,0
2,1998,ACR101,0
3,1999,ACR101,0
4,2000,ACR101,0
...,...,...,...
214186,2018,VCSOPR2,0
214187,2019,VCSOPR2,0
214188,2020,VCSOPR2,0
214189,2021,VCSOPR2,0


In [18]:
# Load the Toucan table; it is used below to flag which project IDs are
# present in the Toucan data.
toucan_csv_path = 'data/toucan_pivoted_credits_remaining.csv'
toucan_pivoted_credits_remaining = pd.read_csv(toucan_csv_path)
toucan_pivoted_credits_remaining

Unnamed: 0,Year,Project ID,Credits Remaining
0,1996.0,VCS10,0
1,1997.0,VCS10,0
2,1998.0,VCS10,0
3,1999.0,VCS10,0
4,2000.0,VCS10,0
...,...,...,...
2803,2018.0,VCS986,29746
2804,2019.0,VCS986,48260
2805,2020.0,VCS986,42055
2806,2021.0,VCS986,3245


In [17]:
# Flag every row whose project also appears in the Toucan table.
# Note: this adds the 'isToucan' column to credits_remaining_reshaped_df in place.
toucan_project_ids = toucan_pivoted_credits_remaining['Project ID']
is_toucan_mask = credits_remaining_reshaped_df['Project ID'].isin(toucan_project_ids)
credits_remaining_reshaped_df["isToucan"] = is_toucan_mask
credits_remaining_reshaped_df

Unnamed: 0,Year,Project ID,Value,isToucan
0,1996,ACR101,0,False
1,1997,ACR101,0,False
2,1998,ACR101,0,False
3,1999,ACR101,0,False
4,2000,ACR101,0,False
...,...,...,...,...
214186,2018,VCSOPR2,0,False
214187,2019,VCSOPR2,0,False
214188,2020,VCSOPR2,0,False
214189,2021,VCSOPR2,0,False


In [19]:
# Number of rows flagged as belonging to a Toucan project (True counts as 1).
# int(...) keeps the displayed result a plain Python integer.
int(credits_remaining_reshaped_df["isToucan"].sum())

2808

In [20]:
# Persist the flagged long-format table. to_csv is left at its defaults, so the
# row index is written as an unnamed first column of the file.
reshaped_csv_path = "data/credits_remaining_reshaped_df.csv"
credits_remaining_reshaped_df.to_csv(reshaped_csv_path)