In [1]:
import pandas as pd

import numpy as np

from datetime import datetime

import requests
from requests.exceptions import RequestException
import json

from datetime import datetime




IMPORT RELEVANT CSV FILE


In [2]:
# Main CDM Registry project datasource from Database PA POA
df_activities= pd.read_excel('./input/CDM-Activities-November.xlsx')

# Country and geographic identifier file for easier mapping
df_country_code = pd.read_excel('./input/cdm-country-code.xlsx')


# Countries in CADTrust, to check with the mismatch from UNFCCC in later data processing
df_cadt_country = pd.read_excel("./input/CADTrust_Country.xlsx")

# LOAD UNEP file for PDD mapping
df_unep = pd.read_excel("./input/cdm-pipeline.xlsx")







INITIAL CLEANUP


In [3]:

# FILL CDM project reference number NAN value to 0, since there are almost 3000 of them

df_activities['CDM project reference number'] = df_activities['CDM project reference number'].fillna(0).astype(int)


# Check projects with more than one host countries/ "List of host countries (ISO 2)"
more_countries = df_activities[df_activities["List of host countries (ISO 2)"].str.len() > 2]

# Create excel files for projects with more than one countries/ "List of host countries (ISO 2)",
more_countries.to_excel("./output/CDM_projects_many_countries.xlsx")

# Map to CAD Trust based on the first country in the column
df_activities["List of host countries (ISO 2)"] = df_activities["List of host countries (ISO 2)"].str[:2]

# Apply geographic identifier that is suitable for CAD Trust mapping
df_country_code['geographicIdentifier'] = df_country_code.apply(
    lambda row: "{\"latitude\":" + str(row["latitude"]) + ",\"longitude\":" + str(row["longitude"]) + "}", axis=1
)

# Key Value mapping of PDD Consultant for CAD Trust projectDeveloper
pdd_developer_map = df_unep.set_index('Unique project ID')['PDD Consultant'].to_dict()

# Key Value mapping of reference number to unique identifier that will be useful for ISSUANCES and UNITS
id_to_id = df_activities.set_index('CDM project reference number')['Unique project identifier (traceable with Google)'].to_dict()

#Key Value mapping Country Code to CAD Trust Country Field
country_name_map = df_country_code.set_index('country')['cadtcountry'].to_dict()


#Key Value mapping Country Code to Geography Map
geography_map = df_country_code.set_index('name')['geographicIdentifier'].to_dict()

#Key Value mapping verification report date
verification_report_date_map = df_activities.set_index('Unique project identifier (traceable with Google)')['Start of first crediting period'].to_dict()




# Map to status fields from CDM to CAD Trust Project Status Picklist Value
project_status_map = {
    "Registered": "Registered",
    "Rejected": "Withdrawn",
    "Withdrawn":"Withdrawn",
    "WithdrawnBeforePublication": "Withdrawn",
    "Withdrawn Before Publication": "Withdrawn",
    "Deregistered": "De-registered",
    "Pending Publication": "Authorized",
    "Provisional": "Validated",
    "Validation Replaced": "Validated",
    "Requesting Registration": "Listed",
    "Validation Public": "Validated",
    "Validation Terminated": "Withdrawn",
    "NA":"Listed",
    "":"Listed",
    np.nan: "Listed"  # Handling actual NaN values
}
# Map from sector code number to it's description
sector_code_map = {
  "1": 'Energy industries (renewable - / non-renewable sources)',
  "2": 'Energy distribution',
  "3": 'Energy demand',
  "4": 'Manufacturing industries',
  "5": 'Chemical industries',
  "6": 'Construction',
  "7": 'Transport',
  "8": 'Mining/mineral production',
  "9": 'Metal production',
  "10": 'Fugitive emissions from fuels (solid, oil and gas)',
  "11": 'Fugitive emissions from production and consumption of halocarbons and sulphur hexafluoride',
  "12": 'Solvent use',
  "13": 'Waste handling and disposal',
  "14": 'Afforestation and reforestation',
  "15": 'Agriculture'
}


# basic cleanup
def fillna_by_dtype(df):
    for column in df.columns:
        if pd.api.types.is_numeric_dtype(df[column]):  # Check if the column is numeric
            df[column] = df[column].fillna(0)
        elif pd.api.types.is_object_dtype(df[column]):  # Check if the column is of object type (e.g., strings)
            df[column] = df[column].fillna('NA')
        elif pd.api.types.is_datetime64_any_dtype(df[column]):  # Check if the column is datetime
            df[column] = df[column].fillna(pd.Timestamp.min)
    return df

    


set_cadt_country = set(df_cadt_country['Country'])
set_country_code = set(df_country_code['name'])

# Find non-matching elements
print(set_cadt_country - set_country_code)
print(set_country_code - set_cadt_country)





{'State of Palestine', 'Viet Nam', 'Bosnia and  Herzegovina', 'United States of America', 'Sao Tome and Principe', 'Republic of Korea', 'The Former Yugoslav Republic of Macedonia', 'South Sudan', "Democratic People's Republic of Korea", 'United Republic of Tanzania', "Lao People's Democratic Republic", "Cote d'Ivoire", 'Cabo Verde', 'Congo', 'European Union', 'Myanmar', 'Guinea Bissau', 'Equatorial New Guinea', 'Democratic Republic of Congo', 'Antigua and Barduba', 'Brunei Darussalam', 'Domincan Republic', 'Russian Federation'}
{'Russia', 'New Caledonia', 'Svalbard and Jan Mayen', 'Zambia', 'Saint Helena', 'Mayotte', 'U.S. Virgin Islands', 'Ecuador', 'Falkland Islands [Islas Malvinas]', 'Christmas Island', 'Isle of Man', 'Dominican Republic', 'Turks and Caicos Islands', 'Cayman Islands', 'Kazakhstan', 'Cocos [Keeling] Islands', 'Montserrat', 'Anguilla', 'Seychelles', 'Northern Mariana Islands', 'Pitcairn Islands', "Côte d'Ivoire", 'Antarctica', 'Iraq', 'São Tomé and Príncipe', 'Hong Ko

In [4]:
# Rename the df column to match CADTrust field

rename_columns  = {
  "Unique project identifier (traceable with Google)": "originProjectId",
  "Registration project title": "projectName",
  "Type of CDM project: PA/PoA": "program",
  "Sectoral scope number(s)": "sector",
  "Project type (UNEP DTU)": "projectType",
  "Website project status": "projectStatus",
  "Methodologies used at registration": "methodology",
  "DOE": "validationBody",
  "Start of validation": "validationDate",
  "Country subregion":"inCountryRegion",
  "List of host countries (ISO 2)": "country",

  'Start of first crediting period':'creditingPeriodStart',
  'End of first crediting period':'creditingPeriodEnd',
  'Total CERs issued': 'unitCount'
}

# "creditingPeriodStart":"2022-02-04T00:00:00.000Z",
#         "creditingPeriodEnd":"2022-03-04T00:00:00.000Z",
#         "unitCount":100,

df_project = df_activities[rename_columns.keys()].rename(columns=rename_columns)

print("Data types of specific columns:")
print("creditingPeriodStart:", df_project['creditingPeriodStart'].dtype)
print("creditingPeriodEnd:", df_project['creditingPeriodEnd'].dtype)
print("unitCount:", df_project['unitCount'].dtype)

df_project['creditingPeriodStart'] = pd.to_datetime(df_project['creditingPeriodStart'], errors='coerce')
df_project['creditingPeriodEnd'] = pd.to_datetime(df_project['creditingPeriodEnd'], errors='coerce')

df_project['unitCount'] = df_project['unitCount'].replace("-", 0).fillna(0)

df_project['estimations'] = df_project.apply(lambda row: [{
    "creditingPeriodStart": row['creditingPeriodStart'].strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + 'Z' if pd.notnull(row['creditingPeriodStart']) else None,
    "creditingPeriodEnd": row['creditingPeriodEnd'].strftime('%Y-%m-%dT%H:%M:%S.%f')[:-3] + 'Z' if pd.notnull(row['creditingPeriodEnd']) else None,
    "unitCount": row['unitCount']
}], axis=1)

df_project.drop(columns=['creditingPeriodStart', 'creditingPeriodEnd', 'unitCount'], inplace=True)

print(df_project.columns)

# Adding other relevant values to CAD Trust field
df_project["registryOfOrigin"]= "CDM Registry"
df_project["currentRegistry"]= "CDM Registry"
df_project['unitMetric'] = 'tCO2e'
df_project['methodology'] = "CDM - " + df_project['methodology']
df_project['projectId'] = df_project['originProjectId']
df_project['projectLink'] = 'https://cdm.unfccc.int/Projects/Validation/DB/' +df_project['projectId']+"/view.html"
df_project['coveredByNDC'] = "Unknown"
df_project['projectStatusDate'] = datetime.now().strftime('%Y-%m-%d')

# Map the countries
df_project['country'] = df_project['country'].map(country_name_map)
df_project['sector'] = df_project['sector'].map(sector_code_map)
df_project['projectDeveloper'] = df_project['projectId'].map(pdd_developer_map)
df_project["geographicIdentifier"]= df_project['country'].map(geography_map)
df_project['description'] = df_project['projectName']
df_project['ndcInformation'] = "NA"

# Implement the fillna function created earlier
df_project = fillna_by_dtype(df_project)
df_project["projectStatus"]= df_project['projectStatus'].map(project_status_map)


projID_to_country = df_project.set_index('projectId')['country'].to_dict()



# Output to check the new DataFrame
df_project.to_excel("./output/CDM-Projects-mapped.xlsx")

# df_project['country'].head(10)


df_project



Data types of specific columns:
creditingPeriodStart: object
creditingPeriodEnd: object
unitCount: int64
Index(['originProjectId', 'projectName', 'program', 'sector', 'projectType',
       'projectStatus', 'methodology', 'validationBody', 'validationDate',
       'inCountryRegion', 'country', 'estimations'],
      dtype='object')


Unnamed: 0,originProjectId,projectName,program,sector,projectType,projectStatus,methodology,validationBody,validationDate,inCountryRegion,...,currentRegistry,unitMetric,projectId,projectLink,coveredByNDC,projectStatusDate,projectDeveloper,geographicIdentifier,description,ndcInformation
0,AuxID,Project for GHG emission reduction by thermal ...,PA,Fugitive emissions from production and consump...,HFCs,Registered,CDM - AM0001,SGS United Kingdom Limited (SGS),2003-12-01 00:00:00,Southern Asia,...,CDM Registry,tCO2e,AuxID,https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,2024-01-02,PricewaterhouseCoopers,"{""latitude"":20.593684,""longitude"":78.96288}",Project for GHG emission reduction by thermal ...,
1,LJ80OQZR2JVOLTZZX4Y0EGB1N18AVO,HFC Decomposition Project in Ulsan,PA,Fugitive emissions from production and consump...,HFCs,Registered,CDM - AM0001,Japan Quality Assurance Organisation (JQA),2003-12-11 00:00:00,Eastern Asia,...,CDM Registry,tCO2e,LJ80OQZR2JVOLTZZX4Y0EGB1N18AVO,https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,2024-01-02,Climate Experts,,HFC Decomposition Project in Ulsan,
2,SQA16OAS75TKWYZTTKK7G1LO0O8CMU,Brazil NovaGerar Landfill Gas to Energy Project,PA,Waste handling and disposal,Landfill gas,Registered,CDM - AM0003,Det Norske Veritas- CUK,2004-04-05 00:00:00,South America,...,CDM Registry,tCO2e,SQA16OAS75TKWYZTTKK7G1LO0O8CMU,https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,2024-01-02,EcoSecurities,"{""latitude"":-14.235004,""longitude"":-51.92528}",Brazil NovaGerar Landfill Gas to Energy Project,
3,HG20LFVJPFURJJ4M6E6MS07HTOZTR4,La Esperanza Hydroelectric Project,PA,Energy industries (renewable - / non-renewable...,Hydro,Registered,CDM - AMS-I.D.,Det Norske Veritas- CUK,2004-07-04 00:00:00,Central America,...,CDM Registry,tCO2e,HG20LFVJPFURJJ4M6E6MS07HTOZTR4,https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,2024-01-02,"WB-CF, 2E Carbon Access","{""latitude"":15.199999,""longitude"":-86.241905}",La Esperanza Hydroelectric Project,
4,V3ZBEVSXGFNT31E2CECR2NYE93FW5L,Project for GHG Emission Reduction by Thermal ...,PA,Fugitive emissions from production and consump...,HFCs,Registered,CDM - AM0001,Japan Quality Assurance Organisation (JQA),2005-12-06 00:00:00,Eastern Asia,...,CDM Registry,tCO2e,V3ZBEVSXGFNT31E2CECR2NYE93FW5L,https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,2024-01-02,"WB-CF, SEPA FECO","{""latitude"":35.86166,""longitude"":104.195397}",Project for GHG Emission Reduction by Thermal ...,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13168,QJD9XJ5HBYCRZR6YV9ELX4WLEC5BJ1,Reduction in steam consumption through revampi...,PA,Energy demand,EE Industry,Withdrawn,CDM - AM0018,Bureau Veritas Certification Holding SAS (BVCH),2007-03-21 00:00:00,Southern Asia,...,CDM Registry,tCO2e,QJD9XJ5HBYCRZR6YV9ELX4WLEC5BJ1,https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,2024-01-02,Rashtriya Chemicals & Fertilizers,"{""latitude"":20.593684,""longitude"":78.96288}",Reduction in steam consumption through revampi...,
13169,4FFA579VM5UA6G3BHJYNRH5Q5Q6S6W,Usina Petribu Renewable Generation with Sugarc...,PA,Energy industries (renewable - / non-renewable...,Biomass Energy,Withdrawn,CDM - ACM0006,SGS United Kingdom Limited (SGS),2006-04-12 00:00:00,South America,...,CDM Registry,tCO2e,4FFA579VM5UA6G3BHJYNRH5Q5Q6S6W,https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,2024-01-02,MaxAmbiental,"{""latitude"":-14.235004,""longitude"":-51.92528}",Usina Petribu Renewable Generation with Sugarc...,
13170,R33WIRKXKCNOCGHYW84XRF3SJ0KKNI,Power generation from renewable sources  Aiur...,PA,Energy industries (renewable - / non-renewable...,Hydro,Withdrawn,CDM - ACM0002,RINA Services S.p.A. (RINA),2009-05-08 00:00:00,South America,...,CDM Registry,tCO2e,R33WIRKXKCNOCGHYW84XRF3SJ0KKNI,https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,2024-01-02,Waycarbon,"{""latitude"":-14.235004,""longitude"":-51.92528}",Power generation from renewable sources  Aiur...,
13171,GT5ME4TC32NKLA6A22ZBR00FYBC54N,Roaring 40s Wind Farms Private Limited.,PA,Energy industries (renewable - / non-renewable...,Wind,Validated,CDM - ACM0002,Det Norske Veritas- CUK,2007-06-07 00:00:00,Southern Asia,...,CDM Registry,tCO2e,GT5ME4TC32NKLA6A22ZBR00FYBC54N,https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,2024-01-02,Satia Paper Mills,"{""latitude"":20.593684,""longitude"":78.96288}",Roaring 40s Wind Farms Private Limited.,


Findings After Failing to Push data to Chia staging API

In [5]:
# Yemen and Cape Verde not in CAD Trust picklist. This require approval from technical committee otherwise we cant push the data
# 5 projects in total
df_project_yemen_cape_verde = df_project[(df_project['country'] == 'Yemen') | (df_project['country'] == 'Cape Verde')]
# df_with_nan_cdmID has One nan value which is project CDM 10121 row 8459

df_with_nan_cdmID = df_project[df_project['projectId']=='NA']

# Projects that have projectID, but the rest are not filled properly
df_with_nan_validation_date = df_project[df_project['validationDate']=='NA']

# Telling this issue to unfccc
df_to_inform_UNFCCC_1 = df_activities[df_activities['Unique project identifier (traceable with Google)']
                                        .isna()]

df_to_inform_UNFCCC_2 = df_activities[df_activities['Unique project identifier (traceable with Google)']
                                        .isin(df_with_nan_validation_date['projectId'])]

# Concatenating the two DataFrame into one
df_to_inform_UNFCCC = pd.concat([df_to_inform_UNFCCC_1, df_to_inform_UNFCCC_2])

df_to_inform_UNFCCC.to_excel("./output/Uncleaned_CDM_Projects.xlsx")



DROP THE ROWS OF DATA MENTIONED ABOVE. AFTER THAT, the API call should be smoothly post all the entire data to CAD Trust chia datalayer local staging mode

In [6]:

# df_cleaned_project = df_project[~((df_project['country'] == 'Yemen') | (df_project['country'] == 'Cape Verde')| (df_project['country'] == 'Bhutan'))]

# Select rows where projectId is not 'NA'
df_cleaned_project = df_project[df_project['projectId'] != 'NA']


df_cleaned_project = df_cleaned_project[
    ~df_project['country'].isin([ 'Bhutan'])]


# Select rows where validationDate is not 'NA'
df_cleaned_project = df_cleaned_project[df_cleaned_project['validationDate'] != 'NA']

df_cleaned_project.to_excel("./output/cdm_projects_mapped_cleaned.xlsx")
# df_cleaned_project
df_bhutan = df_project[df_project['country'] == 'Bhutan']
df_bhutan

df_cleaned_project



  df_cleaned_project = df_cleaned_project[


Unnamed: 0,originProjectId,projectName,program,sector,projectType,projectStatus,methodology,validationBody,validationDate,inCountryRegion,...,currentRegistry,unitMetric,projectId,projectLink,coveredByNDC,projectStatusDate,projectDeveloper,geographicIdentifier,description,ndcInformation
0,AuxID,Project for GHG emission reduction by thermal ...,PA,Fugitive emissions from production and consump...,HFCs,Registered,CDM - AM0001,SGS United Kingdom Limited (SGS),2003-12-01 00:00:00,Southern Asia,...,CDM Registry,tCO2e,AuxID,https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,2024-01-02,PricewaterhouseCoopers,"{""latitude"":20.593684,""longitude"":78.96288}",Project for GHG emission reduction by thermal ...,
1,LJ80OQZR2JVOLTZZX4Y0EGB1N18AVO,HFC Decomposition Project in Ulsan,PA,Fugitive emissions from production and consump...,HFCs,Registered,CDM - AM0001,Japan Quality Assurance Organisation (JQA),2003-12-11 00:00:00,Eastern Asia,...,CDM Registry,tCO2e,LJ80OQZR2JVOLTZZX4Y0EGB1N18AVO,https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,2024-01-02,Climate Experts,,HFC Decomposition Project in Ulsan,
2,SQA16OAS75TKWYZTTKK7G1LO0O8CMU,Brazil NovaGerar Landfill Gas to Energy Project,PA,Waste handling and disposal,Landfill gas,Registered,CDM - AM0003,Det Norske Veritas- CUK,2004-04-05 00:00:00,South America,...,CDM Registry,tCO2e,SQA16OAS75TKWYZTTKK7G1LO0O8CMU,https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,2024-01-02,EcoSecurities,"{""latitude"":-14.235004,""longitude"":-51.92528}",Brazil NovaGerar Landfill Gas to Energy Project,
3,HG20LFVJPFURJJ4M6E6MS07HTOZTR4,La Esperanza Hydroelectric Project,PA,Energy industries (renewable - / non-renewable...,Hydro,Registered,CDM - AMS-I.D.,Det Norske Veritas- CUK,2004-07-04 00:00:00,Central America,...,CDM Registry,tCO2e,HG20LFVJPFURJJ4M6E6MS07HTOZTR4,https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,2024-01-02,"WB-CF, 2E Carbon Access","{""latitude"":15.199999,""longitude"":-86.241905}",La Esperanza Hydroelectric Project,
4,V3ZBEVSXGFNT31E2CECR2NYE93FW5L,Project for GHG Emission Reduction by Thermal ...,PA,Fugitive emissions from production and consump...,HFCs,Registered,CDM - AM0001,Japan Quality Assurance Organisation (JQA),2005-12-06 00:00:00,Eastern Asia,...,CDM Registry,tCO2e,V3ZBEVSXGFNT31E2CECR2NYE93FW5L,https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,2024-01-02,"WB-CF, SEPA FECO","{""latitude"":35.86166,""longitude"":104.195397}",Project for GHG Emission Reduction by Thermal ...,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13168,QJD9XJ5HBYCRZR6YV9ELX4WLEC5BJ1,Reduction in steam consumption through revampi...,PA,Energy demand,EE Industry,Withdrawn,CDM - AM0018,Bureau Veritas Certification Holding SAS (BVCH),2007-03-21 00:00:00,Southern Asia,...,CDM Registry,tCO2e,QJD9XJ5HBYCRZR6YV9ELX4WLEC5BJ1,https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,2024-01-02,Rashtriya Chemicals & Fertilizers,"{""latitude"":20.593684,""longitude"":78.96288}",Reduction in steam consumption through revampi...,
13169,4FFA579VM5UA6G3BHJYNRH5Q5Q6S6W,Usina Petribu Renewable Generation with Sugarc...,PA,Energy industries (renewable - / non-renewable...,Biomass Energy,Withdrawn,CDM - ACM0006,SGS United Kingdom Limited (SGS),2006-04-12 00:00:00,South America,...,CDM Registry,tCO2e,4FFA579VM5UA6G3BHJYNRH5Q5Q6S6W,https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,2024-01-02,MaxAmbiental,"{""latitude"":-14.235004,""longitude"":-51.92528}",Usina Petribu Renewable Generation with Sugarc...,
13170,R33WIRKXKCNOCGHYW84XRF3SJ0KKNI,Power generation from renewable sources  Aiur...,PA,Energy industries (renewable - / non-renewable...,Hydro,Withdrawn,CDM - ACM0002,RINA Services S.p.A. (RINA),2009-05-08 00:00:00,South America,...,CDM Registry,tCO2e,R33WIRKXKCNOCGHYW84XRF3SJ0KKNI,https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,2024-01-02,Waycarbon,"{""latitude"":-14.235004,""longitude"":-51.92528}",Power generation from renewable sources  Aiur...,
13171,GT5ME4TC32NKLA6A22ZBR00FYBC54N,Roaring 40s Wind Farms Private Limited.,PA,Energy industries (renewable - / non-renewable...,Wind,Validated,CDM - ACM0002,Det Norske Veritas- CUK,2007-06-07 00:00:00,Southern Asia,...,CDM Registry,tCO2e,GT5ME4TC32NKLA6A22ZBR00FYBC54N,https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,2024-01-02,Satia Paper Mills,"{""latitude"":20.593684,""longitude"":78.96288}",Roaring 40s Wind Farms Private Limited.,


In [7]:

df_issuances = pd.read_excel("./input/CDM-Issuances-November.xlsx")
# df_issuances.drop('DOE', axis=1, inplace=True)
df_issuances.drop('PA/PoA', axis=1, inplace=True)
df_issuances.drop('CP', axis=1, inplace=True)
df_issuances.drop('Monitoring report number', axis=1, inplace=True)
df_issuances.drop('Last_updated', axis=1, inplace=True)
df_issuances.drop('Project type (UNEP DTU)', axis=1, inplace=True)
df_issuances.drop('Project subtype (UNEP DTU)', axis=1, inplace=True)
# df_issuances.drop('Units - Total', axis=1, inplace=True)
df_issuances.drop('Issuance date', axis=1, inplace=True)
df_issuances.drop('HostParty', axis=1, inplace=True)


issuances_columns={
    "CDM project reference number":"projectId",
    "DOE":"verificationBody",
    "Issuance process ID":"id",
    "Monitoring report started":"startDate",
    "Monitoring report ended":"endDate",
     "Units - Total":"unitCount",
}
df_issuances['Monitoring report started'] = pd.to_datetime(df_issuances['Monitoring report started'])
df_issuances['Monitoring report ended'] = pd.to_datetime(df_issuances['Monitoring report ended'])
df_issuances['CDM project reference number'] = df_issuances['CDM project reference number'].map(id_to_id)
df_issuances = df_issuances[issuances_columns.keys()].rename(columns=issuances_columns)


df_issuances['verificationReportDate']=df_issuances['projectId'].map(verification_report_date_map)
df_issuances['verificationApproach']="NA"



df_issuances = fillna_by_dtype(df_issuances)
print(len(df_issuances['projectId'].unique()))
df_issuances


# df_aggregated = df_issuances.groupby('projectId')['unitCount'].sum().reset_index()
df_iss_completed = pd.read_excel("./input/CDM_Issuance_Completed_November.xlsx")
df_iss_completed['Project number'] =df_iss_completed['Project number'].map(id_to_id)

df_iss_completed


def rename_duplicates(df, column_name):
   
    df_copy = df.copy()
    duplic = df_copy.duplicated(subset=column_name, keep=False)
    duplicates = df_copy[duplic].copy()
    duplicates['dup_count'] = duplicates.groupby(column_name).cumcount() + 1
    duplicates[column_name] = duplicates[column_name].astype(str) + '-' + duplicates['dup_count'].astype(str)
    duplicates.drop('dup_count', axis=1, inplace=True)
    df_copy.update(duplicates)
    return df_copy

print(len(df_iss_completed))
df_issuances = rename_duplicates(df_issuances, 'id')

df_issuances= df_issuances.merge(
    df_iss_completed, 
    left_on=['projectId', 'unitCount'], 
    right_on=['Project number', 'Total'],
    how='left'
)

df_issuances.drop('Completion date', axis=1, inplace=True)
df_issuances.drop('Transaction type', axis=1, inplace=True)
df_issuances.drop('Total', axis=1, inplace=True)
df_issuances.drop('Project number', axis=1, inplace=True)
df_issuances.drop('PARTY CODE', axis=1, inplace=True)
df_issuances.drop('CP', axis=1, inplace=True)
df_issuances.drop('Unit Type', axis=1, inplace=True)
df_issuances["Status"] = "Held"
df_issuances.drop_duplicates(subset='id', keep='first', inplace=True)
# df_dup_check = df_issuances[df_issuances.duplicated(subset='id', keep=False)]


# df_dup_check
# df_issuances.to_excel("propercleanissuance.xlsx")


nan_exists = df_issuances['SERIAL RANGE'].isna().any()
nan_exists

df_issuances

3703
12570


Unnamed: 0,projectId,verificationBody,id,startDate,endDate,unitCount,verificationReportDate,verificationApproach,Status,SERIAL RANGE
0,IBK3RIR7HVE1SZLK7YZMZPZ02UW7TS,KEarth,4KEarth1568615888.93,2017-04-01,2019-07-31,20519.0,2014-01-01,,Held,258030584 - 258051102
1,KZPP9BVTCRTYEUSRYB2L0ZNRJUDF2X,KEarth,4KEarth1568616446.5,2017-04-01,2019-07-31,18925.0,2014-05-31,,Held,258051103 - 258070027
2,P2UF559LX39J2VDPTZ13PAZUO3DRVV,KEarth,4KEarth1585297661.77,2018-01-01,2019-12-31,24595.0,2011-01-01,,Held,265420302 - 265444896
3,AXF2WJ2NOM4CHZGDOJW9S9DINIAQWS,KEarth,4KEarth1596186178.27,2018-01-01,2020-03-21,43219.0,2013-03-22,,Held,14955868 - 14999086
4,JOUI5BA4DLQS9MVXDKRLL1HHFVQ4M3,KEarth,4KEarth1599113457.49,2015-04-01,2017-12-31,116637.0,2013-10-10,,Held,294645212 - 294761848
...,...,...,...,...,...,...,...,...,...,...
12551,3A5UYHPO8SISCX8T4BJ1AMN6SO780J,No info,sergeyf731618828753.52,2016-09-01,2017-09-30,1231148.0,2013-03-15,,Held,279718404 - 280949551
12552,3A5UYHPO8SISCX8T4BJ1AMN6SO780J,No info,sergeyf731618828921.37,2017-10-01,2018-12-08,1128556.0,2013-03-15,,Held,280949552 - 282078107
12553,D4CRFLKI60AFK14YEHRDNPY16829DF,No info,sergeyf731650363867.4,2016-10-01,2017-06-30,434350.0,2012-11-01,,Held,1167744001 - 1168178350
12554,BN70H0KER9GB6Y0BPJBJ3Q4IBIXK7C,No info,slenzen1552989570.98,2017-01-01,2019-01-31,102514.0,2013-12-27,,Held,256330654 - 256433167


In [9]:
df_units = pd.read_excel("./input/CDM-Issuances-November.xlsx")


units_columns={
    "HostParty":"unitOwner",
    "CDM project reference number":"projectId",
    "Issuance process ID":"issuanceId",
    "DOE":"verificationBody",
    "Issuance process ID":"id",
   "Units - Total":"unitCount",
   "Issuance date":"vintageYear",

}

df_units = df_units[units_columns.keys()].rename(columns=units_columns)
df_units["unitOwner"] = df_units["unitOwner"].map(country_name_map)
df_units["projectId"] = df_units["projectId"].map(id_to_id)
df_units["countryJurisdictionOfOwner"] = df_units["unitOwner"]
df_units["inCountryJurisdictionOfOwner"] = ""
df_units["projectLocationId"] = ""
df_units["geographyIdentifier"] = df_units["unitOwner"].map(geography_map)
df_units["correspondingAdjustmentDeclaration"] = "Unknown"
df_units["unitRegistryLink"] = 'https://cdm.unfccc.int/Projects/Validation/DB/' +df_units['projectId']+"/view.html"
df_units["vintageYear"] = df_units["vintageYear"].astype(str).str[:4]
df_units["unitStatus"] = "Held"
df_units["unitType"] = ""
df_units["unitBlockStart"] = ""
df_units["unitBlockEnd"] = ""


df_units = fillna_by_dtype(df_units)

df_units

Unnamed: 0,unitOwner,projectId,id,verificationBody,unitCount,vintageYear,countryJurisdictionOfOwner,inCountryJurisdictionOfOwner,projectLocationId,geographyIdentifier,correspondingAdjustmentDeclaration,unitRegistryLink,unitStatus,unitType,unitBlockStart,unitBlockEnd
0,India,IBK3RIR7HVE1SZLK7YZMZPZ02UW7TS,4KEarth1568615888.93,KEarth,20519,2020,India,,,"{""latitude"":20.593684,""longitude"":78.96288}",Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,Held,,,
1,India,KZPP9BVTCRTYEUSRYB2L0ZNRJUDF2X,4KEarth1568616446.5,KEarth,18925,2020,India,,,"{""latitude"":20.593684,""longitude"":78.96288}",Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,Held,,,
2,India,P2UF559LX39J2VDPTZ13PAZUO3DRVV,4KEarth1585297661.77,KEarth,24595,2021,India,,,"{""latitude"":20.593684,""longitude"":78.96288}",Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,Held,,,
3,Thailand,AXF2WJ2NOM4CHZGDOJW9S9DINIAQWS,4KEarth1596186178.27,KEarth,43219,2021,Thailand,,,"{""latitude"":15.870032,""longitude"":100.992541}",Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,Held,,,
4,India,JOUI5BA4DLQS9MVXDKRLL1HHFVQ4M3,4KEarth1599113457.49,KEarth,116637,2022,India,,,"{""latitude"":20.593684,""longitude"":78.96288}",Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,Held,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12543,India,3A5UYHPO8SISCX8T4BJ1AMN6SO780J,sergeyf731618828753.52,No info,1231148,2022,India,,,"{""latitude"":20.593684,""longitude"":78.96288}",Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,Held,,,
12544,India,3A5UYHPO8SISCX8T4BJ1AMN6SO780J,sergeyf731618828921.37,No info,1128556,2022,India,,,"{""latitude"":20.593684,""longitude"":78.96288}",Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,Held,,,
12545,China,D4CRFLKI60AFK14YEHRDNPY16829DF,sergeyf731650363867.4,No info,434350,2022,China,,,"{""latitude"":35.86166,""longitude"":104.195397}",Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,Held,,,
12546,India,BN70H0KER9GB6Y0BPJBJ3Q4IBIXK7C,slenzen1552989570.98,No info,102514,2020,India,,,"{""latitude"":20.593684,""longitude"":78.96288}",Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,Held,,,


In [10]:
df_retired = pd.read_excel("./input/CDM_Retired_November.xlsx")
df_retired['PARTY CODE'] = df_retired['Project Identifier'].str.split('-').str[0]
df_retired['Project Identifier'] = df_retired['Project Identifier'].str.split('-').str[1].astype(int)



vintage_year_map= df_units.set_index('projectId')['vintageYear'].to_dict()
issuance_id_map= df_units.set_index('projectId')['id'].to_dict()
verif_body_units_map= df_units.set_index('projectId')['verificationBody'].to_dict()
geog_identifier_units_map = df_units.set_index('projectId')['geographyIdentifier'].to_dict()




# df_retired


retired_columns={
    "Status":"unitStatus",
    "Total":"unitTotal",
    "PARTY CODE":"unitOwner",
    "Project Identifier":"projectId",
    "Serial Range":"Serial Range",
    "Transaction Type":"unitStatus"
}



df_retired = df_retired[retired_columns.keys()].rename(columns=retired_columns)
df_retired[['unitBlockStart', 'unitBlockEnd']] = df_retired['Serial Range'].str.split('-', 1, expand=True)
df_retired['unitBlockStart'] = df_retired['unitBlockStart'].fillna(0).astype(int)
df_retired['unitBlockEnd'] = df_retired['unitBlockEnd'].fillna(0).astype(int)
df_retired.drop('Serial Range', axis=1, inplace=True)
df_retired['projectId']=df_retired['projectId'].map(id_to_id)

df_retired["unitOwner"]= df_retired['unitOwner'].map(country_name_map)
df_retired["countryJurisdictionOfOwner"] = df_retired["unitOwner"]
df_retired['inCountryJurisdictionOfOwner'] = ""


df_retired["unitType"] = ""
df_retired["correspondingAdjustmentDeclaration"] = "Unknown"
df_retired["unitRegistryLink"] = 'https://cdm.unfccc.int/Projects/Validation/DB/' +df_retired['projectId']+"/view.html"


df_needed = df_issuances[['projectId', 'id', 'startDate', 'endDate', 
           'unitCount', 'SERIAL RANGE']]
df_needed[['start', 'end']] = df_needed['SERIAL RANGE'].str.split(' - ', expand=True)

# Convert 'start' and 'end' to integers
df_needed['start'] = df_needed['start'].astype(int)
df_needed['end'] = df_needed['end'].astype(int)
df_needed['yearDiff'] = df_needed['endDate'].dt.year - df_needed['startDate'].dt.year
df_needed['countDiff'] = df_needed['end']-df_needed['start']




# Todo df_retired["vintageYear"]= df_retired['projectId'].map(vintage_year_map)
# Todo df_retired["id"]= df_retired['projectId'].map(issuance_id_map)
df_retired["verificationBody"]= df_retired['projectId'].map(verif_body_units_map)
df_retired['geographyIdentifier']= df_retired['projectId'].map(geog_identifier_units_map)
# Todo df_retired["projectLocationId"] = ""
df_retired = fillna_by_dtype(df_retired)
df_retired["unitStatus"] = "Retired"


# df_filtered
# len(df_filtered['id'].unique())

df_needed



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_needed[['start', 'end']] = df_needed['SERIAL RANGE'].str.split(' - ', expand=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_needed[['start', 'end']] = df_needed['SERIAL RANGE'].str.split(' - ', expand=True)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_needed['start'] = df_needed['s

Unnamed: 0,projectId,id,startDate,endDate,unitCount,SERIAL RANGE,start,end,yearDiff,countDiff
0,IBK3RIR7HVE1SZLK7YZMZPZ02UW7TS,4KEarth1568615888.93,2017-04-01,2019-07-31,20519.0,258030584 - 258051102,258030584,258051102,2,20518
1,KZPP9BVTCRTYEUSRYB2L0ZNRJUDF2X,4KEarth1568616446.5,2017-04-01,2019-07-31,18925.0,258051103 - 258070027,258051103,258070027,2,18924
2,P2UF559LX39J2VDPTZ13PAZUO3DRVV,4KEarth1585297661.77,2018-01-01,2019-12-31,24595.0,265420302 - 265444896,265420302,265444896,1,24594
3,AXF2WJ2NOM4CHZGDOJW9S9DINIAQWS,4KEarth1596186178.27,2018-01-01,2020-03-21,43219.0,14955868 - 14999086,14955868,14999086,2,43218
4,JOUI5BA4DLQS9MVXDKRLL1HHFVQ4M3,4KEarth1599113457.49,2015-04-01,2017-12-31,116637.0,294645212 - 294761848,294645212,294761848,2,116636
...,...,...,...,...,...,...,...,...,...,...
12551,3A5UYHPO8SISCX8T4BJ1AMN6SO780J,sergeyf731618828753.52,2016-09-01,2017-09-30,1231148.0,279718404 - 280949551,279718404,280949551,1,1231147
12552,3A5UYHPO8SISCX8T4BJ1AMN6SO780J,sergeyf731618828921.37,2017-10-01,2018-12-08,1128556.0,280949552 - 282078107,280949552,282078107,1,1128555
12553,D4CRFLKI60AFK14YEHRDNPY16829DF,sergeyf731650363867.4,2016-10-01,2017-06-30,434350.0,1167744001 - 1168178350,1167744001,1168178350,1,434349
12554,BN70H0KER9GB6Y0BPJBJ3Q4IBIXK7C,slenzen1552989570.98,2017-01-01,2019-01-31,102514.0,256330654 - 256433167,256330654,256433167,2,102513


In [11]:
df_retired

Unnamed: 0,unitStatus,unitTotal,unitOwner,projectId,unitStatus.1,unitBlockStart,unitBlockEnd,countryJurisdictionOfOwner,inCountryJurisdictionOfOwner,unitType,correspondingAdjustmentDeclaration,unitRegistryLink,verificationBody,geographyIdentifier
0,Retired,5294,Brazil,DWXTGTQLAORUROS9KPVZJUSGI8UK70,Retired,65142364,65147657,Brazil,,,Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,RINA,"{""latitude"":-14.235004,""longitude"":-51.92528}"
1,Retired,5294,Brazil,8J911GX7NLREEIL5L09MKJTI9USERE,Retired,60482913,60488206,Brazil,,,Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,TUV NORD,"{""latitude"":-14.235004,""longitude"":-51.92528}"
2,Retired,5000,Brazil,O5C8ZV4YZTL3001AX5RD5V1OL5OM0A,Retired,57297956,57302955,Brazil,,,Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,DNV,"{""latitude"":-14.235004,""longitude"":-51.92528}"
3,Retired,5000,Brazil,HY2WB5CJ5DZTLBFLQE55ZHT0XK2DO2,Retired,61682096,61687095,Brazil,,,Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,TUV SUD,"{""latitude"":-14.235004,""longitude"":-51.92528}"
4,Retired,5000,Brazil,0IET49KTMORLOP6NX7Q56DF7HXTMPV,Retired,57923146,57928145,Brazil,,,Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,DNV,"{""latitude"":-14.235004,""longitude"":-51.92528}"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27741,Retired,3,Chile,L9H0347KVT9F7MAMD49TAYUJLMFHEW,Retired,41951955,41951957,Chile,,,Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,TUV NORD,"{""latitude"":-35.675147,""longitude"":-71.542969}"
27742,Retired,1,China,B90WLUX8QKF2591LN21LEVJ82HOO75,Retired,1185996159,1185996159,China,,,Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,LGAI,"{""latitude"":35.86166,""longitude"":104.195397}"
27743,Retired,186,Malawi,YHNSTOHUC3NLV2APHHB6SCWA61Q46T,Retired,207935,208120,Malawi,,,Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,TUV SUD,"{""latitude"":-13.254308,""longitude"":34.301525}"
27744,Retired,129,India,82ORS4DFFAE3F7JI64VMOSYQEYXGCB,Retired,279636945,279637073,India,,,Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,LGAI,"{""latitude"":20.593684,""longitude"":78.96288}"


In [12]:
df_merged = pd.merge(df_retired, df_needed, on='projectId', how='left', suffixes=('_retired', '_issuance'))

# Filter rows where unitBlockStart and unitBlockEnd are within the start and end range
df_linked = df_merged[(df_merged['unitBlockStart'] >= df_merged['start']) & (df_merged['unitBlockEnd'] <= df_merged['end'])]





def checker_playaround(projectId):

    filtered_braz = df_linked[df_linked['projectId'] == projectId]


    fil_ori = df_retired[df_retired['projectId']== projectId]

    fil_haha = df_needed[df_needed['projectId']== projectId]
    # filtered_braz.to_excel("testlook.xlsx")
    print("unique issuance id here is",filtered_braz['id'].unique())
    print("unique issuance id here is",fil_haha['id'].unique())
    print("total length 1",len(fil_ori))
    print("total length 2",len(filtered_braz))




df_linked

df_linked['startDate'] = pd.to_datetime(df_linked['startDate'])
df_linked['endDate'] = pd.to_datetime(df_linked['endDate'])

# Function to calculate vintage year
def calculate_vintage_year(row):
    total_years = row['endDate'].year - row['startDate'].year
    total_years = max(total_years, 1)  # Ensure total_years is at least 1
    units_per_year = row['unitCount'] / total_years
    year_offset = (row['unitBlockStart'] - row['start']) // units_per_year
    return row['startDate'].year + int(year_offset)

# Apply the function to each row
df_linked['vintageYear'] = df_linked.apply(calculate_vintage_year, axis=1)




checker_playaround("B90WLUX8QKF2591LN21LEVJ82HOO75")

df_linked_2016 = df_linked[df_linked['startDate'].dt.year >= 2016]


df_linked.columns

# total 3392 retired units
df_linked_2016

print("Issuances belong to retired units 2016",len(df_linked_2016['id'].unique()))


# after 2016 

# df_linked = df_linked[df_linked['startDate'].dt.year >= 2016]

# before 2016
df_linked = df_linked[df_linked['startDate'].dt.year < 2016]
print("Issuances belong to retired units before 2016",len(df_linked['id'].unique()))

df_linked

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_linked['startDate'] = pd.to_datetime(df_linked['startDate'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_linked['endDate'] = pd.to_datetime(df_linked['endDate'])


unique issuance id here is ['Applus1654080342.92-2' 'Applus1654080342.92-1']
unique issuance id here is ['Applus1654080342.92-1' 'Applus1654080342.92-2']
total length 1 66
total length 2 66
Issuances belong to retired units 2016 450
Issuances belong to retired units before 2016 1279


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_linked['vintageYear'] = df_linked.apply(calculate_vintage_year, axis=1)


Unnamed: 0,unitStatus,unitTotal,unitOwner,projectId,unitStatus.1,unitBlockStart,unitBlockEnd,countryJurisdictionOfOwner,inCountryJurisdictionOfOwner,unitType,...,id,startDate,endDate,unitCount,SERIAL RANGE,start,end,yearDiff,countDiff,vintageYear
0,Retired,5294,Brazil,DWXTGTQLAORUROS9KPVZJUSGI8UK70,Retired,65142364,65147657,Brazil,,,...,DNV-CUK1293110659.42,2000-11-10,2010-11-09,4072355.0,63069791 - 67142145,6.306979e+07,6.714214e+07,10.0,4072354.0,2005
6,Retired,5294,Brazil,8J911GX7NLREEIL5L09MKJTI9USERE,Retired,60482913,60488206,Brazil,,,...,DNV-CUK1299845791.9,2010-07-01,2011-02-28,157369.0,60445682 - 60603050,6.044568e+07,6.060305e+07,1.0,157368.0,2010
18,Retired,5000,Brazil,O5C8ZV4YZTL3001AX5RD5V1OL5OM0A,Retired,57297956,57302955,Brazil,,,...,DNV-CUK1264416604.37,2006-10-01,2009-09-30,89641.0,57287379 - 57377019,5.728738e+07,5.737702e+07,3.0,89640.0,2006
36,Retired,5000,Brazil,HY2WB5CJ5DZTLBFLQE55ZHT0XK2DO2,Retired,61682096,61687095,Brazil,,,...,TUEV-SUED1317711892.85,2011-06-01,2011-09-30,93646.0,61680223 - 61773868,6.168022e+07,6.177387e+07,0.0,93645.0,2011
43,Retired,5000,Brazil,0IET49KTMORLOP6NX7Q56DF7HXTMPV,Retired,57923146,57928145,Brazil,,,...,DNV-CUK1265191517.98,2008-06-01,2009-05-31,157914.0,57831988 - 57989901,5.783199e+07,5.798990e+07,1.0,157913.0,2008
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
130472,Retired,3,Chile,L9H0347KVT9F7MAMD49TAYUJLMFHEW,Retired,41951955,41951957,Chile,,,...,Applus1654182155.66,2013-07-30,2019-03-17,1541464.0,41762547 - 43304010,4.176255e+07,4.330401e+07,6.0,1541463.0,2013
130476,Retired,1,China,B90WLUX8QKF2591LN21LEVJ82HOO75,Retired,1185996159,1185996159,China,,,...,Applus1654080342.92-2,2013-01-01,2017-12-04,707684.0,1185849127 - 1186556810,1.185849e+09,1.186557e+09,4.0,707683.0,2013
130478,Retired,186,Malawi,YHNSTOHUC3NLV2APHHB6SCWA61Q46T,Retired,207935,208120,Malawi,,,...,TUEV-SUED1446721135.84,2014-10-27,2015-08-05,35318.0,172850 - 208167,1.728500e+05,2.081670e+05,1.0,35317.0,2014
130479,Retired,129,India,82ORS4DFFAE3F7JI64VMOSYQEYXGCB,Retired,279636945,279637073,India,,,...,Applus1557750802.84-1,2013-01-01,2016-10-01,71478.0,279587171 - 279658648,2.795872e+08,2.796586e+08,3.0,71477.0,2015


In [13]:
df_retired_clean = df_linked.copy()


print(df_retired_clean.columns)



columns_df_linked = ['unitStatus', 'unitTotal', 'unitOwner', 'projectId', 'unitStatus',
                     'unitBlockStart', 'unitBlockEnd', 'countryJurisdictionOfOwner',
                     'inCountryJurisdictionOfOwner', 'unitType',
                     'correspondingAdjustmentDeclaration', 'unitRegistryLink',
                     'verificationBody', 'geographyIdentifier', 'id', 'startDate', 'endDate',
                     'unitCount', 'SERIAL RANGE', 'start', 'end', 'yearDiff', 'countDiff',
                     'vintageYear']

# List of columns in df_retired_need
columns_df_retired_need = ['unitStatus', 'unitTotal', 'unitOwner', 'projectId', 'unitStatus',
                           'unitBlockStart', 'unitBlockEnd', 'countryJurisdictionOfOwner',
                           'inCountryJurisdictionOfOwner', 'unitType',
                           'correspondingAdjustmentDeclaration', 'unitRegistryLink',
                           'verificationBody', 'geographyIdentifier', 'vintageYear']

# Columns in df_retired_need but not in df_linked
columns_not_in_linked = set(columns_df_retired_need) - set(columns_df_linked)

# Print the result
print("not in the linke dis", columns_not_in_linked)

# WILL NEED ID LATER
df_retired_clean = df_retired_clean[['unitStatus', 'unitTotal', 'unitOwner', 'projectId', 'unitStatus',
       'unitBlockStart', 'unitBlockEnd', 'countryJurisdictionOfOwner',
       'inCountryJurisdictionOfOwner', 'unitType',
       'correspondingAdjustmentDeclaration', 'unitRegistryLink',
       'verificationBody', 'geographyIdentifier','vintageYear','id']]

df_retired_clean.rename(columns={'unitTotal': 'unitCount'}, inplace=True)

df_retired_clean = df_retired_clean.loc[:,~df_retired_clean.columns.duplicated()]

print(len(df_retired_clean.columns))
df_retired_clean.columns


bhutan_projects = df_project[df_project['country'] == 'Bhutan']
bhutan_project_ids = bhutan_projects['projectId'].tolist()
# Filter df_issuance to remove rows where 'projectId' matches those in bhutan_project_ids
df_retired_clean= df_retired_clean[~df_retired_clean['projectId'].isin(bhutan_project_ids)]


df_retired_clean



Index(['unitStatus', 'unitTotal', 'unitOwner', 'projectId', 'unitStatus',
       'unitBlockStart', 'unitBlockEnd', 'countryJurisdictionOfOwner',
       'inCountryJurisdictionOfOwner', 'unitType',
       'correspondingAdjustmentDeclaration', 'unitRegistryLink',
       'verificationBody', 'geographyIdentifier', 'id', 'startDate', 'endDate',
       'unitCount', 'SERIAL RANGE', 'start', 'end', 'yearDiff', 'countDiff',
       'vintageYear'],
      dtype='object')
not in the linke dis set()
15


Unnamed: 0,unitStatus,unitCount,unitOwner,projectId,unitBlockStart,unitBlockEnd,countryJurisdictionOfOwner,inCountryJurisdictionOfOwner,unitType,correspondingAdjustmentDeclaration,unitRegistryLink,verificationBody,geographyIdentifier,vintageYear,id
0,Retired,5294,Brazil,DWXTGTQLAORUROS9KPVZJUSGI8UK70,65142364,65147657,Brazil,,,Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,RINA,"{""latitude"":-14.235004,""longitude"":-51.92528}",2005,DNV-CUK1293110659.42
6,Retired,5294,Brazil,8J911GX7NLREEIL5L09MKJTI9USERE,60482913,60488206,Brazil,,,Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,TUV NORD,"{""latitude"":-14.235004,""longitude"":-51.92528}",2010,DNV-CUK1299845791.9
18,Retired,5000,Brazil,O5C8ZV4YZTL3001AX5RD5V1OL5OM0A,57297956,57302955,Brazil,,,Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,DNV,"{""latitude"":-14.235004,""longitude"":-51.92528}",2006,DNV-CUK1264416604.37
36,Retired,5000,Brazil,HY2WB5CJ5DZTLBFLQE55ZHT0XK2DO2,61682096,61687095,Brazil,,,Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,TUV SUD,"{""latitude"":-14.235004,""longitude"":-51.92528}",2011,TUEV-SUED1317711892.85
43,Retired,5000,Brazil,0IET49KTMORLOP6NX7Q56DF7HXTMPV,57923146,57928145,Brazil,,,Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,DNV,"{""latitude"":-14.235004,""longitude"":-51.92528}",2008,DNV-CUK1265191517.98
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
130472,Retired,3,Chile,L9H0347KVT9F7MAMD49TAYUJLMFHEW,41951955,41951957,Chile,,,Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,TUV NORD,"{""latitude"":-35.675147,""longitude"":-71.542969}",2013,Applus1654182155.66
130476,Retired,1,China,B90WLUX8QKF2591LN21LEVJ82HOO75,1185996159,1185996159,China,,,Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,LGAI,"{""latitude"":35.86166,""longitude"":104.195397}",2013,Applus1654080342.92-2
130478,Retired,186,Malawi,YHNSTOHUC3NLV2APHHB6SCWA61Q46T,207935,208120,Malawi,,,Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,TUV SUD,"{""latitude"":-13.254308,""longitude"":34.301525}",2014,TUEV-SUED1446721135.84
130479,Retired,129,India,82ORS4DFFAE3F7JI64VMOSYQEYXGCB,279636945,279637073,India,,,Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,LGAI,"{""latitude"":20.593684,""longitude"":78.96288}",2015,Applus1557750802.84-1


In [14]:
df_units_linked= df_linked.copy()

print("lenoriginal dfUnitsHeld",len(df_units_linked))

grouped_total = df_units_linked.groupby('id')['unitTotal'].sum().reset_index()
grouped_total.rename(columns={'unitTotal': 'aggregatedUnitTotal'}, inplace=True)

# Step 2: Merge the aggregated data back into df_linked
df_units_held = df_units_linked.merge(grouped_total, on='id', how='left')

# Step 3: Calculate totalHeld
df_units_held['totalHeld'] = df_units_held['unitCount'] - df_units_held['aggregatedUnitTotal']

totalHeld_mapping = df_units_held.set_index('id')['totalHeld'].to_dict()




df_units_held

print("len df units held",len(df_units_held))
totalHeld_mapping
duplicate_rows = df_units_held[df_units_held['id'].duplicated()]

# df_units_held = df_issuances[df_issuances['startDate'].dt.year >= 2016]
df_units_held = df_issuances[df_issuances['startDate'].dt.year < 2016]


print("len df units held",len(df_units_held))

df_units_held['totalHeld'] = df_issuances['id'].map(totalHeld_mapping)
df_units_held['totalHeld'] = df_units_held['totalHeld'].fillna(df_units_held['unitCount'])
# df_see = df_units_held[['id','totalHeld','unitCount']]

df_units_held.drop('unitCount',axis=1,inplace=True)

print("df_units_held column is",df_units_held.columns)



df_units_held.rename(columns={'totalHeld': 'unitCount'}, inplace=True)


# df_units_held.to_excel("df_heldinspect.xlsx")
print(df_units_held.columns)

df_units_held[['unitBlockStart', 'unitBlockEnd']] = df_units_held['SERIAL RANGE'].str.split('-', 1, expand=True)
df_units_held.drop('SERIAL RANGE', axis=1, inplace=True)

df_units_held['unitOwner'] = df_units_held['projectId'].map(projID_to_country)

df_units_held["countryJurisdictionOfOwner"] = df_units_held["unitOwner"]
df_units_held["inCountryJurisdictionOfOwner"] = ""
df_units_held["projectLocationId"] = ""


df_units_held.rename(columns={'Project number': 'projectId'}, inplace=True)
df_units_held["geographyIdentifier"] = df_units_held["unitOwner"].map(geography_map)
df_units_held.rename(columns={'Status': 'unitStatus'}, inplace=True)
df_units_held['unitRegistryLink'] = 'https://cdm.unfccc.int/Projects/Validation/DB/' +df_units_held['projectId']+"/view.html"
df_units_held['correspondingAdjustmentDeclaration'] = "Unknown"
df_units_held['unitType'] =""
df_units_held['vintageYear'] =df_units_held['startDate'].dt.year



columns_to_drop = ['verificationReportDate', 'startDate', 'projectLocationId', 
                   'endDate', 'verificationApproach']

# Dropping the columns
df_units_held = df_units_held.drop(columns=columns_to_drop)

df_units_held.columns


print(set(df_units_held.columns)-set(df_retired_clean.columns))
print(set(df_retired_clean.columns)-set(df_units_held.columns))


print(df_retired_clean.columns)

print(df_units_held.columns)
print(len(df_units_held.columns)-len(df_retired_clean.columns))


df_units_held

# print(df_see['totalHeld'].unique())

df_units_held['vintageYear'].unique()
df_units_held['unitBlockEnd'] = pd.to_numeric(df_units_held['unitBlockEnd'], errors='coerce')
df_units_held['unitCount'] = pd.to_numeric(df_units_held['unitCount'], errors='coerce')

# Update 'unitBlockStart' based on the calculation
df_units_held['unitBlockStart'] = df_units_held['unitBlockEnd'] - df_units_held['unitCount']
bhutan_projects = df_project[df_project['country'] == 'Bhutan']
bhutan_project_ids = bhutan_projects['projectId'].tolist()
# Filter df_issuance to remove rows where 'projectId' matches those in bhutan_project_ids
df_units_held= df_units_held[~df_units_held['projectId'].isin(bhutan_project_ids)]




df_units_held
# df_units_held

lenoriginal dfUnitsHeld 24357
len df units held 24357
len df units held 10997
df_units_held column is Index(['projectId', 'verificationBody', 'id', 'startDate', 'endDate',
       'verificationReportDate', 'verificationApproach', 'Status',
       'SERIAL RANGE', 'totalHeld'],
      dtype='object')
Index(['projectId', 'verificationBody', 'id', 'startDate', 'endDate',
       'verificationReportDate', 'verificationApproach', 'Status',
       'SERIAL RANGE', 'unitCount'],
      dtype='object')
set()
set()
Index(['unitStatus', 'unitCount', 'unitOwner', 'projectId', 'unitBlockStart',
       'unitBlockEnd', 'countryJurisdictionOfOwner',
       'inCountryJurisdictionOfOwner', 'unitType',
       'correspondingAdjustmentDeclaration', 'unitRegistryLink',
       'verificationBody', 'geographyIdentifier', 'vintageYear', 'id'],
      dtype='object')
Index(['projectId', 'verificationBody', 'id', 'unitStatus', 'unitCount',
       'unitBlockStart', 'unitBlockEnd', 'unitOwner',
       'countryJurisdictio

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_units_held['totalHeld'] = df_issuances['id'].map(totalHeld_mapping)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_units_held['totalHeld'] = df_units_held['totalHeld'].fillna(df_units_held['unitCount'])
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_units_held.drop('unitCount',axis=1,inplace=True)
A value is trying to be set on a copy of a slice

Unnamed: 0,projectId,verificationBody,id,unitStatus,unitCount,unitBlockStart,unitBlockEnd,unitOwner,countryJurisdictionOfOwner,inCountryJurisdictionOfOwner,geographyIdentifier,unitRegistryLink,correspondingAdjustmentDeclaration,unitType,vintageYear
4,JOUI5BA4DLQS9MVXDKRLL1HHFVQ4M3,KEarth,4KEarth1599113457.49,Held,116637.0,294645211.0,294761848,India,India,,"{""latitude"":20.593684,""longitude"":78.96288}",https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,,2015
19,3S36PU919D94NR86U2XFCMDKMOAQ7A,KEarth,4KEarth1640788887.99,Held,539881.0,316763401.0,317303282,India,India,,"{""latitude"":20.593684,""longitude"":78.96288}",https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,,2013
20,RSHGKPZYMN0MDMI7OP4LEA4ULNIVY1,KEarth,4KEarth1641211288.18-1,Held,241873.0,324764016.0,325005889,India,India,,"{""latitude"":20.593684,""longitude"":78.96288}",https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,,2013
21,RSHGKPZYMN0MDMI7OP4LEA4ULNIVY1,KEarth,4KEarth1641211288.18-2,Held,867.0,325005889.0,325006756,India,India,,"{""latitude"":20.593684,""longitude"":78.96288}",https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,,2012
26,0LXRXT5CK6WUWY5Q9RSNQI6RSXFGS8,KEarth,4KEarth1648863914.34-1,Held,66817.0,321126168.0,321192985,India,India,,"{""latitude"":20.593684,""longitude"":78.96288}",https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,,2013
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12545,NIPF5RRFPIJB9Q89LNDCRQWB51GBXQ,Earthood,poaiss997180415~NG,Held,7.0,3426126.0,3426133,Ghana,Ghana,,"{""latitude"":7.946527,""longitude"":-1.023194}",https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,,2013
12546,L5NM3ZT1NP8125Z00TJ5E6XNLZ3MIV,Carbon Check,poaiss998042239,Held,27286.0,168055.0,195341,Ghana,Ghana,,"{""latitude"":7.946527,""longitude"":-1.023194}",https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,,2014
12547,AYYW2EJ3HYUNKG1IW90202KP44Z2NM,KBS,poaiss999064138~TH,Held,40659.0,12580003.0,12620662,Thailand,Thailand,,"{""latitude"":15.870032,""longitude"":100.992541}",https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,,2015
12549,IO57673HO8JNI4YN42UGAYDXU76BXA,No info,sergeyf731608560024.14,Held,2485.0,15253612.0,15256097,Thailand,Thailand,,"{""latitude"":15.870032,""longitude"":100.992541}",https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,,2013


In [15]:
# CHecking qc
df_units_held.to_excel("./output/CDM_Held_Units_Cleaned.xlsx")
df_retired_clean.to_excel("./output/CDM_Retired_Units_Cleaned.xlsx")
# df_issuances.to_excel("./output/CDM_Issuances_Cleaned.xlsx")

df_issuances_cleaned = df_issuances[["verificationBody","projectId","id","startDate","endDate","verificationReportDate","verificationApproach"]]
# df_issuances_cleaned =df_issuances_cleaned[df_issuances_cleaned['startDate'].dt.year >= 2016]


df_issuances_cleaned =df_issuances_cleaned[df_issuances_cleaned['startDate'].dt.year < 2016]



bhutan_projects = df_project[df_project['country'] == 'Bhutan']
bhutan_project_ids = bhutan_projects['projectId'].tolist()
# Filter df_issuance to remove rows where 'projectId' matches those in bhutan_project_ids
df_issuances_cleaned= df_issuances_cleaned[~df_issuances_cleaned['projectId'].isin(bhutan_project_ids)]


unique_ids_held = df_units_held['id'].unique()
unique_ids_retired = df_retired_clean['id'].unique()
unique_ids_issuances = df_issuances_cleaned['id'].unique()


# df_filtered = df_needed[df_needed['startDate'].dt.year >= 2016]



print(set((unique_ids_retired))-set((unique_ids_held)))
print(set((unique_ids_retired))-set((unique_ids_issuances)))
print(set((unique_ids_issuances))-set((unique_ids_held)))
print(set((unique_ids_held))-set((unique_ids_issuances)))

def convert_timestamp_to_string(x):
    if isinstance(x, pd.Timestamp):
        return x.strftime('%Y-%m-%d %H:%M:%S')
    return x

# Columns that contain Timestamps
timestamp_columns = ['startDate', 'endDate', 'verificationReportDate']

# Apply the conversion to the specific columns

# Apply the conversion to the existing timestamp columns
df_issuances_cleaned[timestamp_columns] = df_issuances_cleaned[timestamp_columns].applymap(convert_timestamp_to_string)
issuances_grouped = df_issuances_cleaned.groupby('projectId').apply(lambda x: x.to_dict(orient='records')).reset_index(name='issuances')

issuances_grouped['issuances'][0]


df_issuances_cleaned

# df_issuances_cleaned

issuances_grouped.to_excel("./output/grouped_issuances_bef_2016.xlsx")

df_issuances_cleaned
issuances_grouped


set()
set()
set()
set()


Unnamed: 0,projectId,issuances
0,00J4UE52C4VTT01JSVE9ERDU8WP76W,"[{'verificationBody': 'RINA', 'projectId': '00..."
1,00MPVBXC6YBLA33C1NN1G3K33VHK1V,"[{'verificationBody': 'TUV NORD', 'projectId':..."
2,015ESW8TY1JO2B2E0T0EP4ZZQKQM5D,"[{'verificationBody': 'TSUS', 'projectId': '01..."
3,01BHKDBIJYIMU1ZSLWRZX238VM5FJU,"[{'verificationBody': 'BVCH', 'projectId': '01..."
4,01GN1N0RHE2KY6CRDPWDNY7ELCZKNT,"[{'verificationBody': 'LGAI', 'projectId': '01..."
...,...,...
3559,ZYXH65I6FVRCHO68AS8K64WYFS3V2O,"[{'verificationBody': 'BVCH', 'projectId': 'ZY..."
3560,ZZ2UA0ANWJAF7LL3BE60P5HG1LRLAJ,"[{'verificationBody': 'LGAI', 'projectId': 'ZZ..."
3561,ZZ99SW9AQ4MM9WTA4H7WGK2IQZ9BZK,"[{'verificationBody': 'KTR', 'projectId': 'ZZ9..."
3562,ZZK6XB1XUWV596IZZ4H06G6E9KBC1G,"[{'verificationBody': 'LGAI', 'projectId': 'ZZ..."


In [106]:

url_project_local = "http://localhost:31310/v1/projects"


def datetime_to_string(value):
    if isinstance(value, datetime):
        return value.strftime("%Y-%m-%d")
    return value
# Function to send POST request for each row in the DataFrame
def post_projects(df,start_idx,end_idx,url,type,id_mapping,issuance_mapping):
    # Iterate over each row in the DataFrame
    project_to_uuid_mapping = {}
    i=0
    for index, row in df[start_idx:end_idx].iterrows():
    
        request_body = row.to_dict()
        request_body = {k: datetime_to_string(v) for k, v in row.to_dict().items()}
        request_body['projectLocations'] = [
            {
                "country": request_body.get('country', 'Unknown'),
                "inCountryRegion": request_body.get('inCountryRegion', 'Unknown'),
                "geographicIdentifier":request_body.get('geographicIdentifier',{})  # replace with actual geographicIdentifier if available
            }
        ]

        issuance_ids = []
        issuances_for_api = []

        # Uncomment Later 
        if row.get('issuances') is not None and isinstance(row.get('issuances'), list):
            for issuance in row.get('issuances', []):
                # Store the 'id' for later use
                issuance_id = issuance.get('id')
                issuance_ids.append(issuance_id)
                
                # Remove 'id' from the issuance data sent to the API
                issuance_for_api = {key: value for key, value in issuance.items() if key not in ['id', 'projectId']}
                issuances_for_api.append(issuance_for_api)
            request_body['issuances'] = issuances_for_api
        else:

            request_body.pop('issuances', None)
        # request_body.pop('issuances', None)
        # print(issuances_for_api)
        
        # Remove the keys that are not needed for the POST request
        request_body.pop('country', None)
        request_body.pop('inCountryRegion', None)
        request_body.pop('geographicIdentifier', None)


        est = request_body['estimations'][0]
        # print(est)

        if est['creditingPeriodStart']== None or est['creditingPeriodEnd']==None:
            request_body.pop('estimations', None)

        # if request_body['country']=='NA':
        #     request_body['country']= "Namibia"

        # print(request_body)
        # Send POST request
        try:
            if type=="create":
                response = requests.post(url, json=request_body)
                # Check if the request was successful
            if type=="put":
                warehouseId = id_mapping[request_body['projectId']]
                print(warehouseId)
                request_body['warehouseProjectId'] =warehouseId
                response =requests.put(url,json=request_body)
            
            if response.status_code == 200:
                response_data = response.json()
                print(i)
                print(f"Successfully posted project with ID: {row['projectId']}, UUID: {response_data.get('uuid')}")
                
            else:
                print("i is",i)
                print(request_body)
                print(f"Failed to post project with ID: {row['projectId']}. Status code: {response.status_code}. Message: {response.json()}")
                break
        

        except RequestException as e:
            print(request_body)
            print(f"Request failed for project with ID:{row['projectId']}. Error: {e}")
            break

        i+=1
    return project_to_uuid_mapping
# Call the function to start posting data

def put_project(df,start_idx,end_idx,url):
    i=0
    for index, row in df[start_idx:end_idx].iterrows():
    
        request_body = row.to_dict()
        request_body = {k: datetime_to_string(v) for k, v in row.to_dict().items()}

   
        issuance_ids = []
        issuances_for_api = []

        # Uncomment Later 
        if row.get('issuances') is not None and isinstance(row.get('issuances'), list):
            for issuance in row.get('issuances', []):
                # Store the 'id' for later use
                issuance_id = issuance.get('id')
                issuance_ids.append(issuance_id)
                
                # Remove 'id' from the issuance data sent to the API
                issuance_for_api = {key: value for key, value in issuance.items() if key not in ['orgUid']}
                issuances_for_api.append(issuance_for_api)
            request_body['issuances'] = issuances_for_api
        else:

            request_body.pop('issuances', None)
        request_body['projectLocations'] = [{k: v for k, v in location.items() if k not in ['orgUid', 'warehouseProjectId']} for location in request_body['projectLocations'] ]
        try:
            response =requests.put(url,json=request_body)
            
            if response.status_code == 200:
                response_data = response.json()
                print(f"Successfully posted project with ID: {row['projectId']},")
                
            else:
                print("i is",i)
                print(request_body)
                print(f"Failed to post project with ID: {row['originProjectId']}. Status code: {response.status_code}. Message: {response.json()}")
                break
        
        except RequestException as e:
            print(request_body)
            print(f"Request failed for project with ID: {row['originProjectId']}. Error: {e}")
            break

        i+=1

        
    


merged_project_issuances = pd.merge(df_cleaned_project, issuances_grouped, on='projectId', how='left')

# df_project

url_staging_local = "http://localhost:31310/v1/staging"
url_staging_testnet = "http://159.223.67.223:31310/v1/staging"

def get_projects_staging_uuid(url):
    # url = "http://localhost:31310/v1/staging"

    # Make a GET request
    response = requests.get(url)

    
    # Check the response status code
    if response.status_code == 200:
        # The request was successful, and you can access the response content
        projects = response.json()
        project_id_to_uuid = {}
        project_id_to_issuances={}
        project_id_to_locationId={}
        for project in projects:

            uuid = project['uuid']
            # print(uuid)
            projectId = project['diff']['change'][0]['projectId']
            project_id_to_uuid[projectId] = uuid
            issuances = project['diff']['change'][0].get('issuances', None)

            

        # print(uuid_to_project_id)
        return project_id_to_uuid, project_id_to_issuances
    else:
        # The request failed for some reason
        print(f"Request failed with status code {response.status_code}")


def get_projects_mapping(base_url,orgUid):
    # Check if the specified conditions are met

    # base_url = "http://localhost:31310/v1/projects"
    page = 1
    limit = 1000
    all_projects = []

    while True:
        print(page)
        
        url = f"{base_url}?page={page}&limit={limit}&orgUid={orgUid}"
        print(url)
        response = requests.get(url)
        if response.status_code != 200:
            print(f"Failed to retrieve data: {response.status_code}")
            break

        data = response.json()
        all_projects.extend(data['data'])
        # if page >= 1:
        if page >= data['pageCount']:
            break
        page += 1
    print("hah")
    
    projects = all_projects
    # print(projects)

    # response = requests.get(url+orgUid)
    # projects = response.json()
    # project_data = response.json()
    project_id_to_warehouseId = {}
    project_id_to_issuances={}
    project_id_to_locationId={}
    duplicate_projects = {}
    project_id_to_locations={}

    # print(projects[0])

    for project_data in projects:
        if (project_data["orgUid"] == orgUid and
            project_data["registryOfOrigin"] == "CDM Registry"):


            warehouseId = project_data["warehouseProjectId"]
            projectId = project_data["projectId"]
            # Mapping of 'projectId' and 'warehouseProjectId'
            if projectId not in project_id_to_warehouseId:
                project_id_to_warehouseId[projectId] = warehouseId

            else:
                duplicate_projects[projectId] = [project_id_to_warehouseId[projectId],warehouseId]
                project_id_to_warehouseId[projectId] = warehouseId
            
            # Mapping of 'projectId' and 'issuance' if 'issuance' exists
            issuance_ids = []
            issuances_for_api = []
        
            if project_data.get('issuances') is not None and isinstance(project_data.get('issuances'), list):
               
                for issuance in project_data.get('issuances', []):
                    # Store the 'id' for later use
                    issuance_id = issuance.get('id')
                    issuance_ids.append(issuance_id)
                    
                    # Remove 'id' from the issuance data sent to the API
                    issuance_for_api = {key: value for key, value in issuance.items() }
                    issuances_for_api.append(issuance_for_api)

                if issuances_for_api!= []:
                    project_id_to_issuances[projectId] = issuances_for_api

            # Mapping of 'projectId' and 'id' of 'projectLocations'
            locations_mapping = {}
            if project_data.get("projectLocations"):
                # print("lho")
                project_id_to_locationId[projectId]= project_data["projectLocations"][0]["id"]
                project_id_to_locations[projectId]= project_data["projectLocations"]

    return project_id_to_warehouseId, project_id_to_issuances, project_id_to_locationId,duplicate_projects,project_id_to_locations

        





# fail in project num 6142

# check if there is a same project but several uuid

# print(uuid_to_project_id_map)


# merged_project_issuances.to_excel("./output/ProjectsIssuancesCombined.xlsx")


In [107]:
# project_to_uuid_map,project_to_issuances = get_projects_staging_uuid(url_staging_testnet)

# print(len(project_to_uuid_map))


# Below is testnet"

orgUidTestnet = "fc7e4380bc8ba22f98a655e04b54af3aed122db34bb3357b3a9c3eb0ff7039bc"
orgUidMainnet = "b3d4e71d806e86ff1f8712b6854d65e2c178e873ee22b2f7d0da937dacbaa985"

get_projects_url_local = "http://localhost:31310/v1/projects"

# Below is mainnet
# get_projects_url_local = "http://localhost:31310/v1/projects?orgUid=b3d4e71d806e86ff1f8712b6854d65e2c178e873ee22b2f7d0da937dacbaa985"

project_id_to_warehouseId, issuance_mapping, locations_mapping,duplicate_projects_api,project_id_to_locations= get_projects_mapping(get_projects_url_local,orgUidMainnet)


print(len(project_id_to_warehouseId))
print(len(issuance_mapping))
print(len(locations_mapping))
print(len(duplicate_projects_api))


1
http://localhost:31310/v1/projects?page=1&limit=1000&orgUid=b3d4e71d806e86ff1f8712b6854d65e2c178e873ee22b2f7d0da937dacbaa985
2
http://localhost:31310/v1/projects?page=2&limit=1000&orgUid=b3d4e71d806e86ff1f8712b6854d65e2c178e873ee22b2f7d0da937dacbaa985
3
http://localhost:31310/v1/projects?page=3&limit=1000&orgUid=b3d4e71d806e86ff1f8712b6854d65e2c178e873ee22b2f7d0da937dacbaa985
4
http://localhost:31310/v1/projects?page=4&limit=1000&orgUid=b3d4e71d806e86ff1f8712b6854d65e2c178e873ee22b2f7d0da937dacbaa985
5
http://localhost:31310/v1/projects?page=5&limit=1000&orgUid=b3d4e71d806e86ff1f8712b6854d65e2c178e873ee22b2f7d0da937dacbaa985
6
http://localhost:31310/v1/projects?page=6&limit=1000&orgUid=b3d4e71d806e86ff1f8712b6854d65e2c178e873ee22b2f7d0da937dacbaa985
7
http://localhost:31310/v1/projects?page=7&limit=1000&orgUid=b3d4e71d806e86ff1f8712b6854d65e2c178e873ee22b2f7d0da937dacbaa985
8
http://localhost:31310/v1/projects?page=8&limit=1000&orgUid=b3d4e71d806e86ff1f8712b6854d65e2c178e873ee22b2f7d

In [110]:
print(len(project_id_to_warehouseId))
print(len(issuance_mapping))
print(len(locations_mapping))
print(len(duplicate_projects_api))




unique_proj_iss_before = issuances_grouped['projectId'].unique()

print("len unique cleaned Issuances before API",len(unique_proj_iss_before))
print("len cleaned Issuances before API",len(issuances_grouped))


# print("diff1",set((unique_proj_iss_before))-set((issuance_mapping)))
# print("diff2",set((issuance_mapping))-set((unique_proj_iss_before)))
print("len diff1",len(set((unique_proj_iss_before))-set((issuance_mapping))))
print("len diff2",len(set((issuance_mapping))-set((unique_proj_iss_before))))





delta_uploaded_issuances = set((unique_proj_iss_before))-set((issuance_mapping))

# for delta in delta_uploaded_issuances:
#     print(project_id_to_warehouseId[delta])


project_id_to_warehouseId["HG20LFVJPFURJJ4M6E6MS07HTOZTR4"]

# issuance_mapping
# issuances_grouped.to_excel("asdasd.xlsx")

# len(issuance_mapping['U2ODRRPWUQQ6QTZ7ZILTXXHF5971KB'])
temp = issuances_grouped[issuances_grouped['projectId']== 'HG20LFVJPFURJJ4M6E6MS07HTOZTR4']

temp['issuances']

# this function never been used
def get_warehouseId_and_update_issuances(delta_uploaded_issuances, project_id_to_warehouseId, issuances_grouped, specific_project_id):

    # Print warehouse IDs for each project ID in delta_uploaded_issuances
    for delta in delta_uploaded_issuances:
        if delta in project_id_to_warehouseId:
            print(project_id_to_warehouseId[delta])
        else:
            print(f"Warehouse ID not found for project ID: {delta}")

    # Get 'issuances' data for a specific project ID
    specific_issuances = issuances_grouped[issuances_grouped['projectId'] == specific_project_id]['issuances']
    # print(len(specific_issuances['issuances']))
    return specific_issuances

def concat_existing_new_issuances(issuance_from_api, issuance_to_add):
    # Create a new dictionary to hold the combined data
    new_issuance_data = {}

    # Iterate through each row in the DataFrame
    for index, row in issuance_to_add.iterrows():
        project_id = row['projectId']
        # Create a new list of issuances without 'projectId' and 'id' keys
        new_issuances = [{k: v for k, v in issuance.items() if k not in ['projectId', 'id']} for issuance in row['issuances']]
        # Check if the projectId exists in the original dictionary
       
        if project_id in issuance_from_api:
            # Concatenate the existing and new issuances

          
            combined_issuances = issuance_from_api[project_id] + new_issuances
        else:
            # Use only the new issuances
            combined_issuances = new_issuances


        # Update the new dictionary
        new_issuance_data[project_id] = combined_issuances

    return new_issuance_data

def getAllCdmProjects(orgUid):
    base_url = "http://localhost:31310/v1/projects"
    page = 1
    limit = 2000
    all_projects = []

    while True:
        print(page)
        url = f"{base_url}?page={page}&limit={limit}&orgUid={orgUid}"
        response = requests.get(url)
        if response.status_code != 200:
            print(f"Failed to retrieve data: {response.status_code}")
            break

        data = response.json()
        all_projects.extend(data['data'])

        if page >= data['pageCount']:
            break
        page += 1

    return pd.DataFrame(all_projects)

def clean_estimations(estimations):
    # This function takes a list of estimation dictionaries and removes the specified keys
    for estimation in estimations:
        estimation.pop('warehouseProjectId', None)  # Remove 'warehouseProjectId' if exists
        estimation.pop('orgUid', None)              # Remove 'orgUid' if exists
        estimation.pop('timeStaged', None)   
    return estimations

def clean_project_locations(locations):
    # This function takes a list of estimation dictionaries and removes the specified keys
    for location in locations:
        location.pop('warehouseProjectId', None)  # Remove 'warehouseProjectId' if exists
        location.pop('orgUid', None)        
        location.pop('timeStaged', None)        
        
              # Remove 'orgUid' if exists
    return locations



def updateNewIssuances(df,issuances_data,start,end,env):
    df_filtered = df[df['warehouseProjectId'].isin(issuances_data['warehouseProjectId'])]
    issuance_map = dict(zip(issuances_data['warehouseProjectId'], issuances_data['issuances']))
    df_filtered['issuances'] = df_filtered['warehouseProjectId'].map(issuance_map)
    # Apply the function to the 'estimations' column
    df_filtered['estimations'] = df_filtered['estimations'].apply(clean_estimations)
    df_filtered['projectLocations'] = df_filtered['projectLocations'].apply(clean_project_locations)
    df_filtered.drop("projectRatings",axis=1,inplace=True)
    df_filtered.drop("labels",axis=1,inplace=True)
    df_filtered.drop("relatedProjects",axis=1,inplace=True)
    df_filtered.drop("coBenefits",axis=1,inplace=True)
    # if env=="testnet":
    df_filtered.drop("timeStaged",axis=1,inplace=True)
  
    
    
    
    
    
    i=0
    # dropping_columns = ["labels","relatedProjects","createdAt","updatedAt","orgUid"]
    for index, row in df_filtered[start:end].iterrows():
    
        request_body = row.to_dict()
        request_body = {k: datetime_to_string(v) for k, v in row.to_dict().items()}

        request_body.pop('orgUid', None)
        


        issuance_ids = []
        issuances_for_api = []


        # Uncomment Later 
        if row.get('issuances') is not None and isinstance(row.get('issuances'), list):
            for issuance in row.get('issuances', []):
                # Store the 'id' for later use
                issuance_id = issuance.get('id')
                issuance_ids.append(issuance_id)
                
                # Remove 'id' from the issuance data sent to the API
                issuance_for_api = {key: value for key, value in issuance.items() if key not in [ 'orgUid','projectId','timeStaged']}
                issuances_for_api.append(issuance_for_api)
            request_body['issuances'] = issuances_for_api
        else:

            request_body.pop('issuances', None)

        if env=="testnet":
            request_body.pop('estimations', None)
           

        try:
            urlPut = "http://localhost:31310/v1/projects"
            response = requests.put(urlPut, json=request_body)
           
            if response.status_code == 200:
                response_data = response.json()
                print(i)
                print(f"Successfully posted project with ID: {row['warehouseProjectId']}")
                
            else:
                print("i is",i)
                print(request_body)
                print(f"Failed to post project with ID: {row['warehouseProjectId']}. Status code: {response.status_code}. Message: {response.json()}")
                break
        

        except RequestException as e:
            print(request_body)
            print(f"Request failed for project with ID:{row['warehouseProjectId']}. Error: {e}")
            break

        i+=1
        
    



    return df_filtered



    


issuance_for_api_PUT = concat_existing_new_issuances(issuance_mapping,issuances_grouped)

print(len(set(issuance_for_api_PUT)))
# print(issuance_for_api_PUT)

data = list(issuance_for_api_PUT.items())

# Create DataFrame from the list of tuples
df_issuances_extended = pd.DataFrame(data, columns=['projectId', 'issuances'])

# Display the DataFrame
df_issuances_extended['warehouseProjectId']= df_issuances_extended['projectId'].map(project_id_to_warehouseId)

print(len(df_issuances_extended['warehouseProjectId'].unique()))
df_issuances_extended







13154
1006
13154
0
len unique cleaned Issuances before API 3564
len cleaned Issuances before API 3564
len diff1 2695
len diff2 137
3564
3564


Unnamed: 0,projectId,issuances,warehouseProjectId
0,00J4UE52C4VTT01JSVE9ERDU8WP76W,"[{'verificationBody': 'RINA', 'startDate': '20...",d9716c37-3370-4903-bf0b-6bad99c015f0
1,00MPVBXC6YBLA33C1NN1G3K33VHK1V,"[{'verificationBody': 'TUV NORD', 'startDate':...",d6e80c17-6719-44e2-9687-e884780340cc
2,015ESW8TY1JO2B2E0T0EP4ZZQKQM5D,"[{'verificationBody': 'TSUS', 'startDate': '20...",e98e1327-b1b9-4481-94c3-8565023d5187
3,01BHKDBIJYIMU1ZSLWRZX238VM5FJU,"[{'verificationBody': 'BVCH', 'startDate': '20...",ce055d32-4d09-41f2-a6af-9ecd5edf28ec
4,01GN1N0RHE2KY6CRDPWDNY7ELCZKNT,"[{'verificationBody': 'LGAI', 'startDate': '20...",7f111e3e-f509-4ee8-a144-ef949d41ec85
...,...,...,...
3559,ZYXH65I6FVRCHO68AS8K64WYFS3V2O,"[{'verificationBody': 'BVCH', 'startDate': '20...",ec130a9b-d7c9-4058-9602-73fd35abef2e
3560,ZZ2UA0ANWJAF7LL3BE60P5HG1LRLAJ,"[{'verificationBody': 'LGAI', 'startDate': '20...",ea42ba32-a0d1-49fd-98dc-08b6d1407296
3561,ZZ99SW9AQ4MM9WTA4H7WGK2IQZ9BZK,[{'id': '44095342-edcc-48f0-8726-c6774e4e391d'...,70f9b392-e350-42e2-81d9-34af6af607d0
3562,ZZK6XB1XUWV596IZZ4H06G6E9KBC1G,"[{'verificationBody': 'LGAI', 'startDate': '20...",64489a48-62aa-4640-b953-8976a4c9a31a


In [113]:
orgUidMainnet = "b3d4e71d806e86ff1f8712b6854d65e2c178e873ee22b2f7d0da937dacbaa985"
orgUidTestnet = "fc7e4380bc8ba22f98a655e04b54af3aed122db34bb3357b3a9c3eb0ff7039bc"
all_api = getAllCdmProjects(orgUidMainnet)
len(all_api)

1
2
3
4
5
6
7


13154

In [80]:

# Comment if we want to update
issuances_ext = updateNewIssuances(all_api,df_issuances_extended,100,500,"mainnet")
issuances_ext.to_excel("testtest_hu_2.xlsx")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['issuances'] = df_filtered['warehouseProjectId'].map(issuance_map)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['estimations'] = df_filtered['estimations'].apply(clean_estimations)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_filtered['projectLocations'] = df_filtered

0
Successfully posted project with ID: 471627bd-3a75-4aea-8e98-e7748ccc0770
1
Successfully posted project with ID: 9835668b-b88c-4034-94fa-193f07e0ee82
2
Successfully posted project with ID: 62b702a0-05b4-4d5b-af7d-053f386bf547
3
Successfully posted project with ID: 4fbb99aa-bf10-4955-ba49-537541ce7c7f
4
Successfully posted project with ID: 1395cd07-72f6-4524-8180-145d405f5a0b
5
Successfully posted project with ID: 0e3c17c4-4e7f-4954-9301-e793a7a2da3c
6
Successfully posted project with ID: 6d99cc29-88f2-43ca-a4ae-916dc8971e74
7
Successfully posted project with ID: f30038c4-f500-4687-ac68-33ef8aa5ca6a
8
Successfully posted project with ID: 8bfc8509-d025-4dba-b8ff-7c56fa920d4f
9
Successfully posted project with ID: 387d4fe0-611f-442b-9955-cf9d7f994f41
10
Successfully posted project with ID: 6c1741a3-8976-48d8-a59e-bc21f684611e
11
Successfully posted project with ID: 664da4b9-843b-465e-8a2e-85b6bfef9aac
12
Successfully posted project with ID: f727eb3f-88f1-4454-9ec7-c5dfb48b7920
13
Succes

In [88]:
# try update issuance of first 10 projects

# def testnet_update_10_issuances_bef_2016(all_api,df_issuances_new):
#     # ONLY DO THIS IN TESTNET
#     # all_api.drop("estimations", axis=1, inplace=True)
#     issuances_ext = updateNewIssuances(all_api,df_issuances_new,0,10,"testnet")

# update_first_10_issuances = testnet_update_10_issuances_bef_2016(all_api,df_issuances_extended)



# def units_belong_to_first_10(df_issuances,retired_units):
#     first_10 = df_issuances[0:10]

#     df_retired_units_in_first_10 =retired_units[retired_units['projectId'].isin( first_10['projectId'])]
#     return df_retired_units_in_first_10

# df_retired_first_10 = units_belong_to_first_10(all_api,df_issuances_extended)
# df_retired_first_10


Unnamed: 0,projectId,issuances,warehouseProjectId
1681,HG20LFVJPFURJJ4M6E6MS07HTOZTR4,[{'id': '437a1b0b-2263-4484-ad67-a0e569cdea64'...,fed3d1e8-d218-4d4f-8f10-b4aa02d21704


In [None]:
# def process_projects(project_issuances, locations_mapping, project_id_to_warehouseId, issuance_mapping, project_id_to_locations,start,end):
#     # Select the first 100 projects
#     first_100_projects = project_issuances[start:end]

#     # Map additional information to the projects using the provided mappings
#     first_100_projects['warehouseProjectId'] = first_100_projects['projectId'].map(project_id_to_warehouseId)
#     first_100_projects['issuances'] = first_100_projects['projectId'].map(issuance_mapping)
#     first_100_projects['projectLocations'] = first_100_projects['projectId'].map(project_id_to_locations)

#     # Define columns to drop
#     projects_column_to_drop = ['country', 'inCountryRegion', 'geographicIdentifier']

#     # Drop the specified columns
#     first_100_projects = first_100_projects.drop(columns=projects_column_to_drop)

#     # Return the processed dataframe
#     return first_100_projects

# # first_100_projects = process_projects(merged_project_issuances, locations_mapping, project_id_to_warehouseId, issuance_mapping, project_id_to_locations)
# # first_100_projects.to_excel("First_100_projects.xlsx")

# processed_new_issuances =process_projects(merged_project_issuances, locations_mapping, project_id_to_warehouseId, issuance_mapping, project_id_to_locations,0,50000)
# processed_new_issuances.to_excel("see_new_added_iss.xlsx")


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  first_100_projects['warehouseProjectId'] = first_100_projects['projectId'].map(project_id_to_warehouseId)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  first_100_projects['issuances'] = first_100_projects['projectId'].map(issuance_mapping)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  first_100_p

In [None]:
# will do 9000 to 1000
# post_projects(merged_project_issuances,12600,13000,url_project_local,"create","ok","ok")

In [98]:
df_retired_clean

Unnamed: 0,unitStatus,unitCount,unitOwner,projectId,unitBlockStart,unitBlockEnd,countryJurisdictionOfOwner,inCountryJurisdictionOfOwner,unitType,correspondingAdjustmentDeclaration,unitRegistryLink,verificationBody,geographyIdentifier,vintageYear,id
0,Retired,5294,Brazil,DWXTGTQLAORUROS9KPVZJUSGI8UK70,65142364,65147657,Brazil,,,Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,RINA,"{""latitude"":-14.235004,""longitude"":-51.92528}",2005,DNV-CUK1293110659.42
6,Retired,5294,Brazil,8J911GX7NLREEIL5L09MKJTI9USERE,60482913,60488206,Brazil,,,Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,TUV NORD,"{""latitude"":-14.235004,""longitude"":-51.92528}",2010,DNV-CUK1299845791.9
18,Retired,5000,Brazil,O5C8ZV4YZTL3001AX5RD5V1OL5OM0A,57297956,57302955,Brazil,,,Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,DNV,"{""latitude"":-14.235004,""longitude"":-51.92528}",2006,DNV-CUK1264416604.37
36,Retired,5000,Brazil,HY2WB5CJ5DZTLBFLQE55ZHT0XK2DO2,61682096,61687095,Brazil,,,Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,TUV SUD,"{""latitude"":-14.235004,""longitude"":-51.92528}",2011,TUEV-SUED1317711892.85
43,Retired,5000,Brazil,0IET49KTMORLOP6NX7Q56DF7HXTMPV,57923146,57928145,Brazil,,,Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,DNV,"{""latitude"":-14.235004,""longitude"":-51.92528}",2008,DNV-CUK1265191517.98
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
130472,Retired,3,Chile,L9H0347KVT9F7MAMD49TAYUJLMFHEW,41951955,41951957,Chile,,,Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,TUV NORD,"{""latitude"":-35.675147,""longitude"":-71.542969}",2013,Applus1654182155.66
130476,Retired,1,China,B90WLUX8QKF2591LN21LEVJ82HOO75,1185996159,1185996159,China,,,Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,LGAI,"{""latitude"":35.86166,""longitude"":104.195397}",2013,Applus1654080342.92-2
130478,Retired,186,Malawi,YHNSTOHUC3NLV2APHHB6SCWA61Q46T,207935,208120,Malawi,,,Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,TUV SUD,"{""latitude"":-13.254308,""longitude"":34.301525}",2014,TUEV-SUED1446721135.84
130479,Retired,129,India,82ORS4DFFAE3F7JI64VMOSYQEYXGCB,279636945,279637073,India,,,Unknown,https://cdm.unfccc.int/Projects/Validation/DB/...,LGAI,"{""latitude"":20.593684,""longitude"":78.96288}",2015,Applus1557750802.84-1


In [116]:
df_retired_clean

def safe_convert_to_datetime_format(date_str):
    try:
        return pd.to_datetime(date_str).strftime('%Y-%m-%dT%H:%M:%S.000Z')
    except (pd.errors.OutOfBoundsDatetime, ValueError):
        return None 
        
def match_uploaded_issuance(row):
    
    for uploaded in row['uploadedIssuance']:
        start_date = safe_convert_to_datetime_format(row['startDate'])
        end_date = safe_convert_to_datetime_format(row['endDate'])


        # start_date = pd.to_datetime(row['startDate']).strftime('%Y-%m-%dT%H:%M:%S.000Z')
        # end_date = pd.to_datetime(row['endDate']).strftime('%Y-%m-%dT%H:%M:%S.000Z')

        # Now compare dates in the same format
        if (uploaded['verificationApproach'] == row['verificationApproach'] and
            uploaded['startDate'] == start_date and
            uploaded['verificationBody'] == row['verificationBody'] and
            uploaded['endDate'] == end_date):
            # print(uploaded)

            matched_issuance = uploaded.copy()
            matched_issuance.pop('orgUid', None) 
            return uploaded['id'], matched_issuance
    
        # print(f"No match for row: {row['verificationApproach']} | {row['startDate']} | {row['verificationBody']} | {row['endDate']}")
        # print(f"Compared against: {uploaded['verificationApproach']} | {uploaded['startDate']} | {uploaded['verificationBody']} | {uploaded['endDate']}")
    
    return None,None # Return None or appropriate value if no match found


def merging_issuances_units(df_retired_clean,df_issuances_cleaned,env,outputName):
    merged_issuances_retired_units = pd.merge(df_retired_clean, df_issuances_cleaned, on='id', how='left')
    df_issuances_cleaned_unique = df_issuances_cleaned.drop_duplicates(subset='id')

    duplicates = df_issuances_cleaned_unique.duplicated(subset='id', keep=False)

    # Filter the DataFrame to only include the duplicates
    df_issuances_duplicates = df_issuances_cleaned_unique[duplicates]
    merged_issuances_retired_units.drop('projectId_x', axis=1, inplace=True)

    merged_issuances_retired_units.drop('verificationBody_x', axis=1, inplace=True)
    merged_issuances_retired_units.rename(columns={'projectId_y': 'projectId'}, inplace=True)
    merged_issuances_retired_units.rename(columns={'verificationBody_y': 'verificationBody'}, inplace=True)
    merged_issuances_retired_units['correspondingAdjustmentStatus']= "Not Applicable"


    merged_issuances_retired_units.dropna()

    merged_issuances_retired_units['unitRegistryLink']='https://cdm.unfccc.int/Projects/Validation/DB/' +merged_issuances_retired_units['projectId']+"/view.html"
    merged_issuances_retired_units['warehouseProjectId'] = merged_issuances_retired_units['projectId'].map(project_id_to_warehouseId)
    merged_issuances_retired_units['uploadedIssuance']= merged_issuances_retired_units['projectId'].map(issuance_mapping)
    merged_issuances_retired_units['inCountryJurisdictionOfOwner']= merged_issuances_retired_units['unitOwner']

    merged_issuances_retired_units.rename(columns={'id': 'issuanceOriginalId'}, inplace=True)
    merged_issuances_retired_units['projectLocationId'] = merged_issuances_retired_units['projectId'].map(project_id_to_warehouseId)

    # WARNING JUST FOR THIS TESTNET ONLY YOOO
    if env == "testnet":
        merged_issuances_retired_units = merged_issuances_retired_units.dropna(subset=['uploadedIssuance'])
    if env == "mainnet":
        merged_issuances_retired_units = merged_issuances_retired_units.dropna(subset=['uploadedIssuance'])


    merged_issuances_retired_units[['newId', 'matchedIssuance']] = merged_issuances_retired_units.apply(
        lambda row: match_uploaded_issuance(row), 
        axis=1, 
        result_type='expand'
    )

    null_count = merged_issuances_retired_units['newId'].isna().sum()

    # Print the count
    print(f"The 'newId' column has {null_count} null values.")
    # if env!="testnet" :
    #     assert null_count == 0, f"There are {null_count} null values in the 'newId' column, but there should be none."

    if env=="testnet":
        merged_issuances_retired_units = merged_issuances_retired_units.dropna(subset=['matchedIssuance'])
    if env=="mainnet":
        merged_issuances_retired_units = merged_issuances_retired_units.dropna(subset=['matchedIssuance'])

    merged_issuances_retired_units.drop('projectId', axis=1, inplace=True)
    merged_issuances_retired_units.drop('geographyIdentifier', axis=1, inplace=True)
    merged_issuances_retired_units['unitType']="Not Determined"
    merged_issuances_retired_units.drop('uploadedIssuance', axis=1, inplace=True)
    merged_issuances_retired_units.drop('warehouseProjectId', axis=1, inplace=True)
    columns_to_drop = ['verificationBody', 'startDate', 'endDate', 'verificationReportDate', 'verificationApproach','issuanceOriginalId','newId']

    # Drop the specified columns
    merged_issuances_retired_units = merged_issuances_retired_units.drop(columns=columns_to_drop)
    merged_issuances_retired_units.rename(columns={'matchedIssuance': 'issuance'}, inplace=True)
    merged_issuances_retired_units['unitBlockStart'] = merged_issuances_retired_units['unitBlockStart'].astype(str)
    merged_issuances_retired_units['unitBlockEnd'] = merged_issuances_retired_units['unitBlockEnd'].astype(str)


    merged_issuances_retired_units.to_excel("./output/"+outputName)
    return merged_issuances_retired_units


merged_issuances_held_units = merging_issuances_units(df_units_held,df_issuances_cleaned,"testnet","held_units_100_500.xlsx")
merged_issuances_retired_units = merging_issuances_units(df_retired_clean,df_issuances_cleaned,"testnet","retired_units_100_500.xlsx")
# merged_issuances_held_units = merging_issuances_units(df_units_held,df_issuances_cleaned,"testnet")
print(len(merged_issuances_retired_units))
print(len(merged_issuances_retired_units['projectLocationId'].unique()))
print(len(merged_issuances_held_units))
print(len(merged_issuances_held_units['projectLocationId'].unique()))


# print(merged_issuances_retired_units['projectLocationId'].unique())



merged_issuances_held_units
# merged_issuances_retired_units
# merged_issuances_retired_units

The 'newId' column has 1879 null values.
The 'newId' column has 5454 null values.
6190
173
726
6190


Unnamed: 0,unitStatus,unitCount,unitBlockStart,unitBlockEnd,unitOwner,countryJurisdictionOfOwner,inCountryJurisdictionOfOwner,unitRegistryLink,correspondingAdjustmentDeclaration,unitType,vintageYear,correspondingAdjustmentStatus,projectLocationId,issuance
6,Held,44315.0,321372452.0,321416767,India,India,India,https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,Not Determined,2013,Not Applicable,c5a71c3e-2de5-499c-bbc6-534bb14bafe3,"{'id': '3f7157db-29a1-46db-a8a4-458ca5ec6eb6',..."
138,Held,1979.0,103422755.0,103424734,Brazil,Brazil,Brazil,https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,Not Determined,2012,Not Applicable,5aa97681-3154-42ce-8b5e-9fd38ac4276e,"{'id': '776b79f8-ed1e-4c06-b908-030198e31e5b',..."
139,Held,416028.0,103474663.0,103890691,Brazil,Brazil,Brazil,https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,Not Determined,2013,Not Applicable,5aa97681-3154-42ce-8b5e-9fd38ac4276e,"{'id': '733bb052-3334-4582-bbc5-f31ce8803608',..."
152,Held,188236.0,122445625.0,122633861,Brazil,Brazil,Brazil,https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,Not Determined,2015,Not Applicable,5aa97681-3154-42ce-8b5e-9fd38ac4276e,"{'id': '76da4916-d6f2-499e-a809-efd703eae2ef',..."
156,Held,9688.0,474731.0,484419,Cote d'Ivoire,Cote d'Ivoire,Cote d'Ivoire,https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,Not Determined,2015,Not Applicable,d8c0aabe-f4d2-4672-94dc-0129ca979f52,"{'id': '0697084e-2f92-40b2-8b4e-08d712f01d65',..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10985,Held,82144.0,1957479.0,2039623,Nepal,Nepal,Nepal,https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,Not Determined,2015,Not Applicable,fb337f04-3f31-4dbf-939d-5b783ae199a2,"{'id': '0dbabe79-f52a-4284-b306-951b03715ea3',..."
10986,Held,82577.0,126884.0,209461,Ethiopia,Ethiopia,Ethiopia,https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,Not Determined,2014,Not Applicable,4ea82c14-3939-424a-b2e5-b2d108e35db5,"{'id': 'df06fda0-76f0-4c38-903a-210147f978f7',..."
10989,Held,0.0,842755.0,842755,Ghana,Ghana,Ghana,https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,Not Determined,2013,Not Applicable,0225560e-bc47-4665-9ab6-a3567e750b0d,"{'id': '4106a48a-047f-4fa5-8789-d0f4474baef1',..."
10990,Held,7.0,3426126.0,3426133,Ghana,Ghana,Ghana,https://cdm.unfccc.int/Projects/Validation/DB/...,Unknown,Not Determined,2013,Not Applicable,0225560e-bc47-4665-9ab6-a3567e750b0d,"{'id': '4106a48a-047f-4fa5-8789-d0f4474baef1',..."


In [47]:
# df_units_held


# merged_issuances_held_units = pd.merge(df_units_held, df_issuances_cleaned, on='id', how='left')

# # merged_issuances_units
# df_issuances_cleaned
# df_issuances_cleaned_unique = df_issuances_cleaned.drop_duplicates(subset='id')

# df_issuances_cleaned_unique

# duplicates = df_issuances_cleaned_unique.duplicated(subset='id', keep=False)

# # Filter the DataFrame to only include the duplicates
# df_issuances_duplicates = df_issuances_cleaned_unique[duplicates]

# # df_issuances_duplicates.to_excel("./output/duplicateIssuances.xlsx")

# merged_issuances_held_units .drop('projectId_x', axis=1, inplace=True)

# merged_issuances_held_units.drop('verificationBody_x', axis=1, inplace=True)
# merged_issuances_held_units.rename(columns={'projectId_y': 'projectId'}, inplace=True)
# merged_issuances_held_units.rename(columns={'verificationBody_y': 'verificationBody'}, inplace=True)
# merged_issuances_held_units['correspondingAdjustmentStatus']= "Not Applicable"
# merged_issuances_held_units['correspondingAdjustmentStatus']= "Not Applicable"
# # merged_issuances_units['warehouseProjectId']= merged_issuances_units['projectId'].map(project_to_uuid_map)


# # merged_issuances_held_units.dropna()


# merged_issuances_held_units['unitRegistryLink']='https://cdm.unfccc.int/Projects/Validation/DB/' +merged_issuances_held_units['projectId']+"/view.html"
# merged_issuances_held_units['warehouseProjectId'] = merged_issuances_held_units['projectId'].map(project_id_to_warehouseId)
# merged_issuances_held_units['uploadedIssuance']= merged_issuances_held_units['projectId'].map(issuance_mapping)
# merged_issuances_held_units['inCountryJurisdictionOfOwner']= merged_issuances_held_units['unitOwner']

# # merged_issuances_retired_units['in']= merged_issuances_retired_units['projectId'].map(issuance_mapping)

# merged_issuances_held_units.rename(columns={'id': 'issuanceOriginalId'}, inplace=True)
# merged_issuances_held_units['projectLocationId'] = merged_issuances_held_units['projectId'].map(project_id_to_warehouseId)

# def match_uploaded_issuance(row):
#     for uploaded in row['uploadedIssuance']:

#         start_date = pd.to_datetime(row['startDate']).strftime('%Y-%m-%dT%H:%M:%S.000Z')
#         end_date = pd.to_datetime(row['endDate']).strftime('%Y-%m-%dT%H:%M:%S.000Z')

#         # Now compare dates in the same format
#         if (uploaded['verificationApproach'] == row['verificationApproach'] and
#             uploaded['startDate'] == start_date and
#             uploaded['verificationBody'] == row['verificationBody'] and
#             uploaded['endDate'] == end_date):
#             # print(uploaded)

#             matched_issuance = uploaded.copy()
#             matched_issuance.pop('orgUid', None) 
#             return uploaded['id'], matched_issuance
    
#         # print(f"No match for row: {row['verificationApproach']} | {row['startDate']} | {row['verificationBody']} | {row['endDate']}")
#         # print(f"Compared against: {uploaded['verificationApproach']} | {uploaded['startDate']} | {uploaded['verificationBody']} | {uploaded['endDate']}")
    
#     return None,None # Return None or appropriate value if no match found

# merged_issuances_held_units[['newId', 'matchedIssuance']] = merged_issuances_held_units.apply(
#     lambda row: match_uploaded_issuance(row), 
#     axis=1, 
#     result_type='expand'
# )

# null_count = merged_issuances_held_units['newId'].isna().sum()

# # Print the count
# print(f"The 'newId' column has {null_count} null values.")
# assert null_count == 0, f"There are {null_count} null values in the 'newId' column, but there should be none."


# merged_issuances_held_units.drop('projectId', axis=1, inplace=True)
# merged_issuances_held_units.drop('geographyIdentifier', axis=1, inplace=True)
# merged_issuances_held_units['unitType']="Not Determined"
# merged_issuances_held_units.drop('uploadedIssuance', axis=1, inplace=True)
# merged_issuances_held_units.drop('warehouseProjectId', axis=1, inplace=True)
# columns_to_drop = ['verificationBody', 'startDate', 'endDate', 'verificationReportDate', 'verificationApproach','issuanceOriginalId','newId']

# # Drop the specified columns
# merged_issuances_held_units = merged_issuances_held_units.drop(columns=columns_to_drop)
# merged_issuances_held_units.rename(columns={'matchedIssuance': 'issuance'}, inplace=True)
# merged_issuances_held_units['unitBlockStart'] = merged_issuances_held_units['unitBlockStart'].astype(str)
# merged_issuances_held_units['unitBlockEnd'] = merged_issuances_held_units['unitBlockEnd'].astype(str)




# merged_issuances_held_units.to_excel("./output/held_units_api_upload.xlsx")

# merged_issuances_held_units



TypeError: 'float' object is not iterable

In [51]:
def post_units(df, url, start_index, end_index):
  
    uuids = []
    # Iterate over the specified range of rows
    for i in range(start_index, min(end_index, len(df))):
        row = df.iloc[i]
        data = row.to_dict()
        response = requests.post(url, json=data)

        try:

            if response.status_code == 200:  # Check if the request was successful
                print("i is",i)
                response_data = response.json()
                uuids.append(response_data.get('uuid'))  # Get the UUID and add to the list
            else:
                    print("i is",i)
                    print(data)
                    print(f"Failed to post project with ID:  {row['issuance']['id']}. Status code: {response.status_code}. Message: {response.json()}")
                    break
            # if type=="update":

            #     response = requests.put(url, json=request_body)
        except RequestException as e:
            print(data)
            print(f"Request failed for project with ID:  {row['issuance']['id']}. Error: {e}")
            break
    return uuids

# Example usage


In [75]:
url = "http://localhost:31310/v1/units"  # Replace with your actual URL
start_index = 2300# Starting index
end_index = 3000  # Ending index

# Call the function with the DataFrame and the specified range
uuids = post_units(merged_issuances_retired_units, url, start_index, end_index)
uuids

i is 2300
i is 2301
i is 2302
i is 2303
i is 2304
i is 2305
i is 2306
i is 2307
i is 2308
i is 2309
i is 2310
i is 2311
i is 2312
i is 2313
i is 2314
i is 2315
i is 2316
i is 2317
i is 2318
i is 2319
i is 2320
i is 2321
i is 2322
i is 2323
i is 2324
i is 2325
i is 2326
i is 2327
i is 2328
i is 2329
i is 2330
i is 2331
i is 2332
i is 2333
i is 2334
i is 2335
i is 2336
i is 2337
i is 2338
i is 2339
i is 2340
i is 2341
i is 2342
i is 2343
i is 2344
i is 2345
i is 2346
i is 2347
i is 2348
i is 2349
i is 2350
i is 2351
i is 2352
i is 2353
i is 2354
i is 2355
i is 2356
i is 2357
i is 2358
i is 2359
i is 2360
i is 2361
i is 2362
i is 2363
i is 2364
i is 2365
i is 2366
i is 2367
i is 2368
i is 2369
i is 2370
i is 2371
i is 2372
i is 2373
i is 2374
i is 2375
i is 2376
i is 2377
i is 2378
i is 2379
i is 2380
i is 2381
i is 2382
i is 2383
i is 2384
i is 2385
i is 2386
i is 2387
i is 2388
i is 2389
i is 2390
i is 2391
i is 2392
i is 2393
i is 2394
i is 2395
i is 2396
i is 2397
i is 2398
i is 2399


['e481416e-6a33-44fa-81fc-89d8c7431864',
 '9121e679-6021-439f-b21e-723e1da3920f',
 '29962aff-658c-45ae-a4db-68615ce0e1b6',
 'b1dba7d4-e4f4-4405-93e7-001dbe902551',
 '97caefc5-9425-4c3c-a525-9b0c6e0886de',
 '38a8edb4-9499-4766-bccf-1d68f43b21bb',
 'd82aaf17-d9e1-47bd-bc04-bac97635b259',
 '8b406b44-345f-4afd-a19b-6bcf807135fd',
 '38c00eb0-8bb2-463b-98a5-8cf5e4abbf99',
 '516a3db4-119b-481f-a2c7-34c34fdb8d4c',
 '6f3a75b5-6b4f-4bbc-a157-8eaed3e8f325',
 'a6bfa65d-11d2-4764-92e1-7ec32954b3b2',
 '88329871-fdde-4004-a3c8-184cb4b93c87',
 '78cac77a-03f7-40bc-b3cf-23a3d242a136',
 'd7dfd844-4250-4672-bab9-3b94b6d82ddd',
 '4806ba4e-d12e-4cba-bb07-2f99490487be',
 '555a4341-32dc-48ca-87fb-88b01c8ecd17',
 '09ea07fb-bfff-4501-8503-429113dce013',
 '5ce565e4-e8c4-4a20-adb7-9740eb556fb6',
 '28dac9fe-e3fd-4be0-9c2c-4010365594db',
 'c1357734-86db-471d-97f4-a9097acf66be',
 '68099d5b-dc9f-41ef-ab14-5ed3862d422c',
 '5af1d20c-7572-48f3-bacc-12fe9eab1d29',
 '60bf60e0-f166-43f7-b2c5-23e2e9b0367a',
 'ace4371e-9fde-

In [None]:
df_checking_iss = pd.read_excel("./input/CDM-Issuances-November.xlsx")

df_checking_iss

duplic = df_checking_iss.duplicated(subset='Issuance process ID', keep=False)

# Filter the DataFrame to only include the duplicates
df_duplic = df_checking_iss[duplic]
df_duplic = df_duplic.sort_values(by='CDM project reference number', ascending=True)

df_duplic.to_excel("duplicduplic.xlsx")

len(df_checking_iss)




12548

In [None]:
def rename_duplicates(df, column_name):
   
    df_copy = df.copy()
    duplic = df_copy.duplicated(subset=column_name, keep=False)
    duplicates = df_copy[duplic].copy()
    duplicates['dup_count'] = duplicates.groupby(column_name).cumcount() + 1
    duplicates[column_name] = duplicates[column_name].astype(str) + '-' + duplicates['dup_count'].astype(str)
    duplicates.drop('dup_count', axis=1, inplace=True)
    df_copy.update(duplicates)
    return df_copy
updated_df = rename_duplicates(df_checking_iss, 'Issuance process ID')
updated_df



In [None]:
# {
#     "message": "Data Validation error",
#     "errors": [
#         "\"inCountryJurisdictionOfOwner\" is not allowed to be empty",
#         "\"unitBlockStart\" must be a string",
#         "\"unitBlockEnd\" must be a string",
#         "\"unitType\" is not allowed to be empty",
#         "\"issuance.orgUid\" is not allowed",
#         "\"verificationBody\" is not allowed"
#     ],
#     "success": false
# }



Unnamed: 0,originProjectId,projectName,program,sector,projectType,projectStatus,methodology,validationBody,validationDate,inCountryRegion,...,registryOfOrigin,unitMetric,projectLink,projectId,coveredByNDC,projectStatusDate,projectDeveloper,geographicIdentifier,description,ndcInformation
24,51Z7LD1J86IUVYIUFF42N3NZ6F6NOB,e7 Bhutan Micro Hydro Power CDM Project,PA,Energy industries (renewable - / non-renewable...,Hydro,Registered,CDM - AMS-I.A.,"JACO CDM CO., LTD",2004-06-04 00:00:00,Southern Asia,...,CDM Registry,tCO2e,https://cdm.unfccc.int,51Z7LD1J86IUVYIUFF42N3NZ6F6NOB,Unknown,2023-12-08,"E7, Kansai Electric","{""latitude"":27.514162,""longitude"":90.433601}",e7 Bhutan Micro Hydro Power CDM Project,
2254,1QA3P4SHFQ3HT5A1PADQD59FL9SPOC,"Dagachhu Hydropower Project, Bhutan",PA,Energy industries (renewable - / non-renewable...,Hydro,Registered,CDM - ACM0002,Det Norske Veritas- CUK,2008-09-03 00:00:00,Southern Asia,...,CDM Registry,tCO2e,https://cdm.unfccc.int,1QA3P4SHFQ3HT5A1PADQD59FL9SPOC,Unknown,2023-12-08,Pöyry Energy,"{""latitude"":27.514162,""longitude"":90.433601}","Dagachhu Hydropower Project, Bhutan",
7755,U18YV4JT0CQTF3G0LQ0I4FDSCSQM2L,"Punatsangchhu-I Hydroelectric Project, Bhutan",PA,Energy industries (renewable - / non-renewable...,Hydro,Registered,CDM - ACM0002,Bureau Veritas Certification Holding SAS (BVCH),2010-11-04 00:00:00,Southern Asia,...,CDM Registry,tCO2e,https://cdm.unfccc.int,U18YV4JT0CQTF3G0LQ0I4FDSCSQM2L,Unknown,2023-12-08,PHPA,"{""latitude"":27.514162,""longitude"":90.433601}","Punatsangchhu-I Hydroelectric Project, Bhutan",
8406,RAF02M1RO8Q7V2WW1RPI9ZB6U3QWM2,Rural Electrification Project for Clean Energy...,PA,Energy distribution,Energy distribution,Registered,CDM - AMS-III.AW.,RWTÜV GmbH,2013-01-22 00:00:00,Southern Asia,...,CDM Registry,tCO2e,https://cdm.unfccc.int,RAF02M1RO8Q7V2WW1RPI9ZB6U3QWM2,Unknown,2023-12-08,n.a.,"{""latitude"":27.514162,""longitude"":90.433601}",Rural Electrification Project for Clean Energy...,
8662,AV4D7HV34I0UH7WJ6S2G3UV9LA2KQV,Mangdechhu Hydroelectric Project Authority,PA,Energy industries (renewable - / non-renewable...,Hydro,Registered,CDM - ACM0002,Bureau Veritas Certification Holding SAS (BVCH),2014-06-26 00:00:00,Southern Asia,...,CDM Registry,tCO2e,https://cdm.unfccc.int,AV4D7HV34I0UH7WJ6S2G3UV9LA2KQV,Unknown,2023-12-08,n.a.,"{""latitude"":27.514162,""longitude"":90.433601}",Mangdechhu Hydroelectric Project Authority,
8977,976VA2GJENO0ZMVYWRQJ8XOPG2JXB5,Punatsangchhu-II Hydroelectric Project (1020 MW),PA,Energy industries (renewable - / non-renewable...,Hydro,Validated,CDM - ACM0002,Bureau Veritas Certification Holding SAS (BVCH),2014-06-26 00:00:00,Southern Asia,...,CDM Registry,tCO2e,https://cdm.unfccc.int,976VA2GJENO0ZMVYWRQJ8XOPG2JXB5,Unknown,2023-12-08,n.a.,"{""latitude"":27.514162,""longitude"":90.433601}",Punatsangchhu-II Hydroelectric Project (1020 MW),
9077,Z2GXVTKHQJ8VB82DHT5I70LG7DMABM,Substitution of grid power generation through ...,PA,Energy industries (renewable - / non-renewable...,Hydro,Withdrawn,CDM - ACM0002,Det Norske Veritas- CUK,2007-12-22 00:00:00,Southern Asia,...,CDM Registry,tCO2e,https://cdm.unfccc.int,Z2GXVTKHQJ8VB82DHT5I70LG7DMABM,Unknown,2023-12-08,Power Grid Corporation of India,"{""latitude"":27.514162,""longitude"":90.433601}",Substitution of grid power generation through ...,
12038,YDCO88MCSYRWQVM9SXIAN4ZVD0TFF0,"Dagachhu Hydropower Project, Bhutan",PA,Energy industries (renewable - / non-renewable...,Hydro,Validated,CDM - ACM0002,Det Norske Veritas- CUK,2007-10-23 00:00:00,Southern Asia,...,CDM Registry,tCO2e,https://cdm.unfccc.int,YDCO88MCSYRWQVM9SXIAN4ZVD0TFF0,Unknown,2023-12-08,"Caspervandertak, Gansu Tonghe Investment Proje...","{""latitude"":27.514162,""longitude"":90.433601}","Dagachhu Hydropower Project, Bhutan",
