In [45]:
import pandas as pd

import numpy as np

from datetime import datetime

import requests
from requests.exceptions import RequestException
import json

from datetime import datetime




IMPORT RELEVANT CSV FILE


In [18]:
# Main CDM Registry project datasource from Database PA POA
df_activities= pd.read_excel('CDM-Activities-November.xlsx')

# Country and geographic identifier file for easier mapping
df_country_code = pd.read_excel('cdm-country-code.xlsx')


# Countries in CADTrust, to check with the mismatch from UNFCCC in later data processing
df_cadt_country = pd.read_excel("CADTrust_Country.xlsx")

# LOAD UNEP file for PDD mapping
df_unep = pd.read_excel("cdm-pipeline.xlsx")







INITIAL CLEANUP


In [19]:

# FILL CDM project reference number NAN value to 0, since there are almost 3000 of them

df_activities['CDM project reference number'] = df_activities['CDM project reference number'].fillna(0).astype(int)


# Check projects with more than one host countries/ "List of host countries (ISO 2)"
more_countries = df_activities[df_activities["List of host countries (ISO 2)"].str.len() > 2]

# Create excel files for projects with more than one countries/ "List of host countries (ISO 2)",
more_countries.to_excel("CDM_projects_many_countries.xlsx")

# Map to CAD Trust based on the first country in the column
df_activities["List of host countries (ISO 2)"] = df_activities["List of host countries (ISO 2)"].str[:2]

# Apply geographic identifier that is suitable for CAD Trust mapping
df_country_code['geographicIdentifier'] = df_country_code.apply(
    lambda row: "{\"latitude\":" + str(row["latitude"]) + ",\"longitude\":" + str(row["longitude"]) + "}", axis=1
)

# Key Value mapping of PDD Consultant for CAD Trust projectDeveloper
pdd_developer_map = df_unep.set_index('Unique project ID')['PDD Consultant'].to_dict()

# Key Value mapping of reference number to unique identifier that will be useful for ISSUANCES and UNITS
id_to_id = df_activities.set_index('CDM project reference number')['Unique project identifier (traceable with Google)'].to_dict()

#Key Value mapping Country Code to CAD Trust Country Field
country_name_map = df_country_code.set_index('country')['cadtcountry'].to_dict()


#Key Value mapping Country Code to Geography Map
geography_map = df_country_code.set_index('name')['geographicIdentifier'].to_dict()

#Key Value mapping verification report date
verification_report_date_map = df_activities.set_index('Unique project identifier (traceable with Google)')['Start of first crediting period'].to_dict()


# Map to status fields from CDM to CAD Trust Project Status Picklist Value
project_status_map = {
    "Registered": "Registered",
    "Rejected": "Withdrawn",
    "Withdrawn":"Withdrawn",
    "WithdrawnBeforePublication": "Withdrawn",
    "Withdrawn Before Publication": "Withdrawn",
    "Deregistered": "De-registered",
    "Pending Publication": "Authorized",
    "Provisional": "Validated",
    "Validation Replaced": "Validated",
    "Requesting Registration": "Listed",
    "Validation Public": "Validated",
    "Validation Terminated": "Withdrawn",
    "NA":"Listed",
    "":"Listed",
    np.nan: "Listed"  # Handling actual NaN values
}
# Map from sector code number to it's description
sector_code_map = {
  "1": 'Energy industries (renewable - / non-renewable sources)',
  "2": 'Energy distribution',
  "3": 'Energy demand',
  "4": 'Manufacturing industries',
  "5": 'Chemical industries',
  "6": 'Construction',
  "7": 'Transport',
  "8": 'Mining/mineral production',
  "9": 'Metal production',
  "10": 'Fugitive emissions from fuels (solid, oil and gas)',
  "11": 'Fugitive emissions from production and consumption of halocarbons and sulphur hexafluoride',
  "12": 'Solvent use',
  "13": 'Waste handling and disposal',
  "14": 'Afforestation and reforestation',
  "15": 'Agriculture'
}


# basic cleanup
def fillna_by_dtype(df):
    for column in df.columns:
        if df[column].dtype == np.number:  # If the column is numeric
            df[column] = df[column].fillna(0)
        elif df[column].dtype == np.object:  # If the column is of object type (e.g., strings)
            df[column] = df[column].fillna('NA')
        elif pd.api.types.is_datetime64_any_dtype(df[column]):  # If the column is datetime
            df[column] = df[column].fillna(pd.Timestamp.min)
    return df

    


set_cadt_country = set(df_cadt_country['Country'])
set_country_code = set(df_country_code['name'])

# Find non-matching elements
non_matching_in_cadt_country = set_cadt_country - set_country_code
non_matching_in_country_code = set_country_code - set_cadt_country




In [23]:
# Rename the df column to match CADTrust field

rename_columns  = {

  "Unique project identifier (traceable with Google)": "originProjectId",
  "Registration project title": "projectName",
  "Type of CDM project: PA/PoA": "program",
  "Sectoral scope number(s)": "sector",
  "Project type (UNEP DTU)": "projectType",
  "Website project status": "projectStatus",
  "Methodologies used at registration": "methodology",
  "DOE": "validationBody",
  "Start of validation": "validationDate",
  "Country subregion":"inCountryRegion",
  "List of host countries (ISO 2)": "country",

}

df_project = df_activities[rename_columns.keys()].rename(columns=rename_columns)

# Adding other relevant values to CAD Trust field
df_project["registryOfOrigin"]= "CDM UNFCCC"
df_project['unitMetric'] = 'tCO2e'
df_project['methodology'] = "CDM - " + df_project['methodology']
df_project['projectLink'] = 'https://cdm.unfccc.int'
df_project['projectId'] = df_project['originProjectId']
df_project['coveredByNDC'] = "Unknown"
df_project['projectStatusDate'] = datetime.now().strftime('%Y-%m-%d')

# Map the countries
df_project['country'] = df_project['country'].map(country_name_map)
df_project['sector'] = df_project['sector'].map(sector_code_map)
df_project['projectDeveloper'] = df_project['projectId'].map(pdd_developer_map)
df_project["geographicIdentifier"]= df_project['country'].map(geography_map)
df_project['description'] = df_project['projectName']
df_project['ndcInformation'] = "NA"

# Implement the fillna function created earlier
df_project = fillna_by_dtype(df_project)
df_project["projectStatus"]= df_project['projectStatus'].map(project_status_map)

# Output to check the new DataFrame
df_project.to_excel("CDM-Projects-mapped.xlsx")

# df_project['country'].head(10)





  if df[column].dtype == np.number:  # If the column is numeric
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  elif df[column].dtype == np.object:  # If the column is of object type (e.g., strings)


Findings After Failing to Push data to Chia staging API

In [40]:
# Yemen and Cape Verde not in CAD Trust picklist. This require approval from technical committee otherwise we cant push the data
# 5 projects in total
df_project_yemen_cape_verde = df_project[(df_project['country'] == 'Yemen') | (df_project['country'] == 'Cape Verde')]
# df_with_nan_cdmID has One nan value which is project CDM 10121 row 8459

df_with_nan_cdmID = df_project[df_project['projectId']=='NA']

# Projects that have projectID, but the rest are not filled properly
df_with_nan_validation_date = df_project[df_project['validationDate']=='NA']

# Telling this issue to unfccc
df_to_inform_UNFCCC_1 = df_activities[df_activities['Unique project identifier (traceable with Google)']
                                        .isna()]

df_to_inform_UNFCCC_2 = df_activities[df_activities['Unique project identifier (traceable with Google)']
                                        .isin(df_with_nan_validation_date['projectId'])]

# Concatenating the two DataFrame into one
df_to_inform_UNFCCC = pd.concat([df_to_inform_UNFCCC_1, df_to_inform_UNFCCC_2])

df_to_inform_UNFCCC



Unnamed: 0,CDM project reference number,Unique project identifier (traceable with Google),Registration project title,Type of CDM project: PA/PoA,Project classification,Sectoral scope number(s),Methodologies used at registration,Project type (UNEP DTU),Project subtype (UNEP DTU),DOE,...,Issuance (Para 218)/ Registration (Para 219),Last information provided on para. 218 and 219 of the PCP,Date of last information provided on Para. 218 and 219 of the PCP,Number of CPAs per host party (PoA),Amount of reduction per host party (PoA),Crediting type of the CPAs,Chronological number of registered CDM activity by country,Last communication with the Secretariat,Projects which cannot be renewed,Reporting date
8459,10121,,,PA,,,,,,,...,,,NaT,,,,,.,,03Nov2023
8860,0,Idval,,PoA,,,,Solar,Solar PV,,...,,,NaT,,,Non-included CPAs,,2023m8,,03Nov2023
9218,0,BDF248C6JG1SKOZYQ5T3WVEHR7MXLU,,PoA,,,,Methane avoidance,Waste water,,...,,,NaT,,,Non-included CPAs,,2023m8,,03Nov2023
9434,0,0SERXDY1GS89I4BD3T8QL8NPI6Z34A/,,PoA,,,,Landfill gas,Landfill flaring,,...,,,NaT,,,Non-included CPAs,,2023m8,,03Nov2023
9479,0,A39MS6G57IZHWTBEVNPXRJOFL8K01C,,PoA,,,,Transport,Mode shift: road to rail,,...,,,NaT,,,Non-included CPAs,,2023m8,,03Nov2023
10170,0,DB/Z31DKITEOQXOLO3V5IMYVBGQ8IYZP4,,PoA,,,,Waste,Waste water,,...,,,NaT,,,Non-included CPAs,,2023m8,,03Nov2023
10272,0,Aux_ACG70V0UUUPNDPCRJL8KRYLHHSB0CJ,,PoA,,,,Methane avoidance,Domestic manure,,...,,,NaT,,,Non-included CPAs,,2023m8,,03Nov2023
10404,0,UBX0FC1NMA8IV9H5LTOEG3PJKZSWD6,,PoA,,,,EE Industry,Building materials,,...,,,NaT,,,Non-included CPAs,,2023m8,,03Nov2023
11243,0,D8IRSCM3W0ZGKAFXYJOQVL6HP2NEBU,,PoA,,,,EE households,Stoves,,...,,,NaT,,,Non-included CPAs,,2023m8,,03Nov2023
11518,0,EL9SWD7R6H4MKP3CB8XUYAOFN2J5TZ,,PoA,,,,Mixed renewables,Solar & wind,,...,,,NaT,,,Non-included CPAs,,2023m8,,03Nov2023


DROP THE ROWS OF DATA MENTIONED ABOVE. AFTER THAT, the API call should be smoothly post all the entire data to CAD Trust chia datalayer local staging mode

In [44]:
df_cleaned_project = df_project[~((df_project['country'] == 'Yemen') | (df_project['country'] == 'Cape Verde'))]

# Select rows where projectId is not 'NA'
df_cleaned_project = df_project[df_project['projectId'] != 'NA']

# Select rows where validationDate is not 'NA'
df_cleaned_project = df_project[df_project['validationDate'] != 'NA']

df_cleaned_project.to_excel("cdm_projects_mapped_cleaned.xlsx")
df_cleaned_project


Unnamed: 0,originProjectId,projectName,program,sector,projectType,projectStatus,methodology,validationBody,validationDate,inCountryRegion,...,registryOfOrigin,unitMetric,projectLink,projectId,coveredByNDC,projectStatusDate,projectDeveloper,geographicIdentifier,description,ndcInformation
0,AuxID,Project for GHG emission reduction by thermal ...,PA,Fugitive emissions from production and consump...,HFCs,Registered,CDM - AM0001,SGS United Kingdom Limited (SGS),2003-12-01 00:00:00,Southern Asia,...,CDM UNFCCC,tCO2e,https://cdm.unfccc.int,AuxID,Unknown,2023-11-27,PricewaterhouseCoopers,"{""latitude"":20.593684,""longitude"":78.96288}",Project for GHG emission reduction by thermal ...,
1,LJ80OQZR2JVOLTZZX4Y0EGB1N18AVO,HFC Decomposition Project in Ulsan,PA,Fugitive emissions from production and consump...,HFCs,Registered,CDM - AM0001,Japan Quality Assurance Organisation (JQA),2003-12-11 00:00:00,Eastern Asia,...,CDM UNFCCC,tCO2e,https://cdm.unfccc.int,LJ80OQZR2JVOLTZZX4Y0EGB1N18AVO,Unknown,2023-11-27,Climate Experts,,HFC Decomposition Project in Ulsan,
2,SQA16OAS75TKWYZTTKK7G1LO0O8CMU,Brazil NovaGerar Landfill Gas to Energy Project,PA,Waste handling and disposal,Landfill gas,Registered,CDM - AM0003,Det Norske Veritas- CUK,2004-04-05 00:00:00,South America,...,CDM UNFCCC,tCO2e,https://cdm.unfccc.int,SQA16OAS75TKWYZTTKK7G1LO0O8CMU,Unknown,2023-11-27,EcoSecurities,"{""latitude"":-14.235004,""longitude"":-51.92528}",Brazil NovaGerar Landfill Gas to Energy Project,
3,HG20LFVJPFURJJ4M6E6MS07HTOZTR4,La Esperanza Hydroelectric Project,PA,Energy industries (renewable - / non-renewable...,Hydro,Registered,CDM - AMS-I.D.,Det Norske Veritas- CUK,2004-07-04 00:00:00,Central America,...,CDM UNFCCC,tCO2e,https://cdm.unfccc.int,HG20LFVJPFURJJ4M6E6MS07HTOZTR4,Unknown,2023-11-27,"WB-CF, 2E Carbon Access","{""latitude"":15.199999,""longitude"":-86.241905}",La Esperanza Hydroelectric Project,
4,V3ZBEVSXGFNT31E2CECR2NYE93FW5L,Project for GHG Emission Reduction by Thermal ...,PA,Fugitive emissions from production and consump...,HFCs,Registered,CDM - AM0001,Japan Quality Assurance Organisation (JQA),2005-12-06 00:00:00,Eastern Asia,...,CDM UNFCCC,tCO2e,https://cdm.unfccc.int,V3ZBEVSXGFNT31E2CECR2NYE93FW5L,Unknown,2023-11-27,"WB-CF, SEPA FECO","{""latitude"":35.86166,""longitude"":104.195397}",Project for GHG Emission Reduction by Thermal ...,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13168,QJD9XJ5HBYCRZR6YV9ELX4WLEC5BJ1,Reduction in steam consumption through revampi...,PA,Energy demand,EE Industry,Withdrawn,CDM - AM0018,Bureau Veritas Certification Holding SAS (BVCH),2007-03-21 00:00:00,Southern Asia,...,CDM UNFCCC,tCO2e,https://cdm.unfccc.int,QJD9XJ5HBYCRZR6YV9ELX4WLEC5BJ1,Unknown,2023-11-27,Rashtriya Chemicals & Fertilizers,"{""latitude"":20.593684,""longitude"":78.96288}",Reduction in steam consumption through revampi...,
13169,4FFA579VM5UA6G3BHJYNRH5Q5Q6S6W,Usina Petribu Renewable Generation with Sugarc...,PA,Energy industries (renewable - / non-renewable...,Biomass Energy,Withdrawn,CDM - ACM0006,SGS United Kingdom Limited (SGS),2006-04-12 00:00:00,South America,...,CDM UNFCCC,tCO2e,https://cdm.unfccc.int,4FFA579VM5UA6G3BHJYNRH5Q5Q6S6W,Unknown,2023-11-27,MaxAmbiental,"{""latitude"":-14.235004,""longitude"":-51.92528}",Usina Petribu Renewable Generation with Sugarc...,
13170,R33WIRKXKCNOCGHYW84XRF3SJ0KKNI,Power generation from renewable sources  Aiur...,PA,Energy industries (renewable - / non-renewable...,Hydro,Withdrawn,CDM - ACM0002,RINA Services S.p.A. (RINA),2009-05-08 00:00:00,South America,...,CDM UNFCCC,tCO2e,https://cdm.unfccc.int,R33WIRKXKCNOCGHYW84XRF3SJ0KKNI,Unknown,2023-11-27,Waycarbon,"{""latitude"":-14.235004,""longitude"":-51.92528}",Power generation from renewable sources  Aiur...,
13171,GT5ME4TC32NKLA6A22ZBR00FYBC54N,Roaring 40s Wind Farms Private Limited.,PA,Energy industries (renewable - / non-renewable...,Wind,Validated,CDM - ACM0002,Det Norske Veritas- CUK,2007-06-07 00:00:00,Southern Asia,...,CDM UNFCCC,tCO2e,https://cdm.unfccc.int,GT5ME4TC32NKLA6A22ZBR00FYBC54N,Unknown,2023-11-27,Satia Paper Mills,"{""latitude"":20.593684,""longitude"":78.96288}",Roaring 40s Wind Farms Private Limited.,


In [None]:

# Define the API endpoint
api_endpoint = "http://localhost:31310/v1/projects"

def datetime_to_string(value):
    if isinstance(value, datetime):
        return value.strftime("%Y-%m-%d")
    return value
# Function to send POST request for each row in the DataFrame
def post_projects(df):
    # Iterate over each row in the DataFrame
    for index, row in df[13074:13173].iterrows():
        # Construct the request body from the row, excluding 'projectLocation'
        # and adding 'projectLocations' which is not in df_project
        request_body = row.to_dict()
        request_body = {k: datetime_to_string(v) for k, v in row.to_dict().items()}
        # Example additional data for 'projectLocations' as it is not present in the DataFrame
        request_body['projectLocations'] = [
            {
                "country": request_body.get('country', 'Unknown'),
                "inCountryRegion": request_body.get('inCountryRegion', 'Unknown'),
                "geographicIdentifier":request_body.get('geographicIdentifier',{})  # replace with actual geographicIdentifier if available
            }
        ]
        
        # Remove the keys that are not needed for the POST request
        request_body.pop('country', None)
        request_body.pop('inCountryRegion', None)
        request_body.pop('geographicIdentifier', None)

       
        # Send POST request
        try:
            response = requests.post(api_endpoint, json=request_body)
            # Check if the request was successful
            if response.status_code == 200:
                print(f"Successfully posted project with ID: {row['originProjectId']}")
            else:
                print(request_body)
                print(f"Failed to post project with ID: {row['originProjectId']}. Status code: {response.status_code}. Message: {response.json()}")
                break
        except RequestException as e:
            print(request_body)
            print(f"Request failed for project with ID: {row['originProjectId']}. Error: {e}")
            break

# Call the function to start posting data
post_projects(df_project)
# df_project

In [118]:

df_issuances = pd.read_excel("CDM-Issuances-October.xlxs.xlsx")
# df_issuances.drop('DOE', axis=1, inplace=True)
df_issuances.drop('PA/PoA', axis=1, inplace=True)
df_issuances.drop('CP', axis=1, inplace=True)
df_issuances.drop('Monitoring report number', axis=1, inplace=True)
df_issuances.drop('Last_updated', axis=1, inplace=True)
df_issuances.drop('Project type (UNEP DTU)', axis=1, inplace=True)
df_issuances.drop('Project subtype (UNEP DTU)', axis=1, inplace=True)
df_issuances.drop('Units - Total', axis=1, inplace=True)
df_issuances.drop('Issuance date', axis=1, inplace=True)
df_issuances.drop('HostParty', axis=1, inplace=True)


issuances_columns={
    "CDM project reference number":"projectId",
    "DOE":"verificationBody",
    "Issuance process ID":"id",
    "Monitoring report started":"startDate",
    "Monitoring report ended":"endDate"
}
df_issuances['Monitoring report started'] = pd.to_datetime(df_issuances['Monitoring report started'])
df_issuances['Monitoring report ended'] = pd.to_datetime(df_issuances['Monitoring report ended'])
df_issuances['CDM project reference number'] = df_issuances['CDM project reference number'].map(id_to_id)
df_issuances = df_issuances[issuances_columns.keys()].rename(columns=issuances_columns)


df_issuances['verificationReportDate']=df_issuances['projectId'].map(verification_report_date_map)
df_issuances['verificationApproach']=""

df_issuances = fillna_by_dtype(df_issuances)

df_issuances

  if df[column].dtype == np.number:  # If the column is numeric
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  elif df[column].dtype == np.object:  # If the column is of object type (e.g., strings)


Unnamed: 0,projectId,verificationBody,id,startDate,endDate,verificationReportDate,verificationApproach
0,IBK3RIR7HVE1SZLK7YZMZPZ02UW7TS,KEarth,4KEarth1568615888.93,2017-04-01,2019-07-31,2014-01-01,
1,KZPP9BVTCRTYEUSRYB2L0ZNRJUDF2X,KEarth,4KEarth1568616446.5,2017-04-01,2019-07-31,2014-05-31,
2,P2UF559LX39J2VDPTZ13PAZUO3DRVV,KEarth,4KEarth1585297661.77,2018-01-01,2019-12-31,2011-01-01,
3,AXF2WJ2NOM4CHZGDOJW9S9DINIAQWS,KEarth,4KEarth1596186178.27,2018-01-01,2020-03-21,2013-03-22,
4,JOUI5BA4DLQS9MVXDKRLL1HHFVQ4M3,KEarth,4KEarth1599113457.49,2015-04-01,2017-12-31,2013-10-10,
...,...,...,...,...,...,...,...
12543,3A5UYHPO8SISCX8T4BJ1AMN6SO780J,No info,sergeyf731618828753.52,2016-09-01,2017-09-30,2013-03-15,
12544,3A5UYHPO8SISCX8T4BJ1AMN6SO780J,No info,sergeyf731618828921.37,2017-10-01,2018-12-08,2013-03-15,
12545,D4CRFLKI60AFK14YEHRDNPY16829DF,No info,sergeyf731650363867.4,2016-10-01,2017-06-30,2012-11-01,
12546,BN70H0KER9GB6Y0BPJBJ3Q4IBIXK7C,No info,slenzen1552989570.98,2017-01-01,2019-01-31,2013-12-27,


In [119]:
df_units = pd.read_excel("CDM-Issuances-October.xlxs.xlsx")


units_columns={
    "HostParty":"unitOwner",
    "CDM project reference number":"projectId",
    "Issuance process ID":"issuanceId",
    "DOE":"verificationBody",
    "Issuance process ID":"id",
   "Units - Total":"unitCount",
   "Issuance date":"vintageYear",

}

df_units = df_units[units_columns.keys()].rename(columns=units_columns)
df_units["unitOwner"] = df_units["unitOwner"].map(country_name_map)
df_units["projectId"] = df_units["projectId"].map(id_to_id)
df_units["countryJurisdictionOfOwner"] = df_units["unitOwner"]
df_units["inCountryJurisdictionOfOwner"] = ""
df_units["projectLocationId"] = ""
df_units["geographyIdentifier"] = df_units["unitOwner"].map(geography_map)
df_units["correspondingAdjustmentDeclaration"] = "Unknown"
df_units["unitRegistryLink"] = "http://cdm.unfccc.int/"
df_units["vintageYear"] = df_units["vintageYear"].astype(str).str[:4]
df_units["unitStatus"] = "Held"
df_units["unitType"] = ""
df_units["unitBlockStart"] = ""
df_units["unitBlockEnd"] = ""


df_units = fillna_by_dtype(df_units)

df_units

  if df[column].dtype == np.number:  # If the column is numeric
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  elif df[column].dtype == np.object:  # If the column is of object type (e.g., strings)


Unnamed: 0,unitOwner,projectId,id,verificationBody,unitCount,vintageYear,countryJurisdictionOfOwner,inCountryJurisdictionOfOwner,projectLocationId,geographyIdentifier,correspondingAdjustmentDeclaration,unitRegistryLink,unitStatus,unitType,unitBlockStart,unitBlockEnd
0,India,IBK3RIR7HVE1SZLK7YZMZPZ02UW7TS,4KEarth1568615888.93,KEarth,20519,2020,India,,,"{""latitude"":20.593684,""longitude"":78.96288}",Unknown,http://cdm.unfccc.int/,Held,,,
1,India,KZPP9BVTCRTYEUSRYB2L0ZNRJUDF2X,4KEarth1568616446.5,KEarth,18925,2020,India,,,"{""latitude"":20.593684,""longitude"":78.96288}",Unknown,http://cdm.unfccc.int/,Held,,,
2,India,P2UF559LX39J2VDPTZ13PAZUO3DRVV,4KEarth1585297661.77,KEarth,24595,2021,India,,,"{""latitude"":20.593684,""longitude"":78.96288}",Unknown,http://cdm.unfccc.int/,Held,,,
3,Thailand,AXF2WJ2NOM4CHZGDOJW9S9DINIAQWS,4KEarth1596186178.27,KEarth,43219,2021,Thailand,,,"{""latitude"":15.870032,""longitude"":100.992541}",Unknown,http://cdm.unfccc.int/,Held,,,
4,India,JOUI5BA4DLQS9MVXDKRLL1HHFVQ4M3,4KEarth1599113457.49,KEarth,116637,2022,India,,,"{""latitude"":20.593684,""longitude"":78.96288}",Unknown,http://cdm.unfccc.int/,Held,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12543,India,3A5UYHPO8SISCX8T4BJ1AMN6SO780J,sergeyf731618828753.52,No info,1231148,2022,India,,,"{""latitude"":20.593684,""longitude"":78.96288}",Unknown,http://cdm.unfccc.int/,Held,,,
12544,India,3A5UYHPO8SISCX8T4BJ1AMN6SO780J,sergeyf731618828921.37,No info,1128556,2022,India,,,"{""latitude"":20.593684,""longitude"":78.96288}",Unknown,http://cdm.unfccc.int/,Held,,,
12545,China,D4CRFLKI60AFK14YEHRDNPY16829DF,sergeyf731650363867.4,No info,434350,2022,China,,,"{""latitude"":35.86166,""longitude"":104.195397}",Unknown,http://cdm.unfccc.int/,Held,,,
12546,India,BN70H0KER9GB6Y0BPJBJ3Q4IBIXK7C,slenzen1552989570.98,No info,102514,2020,India,,,"{""latitude"":20.593684,""longitude"":78.96288}",Unknown,http://cdm.unfccc.int/,Held,,,


In [120]:
df_retired = pd.read_excel("CDM_Retired_October.xlsx")
df_retired['Project number'] = df_retired['Project number'].fillna(0).astype(int)
df_retired["Status"] = "Retired"
df_retired["PARTY CODE"] = df_retired['PARTY CODE'].map(country_name_map)


vintage_year_map= df_units.set_index('projectId')['vintageYear'].to_dict()
issuance_id_map= df_units.set_index('projectId')['id'].to_dict()
verif_body_units_map= df_units.set_index('projectId')['verificationBody'].to_dict()
geog_identifier_units_map = df_units.set_index('projectId')['geographyIdentifier'].to_dict()




# df_retired


retired_columns={
    "Status":"unitStatus",
    "Total":"unitTotal",
    "PARTY CODE":"unitOwner",
    "Project number":"projectId",
    "SERIAL RANGE":"SERIAL RANGE"
}



df_retired = df_retired[retired_columns.keys()].rename(columns=retired_columns)
df_retired[['unitBlockStart', 'unitBlockEnd']] = df_retired['SERIAL RANGE'].str.split('-', 1, expand=True)
df_retired['unitBlockStart'] = df_retired['unitBlockStart'].fillna(0).astype(int)
df_retired['unitBlockEnd'] = df_retired['unitBlockEnd'].fillna(0).astype(int)
df_retired.drop('SERIAL RANGE', axis=1, inplace=True)
df_retired['projectId']=df_retired['projectId'].map(id_to_id)
df_retired["countryJurisdictionOfOwner"] = df_retired["unitOwner"]
df_retired["inCountryJurisdictionOfOwner"] = ""
df_retired["unitType"] = ""
df_retired["correspondingAdjustmentDeclaration"] = "Unknown"
df_retired["unitRegistryLink"] = "http://cdm.unfccc.int/"
df_retired["vintageYear"]= df_retired['projectId'].map(vintage_year_map)
df_retired["id"]= df_retired['projectId'].map(issuance_id_map)
df_retired["verificationBody"]= df_retired['projectId'].map(verif_body_units_map)
df_retired['geographyIdentifier']= df_retired['projectId'].map(geog_identifier_units_map)
df_retired["projectLocationId"] = ""
df_retired = fillna_by_dtype(df_retired)





df_retired


  if df[column].dtype == np.number:  # If the column is numeric
Deprecated in NumPy 1.20; for more details and guidance: https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
  elif df[column].dtype == np.object:  # If the column is of object type (e.g., strings)


Unnamed: 0,unitStatus,unitTotal,unitOwner,projectId,unitBlockStart,unitBlockEnd,countryJurisdictionOfOwner,inCountryJurisdictionOfOwner,unitType,correspondingAdjustmentDeclaration,unitRegistryLink,vintageYear,id,verificationBody,geographyIdentifier,projectLocationId
0,Retired,2210,Honduras,HG20LFVJPFURJJ4M6E6MS07HTOZTR4,1,2210,Honduras,,,Unknown,http://cdm.unfccc.int/,2015,RWTUV1425040593.92,TUV NORD,"{""latitude"":15.199999,""longitude"":-86.241905}",
1,Retired,7304,Honduras,76B2SQ7Z56786E62H2ISI2HHQNBYX0,2211,9514,Honduras,,,Unknown,http://cdm.unfccc.int/,2014,TUEV-RHEIN1366872726.97,TUV NORD,"{""latitude"":15.199999,""longitude"":-86.241905}",
2,Retired,48230,India,NMHQDVVH5DX47Y8VGVARMLMF76RWAP,1,48230,India,,,Unknown,http://cdm.unfccc.int/,2014,TUEV-SUED1397119754.81,TUV SUD,"{""latitude"":20.593684,""longitude"":78.96288}",
3,Retired,45988,Brazil,T3TA3SUQYRH7QZ2O5HCAO56AD062PE,1,45988,Brazil,,,Unknown,http://cdm.unfccc.int/,2005,SGS-UKL1124438867.71,SGS,"{""latitude"":-14.235004,""longitude"":-51.92528}",
4,Retired,542829,India,YL3CYU9HUXW76LEKIZMWMCVJF1G9E1,48231,591059,India,,,Unknown,http://cdm.unfccc.int/,2013,SGS-UKL1349867953.32,SGS,"{""latitude"":20.593684,""longitude"":78.96288}",
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12529,Retired,3241,India,HZ71HCGMVWBSLX4COTU7NONB1ILUBE,325185188,325188428,India,,,Unknown,http://cdm.unfccc.int/,2013,RWTUV1350367048.39,TUV NORD,"{""latitude"":20.593684,""longitude"":78.96288}",
12530,Retired,59442,India,HZ71HCGMVWBSLX4COTU7NONB1ILUBE,325188429,325247870,India,,,Unknown,http://cdm.unfccc.int/,2013,RWTUV1350367048.39,TUV NORD,"{""latitude"":20.593684,""longitude"":78.96288}",
12531,Retired,167997,China,FK6KNM929ANB7LR9JBWHCTVNJOSOHQ,1194113250,1194281246,China,,,Unknown,http://cdm.unfccc.int/,2023,CTI1666763574.07,CTI,"{""latitude"":35.86166,""longitude"":104.195397}",
12532,Retired,540390,Brazil,M2UR4SLALKN8YAZT5ZJ11YR6BDXBLG,211555446,212095835,Brazil,,,Unknown,http://cdm.unfccc.int/,2023,RINA1677171969.86,RINA,"{""latitude"":-14.235004,""longitude"":-51.92528}",


In [48]:

# Define the API endpoint
api_endpoint = "http://localhost:31310/v1/projects"

def datetime_to_string(value):
    if isinstance(value, datetime):
        return value.strftime("%Y-%m-%d")
    return value
# Function to send POST request for each row in the DataFrame
def post_projects(df):
    # Iterate over each row in the DataFrame
    for index, row in df[8004:8005].iterrows():
        # Construct the request body from the row, excluding 'projectLocation'
        # and adding 'projectLocations' which is not in df_project
        request_body = row.to_dict()
        request_body = {k: datetime_to_string(v) for k, v in row.to_dict().items()}
        # Example additional data for 'projectLocations' as it is not present in the DataFrame
        request_body['projectLocations'] = [
            {
                "country": request_body.get('country', 'Unknown'),
                "inCountryRegion": request_body.get('inCountryRegion', 'Unknown'),
                "geographicIdentifier":request_body.get('geographicIdentifier',{})  # replace with actual geographicIdentifier if available
            }
        ]
        
        # Remove the keys that are not needed for the POST request
        request_body.pop('country', None)
        request_body.pop('inCountryRegion', None)
        request_body.pop('geographicIdentifier', None)

       
        # Send POST request
        try:
            response = requests.post(api_endpoint, json=request_body)
            # Check if the request was successful
            if response.status_code == 200:
                print(f"Successfully posted project with ID: {row['originProjectId']}")
            else:
                print(request_body)
                print(f"Failed to post project with ID: {row['originProjectId']}. Status code: {response.status_code}. Message: {response.json()}")
                break
        except RequestException as e:
            print(request_body)
            print(f"Request failed for project with ID: {row['originProjectId']}. Error: {e}")
            break

# Call the function to start posting data
post_projects(df_cleaned_project)
# df_project

{'originProjectId': '132XKMFCRDJ3IU5XP99CUXV4G0DSK8', 'projectName': 'Southern African Solar LED Programme', 'program': 'PoA', 'sector': 'Energy industries (renewable - / non-renewable sources)', 'projectType': 'Solar', 'projectStatus': 'Registered', 'methodology': 'CDM - AMS-III.AR.', 'validationBody': 'Bureau Veritas Certification Holding SAS (BVCH)', 'validationDate': '2012-01-22', 'registryOfOrigin': 'CDM UNFCCC', 'unitMetric': 'tCO2e', 'projectLink': 'https://cdm.unfccc.int', 'projectId': '132XKMFCRDJ3IU5XP99CUXV4G0DSK8', 'coveredByNDC': 'Unknown', 'projectStatusDate': '2023-11-27', 'projectDeveloper': 'EcoMetrix Africa', 'description': 'Southern African Solar LED Programme', 'ndcInformation': 'NA', 'projectLocations': [{'country': 'NA', 'inCountryRegion': 'Eastern Africa; Southern Africa', 'geographicIdentifier': 'NA'}]}
Failed to post project with ID: 132XKMFCRDJ3IU5XP99CUXV4G0DSK8. Status code: 400. Message: {'message': 'Data Validation error', 'errors': ["ProjectLocation Count