In [12]:
import pandas as pd
import numpy as np
import glob
import os

# Import data frames

First, we start by importing the different csv files and continue by concatenating the files into one dataframe.

In [4]:
def get_data(path, filename_start):
    '''a function to store the content of a directory into a pd dataframe'''
    
    # checking the contents of the directory using the os-module. 
    files = [
        file for file in os.listdir(path) 
        if file.startswith(filename_start)
        ]
    
    print(files)  
    
    # iterate through files and add to the data frame
    all_data = pd.DataFrame()
    for file in files:
        current_data = pd.read_csv(path+"/"+file)
        all_data = pd.concat([all_data, current_data], ignore_index=True)

    # remove whitespaces from column names
    all_data.columns = all_data.columns.str.replace(' ', '')
        
    print(f'''A dataframe with {all_data.shape[0]} rows and {all_data.shape[1]} columns has been created!\nAll whitespace in column names has been removed.''')
    
    return all_data

## Cities Disclosing / Responses

In [0]:
# add all cities disclosers to a dateframe
df_cities_disc = get_data(
    path='data/Cities/Disclosing',
    filename_start='20'
    )

In [5]:
# add all cities responses to a dateframe
df_cities_resp = get_data(
    path='data/Cities/Responses',
    filename_start='20'
    )

['2019_Full_Cities_Dataset.csv', '2018_Full_Cities_Dataset.csv', '2020_Full_Cities_Dataset.csv']
A dataframe with 1542496 rows and 18 columns has been created!
All whitespace in column names has been removed.


## Corporates Disclosing / Responses

In [0]:
# add all corporates disclosings to a dateframe
df_corp_disc = get_data(
    path='data/Corporations/Disclosing',
    filename_start='20'
    )

In [0]:
# add all corporates  responses to a dateframe
df_corp_resp = get_data(
    path='data/Corporations/Responses',
    filename_start='20'
    )

# Dataframe Description

## Cities

Below is a description of the columns for the **City Disclosures**:

**Year Reported to CDP** : Cities Disclosure cycle survey year  

**Account Number** : The unique identifier given to every city organisation that receives a request to complete a CDP questionnaire  

**Organization**: Name of the City organisation disclosing  

**City**: Name of the City the city organisation is disclosing on behalf of  

**Country**: Country of city  

**CDP Region**: CDP operation region City is located within  

**Reporting Authority**: "CDP collects information on behalf of a number of additional initiatives. Other than CDP Cities, organisations can indicate the additional initiatve they are have answered questions for ","C40,CDP Cities,ICLEI - Local Governments for Sustainability","Includes Global Covenant of Mayors for Climate and Energy, ICLEI Green Climate Cities, ICLEI Ecomobility / Ecologistics, C40 Cities Climate Leadership Group"  

**Access**: Cities can submit CDP response in public status or in non public status. Non public responses can only be shared within CDP and between signatory partners. Public responses can be shared beyond CDP City organisations,public  

**First Time Discloser**: Is the City disclosing for the first time to CDP  

**Population**: Citiy population estimate  

**Population Year**: City population estimate year  

**City Location**: "Citty location cordinates by longitude, latitide"  

**Last update**: Resonse record last update  

*Below is a description of the column types for **City Responses**:*  


**Questionnaire** : Questionnaire and questionnaire year the company's response relates to

**Year Reported to CDP** : Cities Disclosure cycle survey year  

**Account Number** : The unique identifier given to every city organisation that receives a request to complete a CDP questionnaire  

**Organization** : Name of the City organisation disclosing  

**Country** : Country of city    

**CDP Region** : CDP operation region City is located within  

**Parent Section** : Module ('Parent Section') of the CDP questionnaire the question belongs to (e.g. Emissions Reduction)  

**Section** : Section of the CDP questionnaire the question belongs to (e.g.Mitigation Actions)  

**Question Number** : Question number of response (e.g. 5.4) 
 
**Question Name** : Describes the anticipated outcomes of the most impactful mitigation actions your city is currently undertaking; the total cost of the action and how much is being funded by the local government 

**Column Number** : Column number of matrix set (Table) or matrix dynamic (Add Rows Table) column in question  reponse table  

**Column Name** : Column name of matrix set (Table) or matrix dynamic (Add Rows Table) column in question  reponse table,Co-benefit area 

**Row Number** : "Row number of matrix set (Table) or matrix dynamic (Add Rows Table) row in question  reponse table. If originally submitted in a table format, this will indicate the number of rows of response data has been entered in response to a question. ",  

**Row Name** : Row name of matrix set (Table) or matrix dynamic (Add Rows Table) row in question  reponse table. Description of data type for RowNumber where applicable, Population that is food insecure)  

**Response Answer** : Question response submitted by company,Greening the economy,"Can range from string, integar and double data types. Question not applicable = This question was not presented to the company to be answered due to conditional logic in the questionnaire. NA = The company was presented with this question but did not respond"  

## Corporates

No description so far!

# Data Content

In [9]:
df_cities_resp.value_counts('Questionnaire')

Questionnaire
Cities 2020        869313
Cities 2019        486349
CDP Cities 2018    186834
dtype: int64

In [10]:
df_cities_resp.query('QuestionNumber == "14.4" & Questionnaire == "Cities 2020"')

Unnamed: 0,Questionnaire,YearReportedtoCDP,AccountNumber,Organization,Country,CDPRegion,ParentSection,Section,QuestionNumber,QuestionName,ColumnNumber,ColumnName,RowNumber,RowName,ResponseAnswer,Comments,FileName,Lastupdate
2300,Cities 2020,2020,49342,City of Rochester,United States of America,North America,Water Security,Water Supply Management,14.4,Does your city have a publicly available Water...,0,,0,,Yes,,,09/07/2020 09:45:36 AM
2820,Cities 2020,2020,50551,City of Long Beach,United States of America,North America,Water Security,Water Supply Management,14.4,Does your city have a publicly available Water...,0,,0,,Yes,,,09/07/2020 09:45:36 AM
5426,Cities 2020,2020,54290,Qingdao Municipal Government,China,East Asia,Water Security,Water Supply Management,14.4,Does your city have a publicly available Water...,0,,0,,Yes,,,09/07/2020 09:45:36 AM
5949,Cities 2020,2020,42388,Intendencia de Montevideo,Uruguay,Latin America,Water Security,Water Supply Management,14.4,Does your city have a publicly available Water...,0,,0,,Intending to undertake in next 2 years,,,09/07/2020 09:45:36 AM
5988,Cities 2020,2020,60369,Alcaldía Municipal de Armenia,Colombia,Latin America,Water Security,Water Supply Management,14.4,Does your city have a publicly available Water...,0,,0,,Intending to undertake in next 2 years,,,09/07/2020 09:45:36 AM
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
864416,Cities 2020,2020,58668,"City of New Bedford, MA",United States of America,North America,Water Security,Water Supply Management,14.4,Does your city have a publicly available Water...,0,,0,,Intending to undertake in next 2 years,,,09/07/2020 09:45:36 AM
865810,Cities 2020,2020,43930,The Hague,Netherlands,Europe,Water Security,Water Supply Management,14.4,Does your city have a publicly available Water...,0,,0,,Yes,,,09/07/2020 09:45:36 AM
867912,Cities 2020,2020,2185,Bristol City Council,United Kingdom of Great Britain and Northern I...,Europe,Water Security,Water Supply Management,14.4,Does your city have a publicly available Water...,0,,0,,Yes,,,09/07/2020 09:45:36 AM
868284,Cities 2020,2020,69995,Kemi,Finland,Europe,Water Security,Water Supply Management,14.4,Does your city have a publicly available Water...,0,,0,,Not intending to undertake,,,09/07/2020 09:45:36 AM


In [7]:
def question_to_col(data, QuestionNumber, ColumnNumber):
    '''A simple function to create a new dataframe with all responses to one question.'''
    small_df = pd.DataFrame(columns=[QuestionNumber])
    small_df[QuestionNumber] = data['ResponseAnswer'].loc[(data['QuestionNumber'] == QuestionNumber) & (data['ColumnNumber'] == int(ColumnNumber))]
    return small_df

In [11]:
question_to_col(data=df_cities_resp, QuestionNumber='2.1', ColumnNumber='1')

Unnamed: 0,2.1
81,Flood and sea level rise > Permanent inundation
457,Water Scarcity > Drought
598,Extreme Precipitation > Fog
634,Extreme hot temperature > Heat wave
640,Extreme hot temperature > Extreme hot days
...,...
867990,
867999,Water Scarcity > Drought
868373,Storm and wind > Severe wind
868679,Wild fire > Forest fire
