In [None]:
import os
# Walk the Kaggle input directory tree and print the full path of every file found.
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import re

import seaborn as sns
%matplotlib inline
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
import plotly.io as pio
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
from wordcloud import WordCloud, STOPWORDS
from matplotlib_venn import venn3, venn3_circles, venn3_unweighted
from matplotlib_venn import venn2, venn2_circles, venn2_unweighted
pd.set_option('display.max_columns', 500)


In [None]:
# EDA of Cities
# Discloser datasets: one CSV per reporting year (2018-2020).
cities18= pd.read_csv("../input/cdp-unlocking-climate-solutions/Cities/Cities Disclosing/2018_Cities_Disclosing_to_CDP.csv")
cities19 = pd.read_csv("../input/cdp-unlocking-climate-solutions/Cities/Cities Disclosing/2019_Cities_Disclosing_to_CDP.csv")
cities20 = pd.read_csv("../input/cdp-unlocking-climate-solutions/Cities/Cities Disclosing/2020_Cities_Disclosing_to_CDP.csv")
# Survey (full responses) datasets: one CSV per reporting year (2018-2020).
citySurvey18 = pd.read_csv("../input/cdp-unlocking-climate-solutions/Cities/Cities Responses/2018_Full_Cities_Dataset.csv")
citySurvey19 = pd.read_csv("../input/cdp-unlocking-climate-solutions/Cities/Cities Responses/2019_Full_Cities_Dataset.csv")
citySurvey20 = pd.read_csv("../input/cdp-unlocking-climate-solutions/Cities/Cities Responses/2020_Full_Cities_Dataset.csv")


# cities metadata - lat,lon locations for US cities
cities_meta_df = pd.read_csv("../input/cdp-unlocking-climate-solutions/Supplementary Data/Simple Maps US Cities Data/uscities.csv")

# Keep only city, state code and coordinates, renaming 'city'/'state_id' to
# 'address_city'/'state' so the frame can later be joined on those two keys.
cities_meta_df = cities_meta_df[['city', 'state_id', 'lat','lng']].rename(columns={'city' : 'address_city', 'state_id' : 'state'})
# Preview the first rows of the trimmed metadata.
cities_meta_df.head()
# First-time disclosers

def newrecruits(df):
    """Return the year and CDP region of every first-time discloser in ``df``.

    Rows are kept when 'First Time Discloser' equals "Yes". The result has
    exactly two columns, 'Year' (from 'Year Reported to CDP') and
    'CDPRegion' (from 'CDP Region'), and keeps the original row index.
    """
    column_names = {'Year Reported to CDP': 'Year', 'CDP Region': 'CDPRegion'}
    is_first_time = df['First Time Discloser'] == "Yes"
    return df.loc[is_first_time, list(column_names)].rename(columns=column_names)

def newcount(df):
    """Count rows per ('CDPRegion', 'Year') pair.

    Returns a frame with columns 'CDPRegion', 'Year' and 'Total', where
    'Total' is the number of rows of ``df`` in each group.
    """
    group_sizes = df.groupby(['CDPRegion', 'Year']).size().rename("Total")
    return group_sizes.reset_index()


def graphinfo(df):
    """Build the plotting table for one discloser frame.

    Selects the first-time disclosers with ``newrecruits`` and then counts
    them per region and year with ``newcount``.
    """
    return newcount(newrecruits(df))


# First-time discloser counts for each year, then stacked into one table
# (the 'Year' column tells the rows apart).
city18, city19, city20 = (
    graphinfo(discloser_frame) for discloser_frame in (cities18, cities19, cities20)
)
city = pd.concat([city18, city19, city20])



### EDA of Cities Data (Discloser + Survey) 

1. How many new cities joined CDP in each year (2018, 2019, 2020)?

In [None]:
# Bar chart of first-time discloser counts per CDP region, with one facet
# column per reporting year.
df = city
fig = px.bar(df, x="CDPRegion", y="Total",
          color="CDPRegion", facet_col="Year",title="First Time Discloser"
           )
# The x axis of each facet is hidden; regions are identified by bar colour.
# NOTE(review): three xaxis*_visible flags assume exactly three year facets.
fig.update_layout(
    height=417,
    xaxis_title="CDP Region",
    yaxis_title="Total",
    xaxis_visible=False,
    xaxis2_visible=False,
    xaxis3_visible=False)
fig.show();

* **Based on the figure above, more cities joined CDP in 2019 than in 2018 or 2020.**
* **The 2019 figure also shows a higher level of geographical diversity than 2018 and 2020.**

### CDP Cities membership over the years (2018,2019, and 2020)

In [None]:
# Account numbers that disclosed in each year, as sets for overlap analysis.
set1 = set(cities18['Account Number'])  
set2 = set(cities19['Account Number']) 
set3 = set(cities20['Account Number'])
# Number of accounts present in all three years (computed but not used below).
intersection = len(set1.intersection(set2, set3))
# Three-way Venn diagram of the yearly account sets.
venn3([set1, set2, set3], set_labels = ('Yr_2018', 'Yr_2019', 'Yr_2020'), alpha = 0.5)
plt.title('CDP Membership Retention over the years(2018-2020)')


**The Venn diagram shows that 378 cities were CDP members in all three years (2018-2020).**

In [None]:
## Merge survey and city discloser frames.
# Keep only the discloser columns that are joined onto the survey responses
# (2019 and 2020).
filtercities = cities19[['Account Number', 'City', 'Reporting Authority' ]]
filtercities20 = cities20[['Account Number', 'City', 'Reporting Authority' ]]

def mergesurveydis(df1, df2):
    """Right-join ``df1`` onto ``df2`` using the 'Account Number' column.

    Every row of ``df2`` is kept; columns of ``df1`` are filled with NaN
    where ``df2`` has an account number that ``df1`` lacks.
    """
    return df1.merge(df2, how='right', on='Account Number')

# Survey responses for 2019 and 2020 with the discloser columns attached.
mergedcity19 =mergesurveydis(filtercities, citySurvey19)
mergedcity20 =mergesurveydis(filtercities20, citySurvey20)



## EDA of 2019 and 2020 Cities' Data
##### General sentiment of Climate Change and Water Security by Sector Region

In [None]:
def filtercomments(sub,df):
    """Return the non-null 'Comments' values of ``df`` for one CDP region.

    ``sub`` is compared for equality against the 'CDP Region' column. The
    result is a Series named 'Comments' that keeps the original row index.
    """
    wanted_rows = df.Comments.notnull() & (df['CDP Region'] == sub)
    return df.loc[wanted_rows, 'Comments']

def converttostr(input_seq, seperator):
    """Concatenate the strings in ``input_seq``, placing ``seperator`` between them."""
    return seperator.join(input_seq)


def filterjoin(sub, df):
    """Return all comments of CDP region ``sub`` in ``df`` as one space-separated string.

    Uses ``filtercomments`` to select the comments and ``converttostr`` to
    join them.
    """
    return converttostr(filtercomments(sub, df), ' ')

# Build one joined comment string per CDP region from the merged 2019 data.
# Each name below holds the text that is later fed to a word cloud.
Latin_America  = filterjoin('Latin America',mergedcity19)
North_America = filterjoin('North America',mergedcity19)                
Europe = filterjoin('Europe', mergedcity19)    
Southeast_Asia = filterjoin('Southeast Asia and Oceania', mergedcity19)                           
Africa  =  filterjoin('Africa', mergedcity19)                       
East_Asia = filterjoin('East Asia', mergedcity19)        
South_West_Asia = filterjoin('South and West Asia', mergedcity19)                        
Middle_East  =  filterjoin('Middle East', mergedcity19)    

#create word cloud for each CDP Region: 

# Generate the image
# Extra stop words (URL fragments etc.) added to wordcloud's shared STOPWORDS
# set; the update mutates that set for the rest of the session.
app_words = ['https', 'appendix', 'http', 'html']
STOPWORDS.update(app_words)



# Each region name is rebound here from its comment string to the generated
# WordCloud object, so this cell cannot be re-run without re-running the
# cell that builds the strings.
# NOTE(review): Latin America uses min_word_length=10 while every other
# region uses 8 - confirm this difference is intentional.
Latin_America = WordCloud(max_words=100, min_word_length=10, background_color='black', colormap='Set2',collocations=False, stopwords = STOPWORDS).generate(Latin_America)
North_America = WordCloud(max_words=100, min_word_length=8, background_color='black', colormap='Set2',collocations=False, stopwords = STOPWORDS).generate(North_America)
Europe  = WordCloud(max_words=100, min_word_length=8, background_color='black', colormap='Set2',collocations=False, stopwords = STOPWORDS).generate(Europe)
Southeast_Asia = WordCloud(max_words=100, min_word_length=8, background_color='black', colormap='Set2',collocations=False, stopwords = STOPWORDS).generate(Southeast_Asia)
Africa = WordCloud(max_words=100, min_word_length=8, background_color='black', colormap='Set2', collocations=False,stopwords = STOPWORDS).generate(Africa)
East_Asia = WordCloud(max_words=100, min_word_length=8, background_color='black', colormap='Set2',collocations=False, stopwords = STOPWORDS).generate(East_Asia)
South_West_Asia = WordCloud(max_words=100, min_word_length=8, background_color='black', colormap='Set2',collocations=False, stopwords = STOPWORDS).generate(South_West_Asia)
Middle_East= WordCloud(max_words=100, min_word_length=8, background_color='black', colormap='Set2', collocations=False,stopwords = STOPWORDS).generate(Middle_East)

# visualize the image
# One (title, word cloud) pair per CDP region, listed in subplot order
# (4 rows x 2 columns, filled row by row). Driving the figure from this
# list replaces eight copy-pasted subplot/title/imshow stanzas.
region_clouds = [
    ("Latin America", Latin_America),
    ("North America", North_America),
    ("Europe", Europe),
    ("South East Asia", Southeast_Asia),
    ("Africa", Africa),
    ("East Asia", East_Asia),
    ("South and West Asia", South_West_Asia),
    ("Middle East", Middle_East),
]

plt.figure(figsize=(40, 30))
for panel_number, (region_title, region_cloud) in enumerate(region_clouds, start=1):
    # plt.subplot(4, 2, k) addresses the same panel as the 42k shorthand.
    plt.subplot(4, 2, panel_number)
    plt.title(region_title, fontsize=57)
    plt.imshow(region_cloud, interpolation='bilinear', alpha=1)
plt.show()



Analysis of US Cities

In [None]:
# cities metadata - CDP metadata on organisation HQ cities
cities_cdpmeta_df = pd.read_csv("../input/cdp-unlocking-climate-solutions/Supplementary Data/Locations of Corporations/NA_HQ_public_data.csv")
# import cities response df
# NOTE(review): this is the same file already loaded as citySurvey20 in the
# data-loading cell; it is read again here into a separate frame.
cities_df = pd.read_csv("../input/cdp-unlocking-climate-solutions/Cities/Cities Responses/2020_Full_Cities_Dataset.csv")
# state abbreviation dictionary
def list_dedupe(x):
    """Return the items of ``x`` as a list with duplicates removed, keeping first-seen order."""
    first_seen = {}
    for item in x:
        first_seen.setdefault(item, None)
    return list(first_seen)
# Lookup from full US state / territory name to its two-letter postal code.
us_state_abbrev = {
    'Alabama': 'AL',
    'Alaska': 'AK',
    'American Samoa': 'AS',
    'Arizona': 'AZ',
    'Arkansas': 'AR',
    'California': 'CA',
    'Colorado': 'CO',
    'Connecticut': 'CT',
    'Delaware': 'DE',
    'District of Columbia': 'DC',
    'Florida': 'FL',
    'Georgia': 'GA',
    'Guam': 'GU',
    'Hawaii': 'HI',
    'Idaho': 'ID',
    'Illinois': 'IL',
    'Indiana': 'IN',
    'Iowa': 'IA',
    'Kansas': 'KS',
    'Kentucky': 'KY',
    'Louisiana': 'LA',
    'Maine': 'ME',
    'Maryland': 'MD',
    'Massachusetts': 'MA',
    'Michigan': 'MI',
    'Minnesota': 'MN',
    'Mississippi': 'MS',
    'Missouri': 'MO',
    'Montana': 'MT',
    'Nebraska': 'NE',
    'Nevada': 'NV',
    'New Hampshire': 'NH',
    'New Jersey': 'NJ',
    'New Mexico': 'NM',
    'New York': 'NY',
    'North Carolina': 'NC',
    'North Dakota': 'ND',
    'Northern Mariana Islands':'MP',
    'Ohio': 'OH',
    'Oklahoma': 'OK',
    'Oregon': 'OR',
    'Pennsylvania': 'PA',
    'Puerto Rico': 'PR',
    'Rhode Island': 'RI',
    'South Carolina': 'SC',
    'South Dakota': 'SD',
    'Tennessee': 'TN',
    'Texas': 'TX',
    'Utah': 'UT',
    'Vermont': 'VT',
    'Virgin Islands': 'VI',
    'Virginia': 'VA',
    'Washington': 'WA',
    'West Virginia': 'WV',
    'Wisconsin': 'WI',
    'Wyoming': 'WY'
}

# Map full state names in 'address_state' to abbreviations; names not in the
# dictionary become NaN at this step.
cities_cdpmeta_df['state'] = cities_cdpmeta_df['address_state'].map(us_state_abbrev)

# Where the lookup found nothing, fall back to the original 'address_state'
# value, then patch two specific spellings by hand.
cities_cdpmeta_df['state'] = cities_cdpmeta_df['state'].fillna(cities_cdpmeta_df['address_state'])
cities_cdpmeta_df['state'] = cities_cdpmeta_df['state'].replace({'ALBERTA':'AB'})
cities_cdpmeta_df['address_city'] = cities_cdpmeta_df['address_city'].replace({'CALGARY':'Calgary'})
cities_cdpmeta_df= cities_cdpmeta_df.drop(columns=['address_state'])

# Combined "City, ST" label.
cities_cdpmeta_df['city_state'] = cities_cdpmeta_df['address_city'].str.cat(cities_cdpmeta_df['state'],sep=", ")
# Count non-null 'organization' values per (city, state, city_state) group,
# sorted with the largest count first and stored as 'num_orgs'.
cities_count = cities_cdpmeta_df[['organization', 'address_city', 'state', 'city_state']].groupby(['address_city', 'state', 'city_state']).count().sort_values(by = ['organization'],ascending = False).reset_index().rename(columns={'organization' : 'num_orgs'})

# Temporarily expose each frame's row index as an id column
# ('city_id' / 'city_org_id') so ids can be listed next to names.
cities_count.reset_index(inplace=True)
cities_count = cities_count.rename(columns = {'index':'city_id'})
cities_df.reset_index(inplace=True)
cities_df = cities_df.rename(columns = {'index':'city_org_id'})

# De-duplicated lists of ids and labels for HQ cities and for responding
# city organisations.
city_id_no = list_dedupe(cities_count['city_id'].tolist())
city_name = list_dedupe(cities_count['address_city'].tolist())

city_org_id_no = list_dedupe(cities_df['city_org_id'].tolist())
city_org_name = list_dedupe(cities_df['Organization'].tolist())

# Remove the temporary id columns again.
cities_df.drop('city_org_id', inplace=True, axis=1)
cities_count.drop('city_id', inplace=True, axis=1)

# Pair ids with names positionally; zip stops at the shorter list.
# NOTE(review): names are de-duplicated independently of the ids, so when a
# name repeats the id paired with a later name is not that name's original
# row id. Only the names are used by the matching step that follows.
city_dict = dict(zip(city_id_no, city_name))
city_org_dict = dict(zip(city_org_id_no, city_org_name))
# Compare dicts - a pair matches when the city name appears as a literal
# substring of the full city organisation name.
#
# Changes from the previous version:
# * rows are collected in a list and the frame is built once, because
#   DataFrame.append was removed in pandas 2.0 and was quadratic in a loop;
# * the city name is tested as a plain substring instead of being passed to
#   re.search as a pattern, so names containing regex metacharacters
#   (e.g. '.', '(') neither over-match nor raise;
# * non-string names (such as NaN) are skipped instead of raising TypeError.
match_columns = ['City ID No.', 'address_city', 'City Org ID No.', 'City Org', 'Match']
match_records = []

for ID, seq1 in city_dict.items():
    if not isinstance(seq1, str):
        continue
    for ID2, seq2 in city_org_dict.items():
        if isinstance(seq2, str) and seq1 in seq2:
            # 'Match' holds the matched text, which for a literal substring
            # match is the city name itself.
            match_records.append({'City ID No.': ID, 'address_city': seq1, 'City Org ID No.': ID2, 'City Org': seq2, 'Match': seq1})

# Passing columns= keeps the expected columns even when nothing matched.
city_names_df = pd.DataFrame(match_records, columns=match_columns)
            
# Keep only the (city name, organisation name) pairs from the matches.
city_names_df = city_names_df.loc[:,['address_city','City Org']]
# Left-join the matches onto the city counts; a city with several matching
# organisations produces several rows, and one with none gets NaN.
cities_count  = pd.merge(cities_count, city_names_df, on='address_city', how='left')
# Reduce to state, city and the matched organisation (renamed 'Organization').
cities_count = cities_count[['state','address_city', 'City Org']].rename(columns= { 'City Org': 'Organization'})


In [None]:
# join coordinates to cities count
def onlyUS(df):
    """Return the rows of ``df`` whose 'Country' column equals 'United States of America'."""
    in_united_states = df['Country'].eq('United States of America')
    return df[in_united_states]
# Attach lat/lng to each (city, state) pair; pairs missing from the metadata
# keep NaN coordinates because this is a left join.
cities_count = pd.merge(left=cities_count, right=cities_meta_df, how='left', on=['address_city', 'state'])
# Manual override for York, PA.
# NOTE(review): this assigns 'City of Philadelphia' to the York, PA row -
# confirm that this is the intended organisation.
cities_count.loc[(cities_count['state']=='PA') & (cities_count['address_city']=='York'),'Organization'] ='City of Philadelphia'
# US-only rows of the merged 2020 survey data.
Cities20= onlyUS(mergedcity20)
#cities_count = cities_count.rename(columns ={'address_city': 'City'})
# Right join on 'Organization': every cities_count row is kept, including
# rows with no survey match.
US_cities20 = pd.merge(Cities20,cities_count, how = 'right', on=['Organization'])


In [None]:
# Discard rows that have no 'Account Number' value.
US_cities20 = US_cities20.dropna(subset=['Account Number'])

In [None]:

#graph1
def grp1(df):
    """Count rows per ("City", "state") pair.

    Returns a frame with columns 'City', 'state' and 'Total'.
    """
    pair_sizes = df.groupby(["City", "state"]).size().rename('Total')
    return pair_sizes.reset_index()
# Row counts per (City, state) in the US 2020 data, drawn as a treemap
# nested as: 'US Cities' root -> state -> city, sized by 'Total'.
City = grp1(US_cities20)
Cityfig1 = px.treemap(City,path=[px.Constant('US Cities'),'state','City'], values = "Total", color = 'state',
                      title = 'Distribution of CDP Cities (US -2020)')
Cityfig1.show()

#### Energy 
1. Does your city have a renewable energy or electricity target? 


In [None]:
# Answers to question 8.0, counted per (answer, state).
cities8 = (
    US_cities20.loc[US_cities20['Question Number'].eq('8.0')]
    .groupby(['Response Answer', 'state'])
    .size()
    .reset_index(name='Total')
)

# Split the counts into "has a target / working on one" and "no intention".
yes_inprogress = cities8[cities8['Response Answer'].isin(
    ['Yes', 'Intending to undertake in the next 2 years', 'In progress']
)]
nope = cities8.loc[cities8['Response Answer'].eq('Not intending to undertake')]

# Overall bar chart plus one treemap (root -> state -> answer) per subset.
fig = px.bar(
    cities8,
    x="Response Answer",
    y="Total",
    color="Response Answer",
    title="Overall Response of Cities' Energy target/plan",
)
yes = px.treemap(
    yes_inprogress,
    path=[px.Constant('US Cities'), 'state', 'Response Answer'],
    values="Total",
    color='state',
    title='Reported having renewable energy or electricity target OR (in progress/intending to undertake)',
)
no = px.treemap(
    nope,
    path=[px.Constant('US Cities'), 'state', 'Response Answer'],
    values="Total",
    color='state',
    title='Cities who reported having no energy target and not intending to undertake',
)
fig.show();
yes.show();
no.show();

In [None]:
def converttostr(input_seq, seperator=' '):
    """Join the strings in ``input_seq`` into a single string.

    Parameters
    ----------
    input_seq : iterable of str
        The strings to concatenate.
    seperator : str, optional
        Text placed between consecutive items. Defaults to a single space.
        The previous version overwrote this argument with ' ' inside the
        body, so any other separator was silently ignored.

    Returns
    -------
    str
        The joined text; an empty string for an empty ``input_seq``.
    """
    return seperator.join(input_seq)

#### Details of your renewable energy or electricity target(s) and how the city plans to meet those targets.

1. Scale and Type 

In [None]:

# Non-null answers to question 8.0a, split by 'Column Number' (1, 2 and 3)
# and counted per distinct answer.
cities8a = US_cities20[US_cities20['Question Number']=='8.0a']
cities8a = cities8a.loc[cities8a['Response Answer'].notnull()]   
cities8a1 = cities8a.loc[(cities8a['Column Number']==1.0)]
cities8a2 = cities8a[cities8a['Column Number']==2.0]
cities8a3 = cities8a[cities8a['Column Number']==3.0]
cities8a1 = cities8a1.groupby(['Column Number', 'Response Answer']).size().reset_index(name = 'Total')
cities8a2 = cities8a2.groupby(['Column Number', 'Response Answer']).size().reset_index(name = 'Total')
cities8a3 = cities8a3.groupby(['Column Number', 'Response Answer']).size().reset_index(name = 'Total')
# One bar chart per column number; titles are set in update_layout below.
fig83 = px.bar(cities8a3, x = 'Response Answer', y = 'Total', color = 'Response Answer',barmode = 'group', height = 400, width = 1333)
fig81 = px.bar(cities8a1, x="Response Answer", y="Total",
             color='Response Answer', barmode='group',
             height=400, width = 777)
fig82 =  px.bar(cities8a2, x="Response Answer", y="Total",
             color='Response Answer', barmode='group',
             height=400, width = 777)
# No explicit .show() is called: the figures are displayed because the
# notebook sets InteractiveShell.ast_node_interactivity = "all", which echoes
# the value returned by each update_layout call.
fig83.update_layout(
    title = 'Energy/electricity types covered by target',
    xaxis_visible=False)
fig81.update_layout(
    title = 'Energy Target Scale',
    xaxis_visible=False)
fig82.update_layout(
    title = 'Energy Target Type',
    xaxis_visible=False)


In [None]:
# Answers to question 8.0a whose column number is 4 through 11, selected with
# a single isin() test instead of eight chained equality comparisons.
# NOTE(review): presumably these columns hold the free-text parts of the
# answer - confirm against the questionnaire.
comment8a = cities8a.loc[cities8a['Column Number'].isin([4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0])]

# Join every selected answer into one space-separated string.
comment8a = comment8a['Response Answer']
seperator = ' '
comment8a = converttostr(comment8a, seperator)
# Generate the image
# Extra stop words for this cloud; the update mutates the shared STOPWORDS set.
app_words = ['https', 'Question not applicable', 'applicable', 'not', 'Question','consideration', 'approximately63', "O'Shaughnessy", 'participating', 'percentage']
STOPWORDS.update(app_words)

plt.figure(figsize=(11, 11))

c8response= WordCloud(max_words=100, min_word_length=11, background_color='black', colormap='Set2',collocations=False, stopwords = STOPWORDS).generate(comment8a)
plt.imshow(c8response, interpolation='bilinear', alpha = 1)
# Title fixed: the original contained a duplicated word ("of of").
plt.title('Word Cloud analysis of the cities plan to meet their Energy/Electricity Target')


### EDA of CDP Corporation Data sets

In [None]:
# Corporate discloser lists for the Water Security programme, one per year.
cordiswater18=pd.read_csv("../input/cdp-unlocking-climate-solutions/Corporations/Corporations Disclosing/Water Security/2018_Corporates_Disclosing_to_CDP_Water_Security.csv")
cordiswater19=pd.read_csv("../input/cdp-unlocking-climate-solutions/Corporations/Corporations Disclosing/Water Security/2019_Corporates_Disclosing_to_CDP_Water_Security.csv")
cordiswater20=pd.read_csv("../input/cdp-unlocking-climate-solutions/Corporations/Corporations Disclosing/Water Security/2020_Corporates_Disclosing_to_CDP_Water_Security.csv")

# Corporate discloser lists for the Climate Change programme, one per year.
cordisclim18=pd.read_csv("../input/cdp-unlocking-climate-solutions/Corporations/Corporations Disclosing/Climate Change/2018_Corporates_Disclosing_to_CDP_Climate_Change.csv")
cordisclim19=pd.read_csv("../input/cdp-unlocking-climate-solutions/Corporations/Corporations Disclosing/Climate Change/2019_Corporates_Disclosing_to_CDP_Climate_Change.csv")
cordisclim20=pd.read_csv("../input/cdp-unlocking-climate-solutions/Corporations/Corporations Disclosing/Climate Change/2020_Corporates_Disclosing_to_CDP_Climate_Change.csv")

In [None]:
# Full corporate response datasets for Water Security, one per year.
corwater18=pd.read_csv("../input/cdp-unlocking-climate-solutions/Corporations/Corporations Responses/Water Security/2018_Full_Water_Security_Dataset.csv")
corwater19=pd.read_csv("../input/cdp-unlocking-climate-solutions/Corporations/Corporations Responses/Water Security/2019_Full_Water_Security_Dataset.csv")
corwater20=pd.read_csv("../input/cdp-unlocking-climate-solutions/Corporations/Corporations Responses/Water Security/2020_Full_Water_Security_Dataset.csv")

# Full corporate response datasets for Climate Change, one per year.
corclim18=pd.read_csv("../input/cdp-unlocking-climate-solutions/Corporations/Corporations Responses/Climate Change/2018_Full_Climate_Change_Dataset.csv")
corclim19=pd.read_csv("../input/cdp-unlocking-climate-solutions/Corporations/Corporations Responses/Climate Change/2019_Full_Climate_Change_Dataset.csv")
corclim20=pd.read_csv("../input/cdp-unlocking-climate-solutions/Corporations/Corporations Responses/Climate Change/2020_Full_Climate_Change_Dataset.csv")


In [None]:
def mergeCORPsurveydis(df1, df2):
    """Right-join ``df1`` onto ``df2`` using the 'account_number' column.

    Every row of ``df2`` is kept; columns of ``df1`` are NaN where ``df2``
    has an account number that ``df1`` lacks.
    """
    return df1.merge(df2, how='right', on='account_number')

In [None]:
# Water Security: attach discloser data to the survey responses for each
# year (right join on 'account_number', so every response row is kept).
ds18 = mergeCORPsurveydis(cordiswater18, corwater18)
ds19 = mergeCORPsurveydis(cordiswater19, corwater19)
ds20 = mergeCORPsurveydis(cordiswater20, corwater20)


# Climate Change: same merge for each year.
corpclimate18 = mergeCORPsurveydis(cordisclim18, corclim18)
corpclimate19 = mergeCORPsurveydis(cordisclim19, corclim19)
corpclimate20 = mergeCORPsurveydis(cordisclim20, corclim20)

In [None]:
#graph1
def grp1(df):
    """Count rows per (country, primary industry, survey year).

    Groups ``df`` by 'country', 'primary_industry' and 'survey_year_x' and
    returns a frame with columns 'country', 'industry', 'Year' and 'Total'.
    This definition replaces the earlier city-level ``grp1`` from this point
    of the notebook onward.
    """
    group_keys = ['country', 'primary_industry', 'survey_year_x']
    friendly_names = {'primary_industry': 'industry', 'survey_year_x': 'Year'}
    group_sizes = df.groupby(group_keys).size().rename('Total')
    return group_sizes.reset_index().rename(columns=friendly_names)


## WATER
# Row counts per (country, industry, year) for each year, then stacked.
dfwater18 = grp1(ds18)
dfwater19=grp1(ds19)
dfwater20 = grp1(ds20)
dfwater = pd.concat([dfwater18, dfwater19, dfwater20])

###CLIMATE CHANGE
# Same per-year counts for the Climate Change data, then stacked.
dfclimate18 = grp1(corpclimate18)
dfclimate19 = grp1(corpclimate19)
dfclimate20= grp1(corpclimate20)
dfclimate = pd.concat([dfclimate18, dfclimate19, dfclimate20])


##### 1. Distribution of Industry Discloser by Country from 2018 to 2020 (Water Security)

In [None]:
# Water Security: bar chart of row counts per industry, coloured by country,
# with one facet row per survey year. Colours for the two named countries
# are fixed through color_discrete_map.
fig = px.bar(dfwater, x='industry', y='Total', color='country', facet_row='Year',
                 color_discrete_map={
                "Canada": px.colors.qualitative.Dark24[21],
                "United States of America": px.colors.qualitative.Dark24[17]})
        
# Figure height, axis titles and a global font for the figure.
fig.update_layout(
    height=800,
    xaxis_title="",
    yaxis_title="Total",    
    font=dict(
        family="Courier New",
        size=15,
        color=px.colors.qualitative.Dark24[13],
        
        
))


fig.show();

##### 2. Distribution of Industry Discloser by Country from 2018 to 2020 (Climate Change)

In [None]:
# Climate Change: bar chart of row counts per industry, coloured by country,
# with one facet row per survey year. Colours for the two named countries
# are fixed through color_discrete_map.
fig = px.bar(dfclimate, x='industry', y='Total', color='country', facet_row='Year',
                 color_discrete_map={
                "Canada": px.colors.qualitative.Pastel[9],
                "United States of America": px.colors.qualitative.Antique[3]})
        
# Figure height, axis titles and a global font for the figure.
fig.update_layout(
    height=800,
    xaxis_title="",
    yaxis_title="Total",    
    font=dict(
        family="Courier New",
        size=15,
        color=px.colors.qualitative.Safe[7],
        
        
))


fig.show();


### EDA of US Corp Data Sets(2020)


##### Corporation overlap between Water_Security and Climate_Change data sets.

The Venn diagram below shows that 248 US companies participated in both the Water Security and Climate Change surveys in 2020.


In [None]:
def onlyUS(df):
    """Return the rows of ``df`` whose 'country' column equals 'United States of America'.

    This definition (lowercase 'country') replaces the earlier ``onlyUS``,
    which filtered on the capitalised 'Country' column.
    """
    in_united_states = df['country'].eq('United States of America')
    return df[in_united_states]

# US-only rows of the merged 2020 Water Security and Climate Change data.
water20=onlyUS(ds20)
climate20=onlyUS(corpclimate20)


### account overlap (VENN DIagram)
# Only the account numbers are needed for the overlap.
USwater20 = water20[['account_number']]    
USclimate20 = climate20[['account_number']]

# Two-set Venn diagram of the distinct account numbers in each dataset.
venn2([set(USwater20['account_number']), set(USclimate20['account_number'])],
      set_labels = ('Water_Security', 'Climate_change'), set_colors=('purple', 'skyblue'), alpha = 0.7)

plt.title('Overlap of account number')
plt.show()

### EDA of Climate Survey Data (US,2020)

In [None]:
# Rows with column number 2 whose question number starts with 'C6.1'.
# NOTE(review): this value of c2 is overwritten by the second c2 assignment
# below before it is used.
c2 = climate20.loc[climate20['question_number'].str.startswith('C6.1', na=False) & (climate20["column_number"]==2.0)]
# Rows with column number 2 whose question number is exactly 'C6.1'.
c6= climate20.loc[(climate20['question_number']=='C6.1') & (climate20["column_number"]==2.0)]
#What were your organization’s gross global Scope 1 emissions in metric tons CO2e?

# Broader selection: every question number starting with 'C6.', column 2.
c2 = climate20.loc[climate20['question_number'].str.startswith('C6.', na=False) & (climate20["column_number"]==2.0)]
### TO BE CONTINUED >>>>>>> 
# TODO: analysis of these selections is unfinished.