# CO2 and Weather

## Formalia:

The analysis in this notebook explores CO2 emissions on a worldwide and an EU scale. A deeper look is taken at the five countries with the highest and the five countries with the lowest CO2 emissions, and the EU is compared with those countries.

## Part 1: Development of CO2 emission in the world

In [None]:
# The 27 EU member states (the United Kingdom is deliberately left out).
# Spellings must match the 'Country Name' values used in the data frames below.
EU = [
    'Austria', 'Belgium', 'Bulgaria', 'Croatia', 'Cyprus', 'Czechia',
    'Denmark', 'Estonia', 'Finland', 'France', 'Germany', 'Greece',
    'Hungary', 'Ireland', 'Italy', 'Latvia', 'Lithuania', 'Luxembourg',
    'Malta', 'Netherlands', 'Poland', 'Portugal', 'Romania', 'Slovakia',
    'Slovenia', 'Spain', 'Sweden',
]

#### Preparing the data to fit the format needed for the analysis


In [None]:
# Import libraries need for the analysis
from math import ceil
import math
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import plotly.graph_objects as go
import plotly.express as px
import geojson
# Show floats with exactly two decimals in every rendered DataFrame
pd.set_option('display.float_format', '{:.2f}'.format)

import warnings
# Silence pandas FutureWarning noise
# (https://stackoverflow.com/questions/15777951/how-to-suppress-pandas-future-warning)
warnings.simplefilter('ignore', FutureWarning)

In [None]:
# Load the raw CSV and normalise it in one pass:
#   - missing and infinite values become 0
#   - two country names are aligned with the spelling used in the EU list
#   - the 'Code' column is dropped and 'Entity' becomes 'Country Name'
df = (
    pd.read_csv('annual-co2-emissions-per-country.csv')
    .fillna(0)
    .replace([np.inf, -np.inf], 0)
    .replace(to_replace='Slovak Republic', value='Slovakia')
    .replace(to_replace='Czech Republic', value='Czechia')
    .drop(columns=['Code'])
    .rename(columns={'Entity': 'Country Name'})
)

# Preview the cleaned frame
df.head()

In [None]:
# Reshape from long (one row per country/year) to wide (one row per country,
# one column per year). Country/year pairs with no record are filled with 0.
df = (
    df.pivot(index='Country Name', columns='Year', values='Annual CO2 emissions')
    .fillna(0)
    # clear the 'Year' label on the column axis so the header is a single row
    .rename_axis(None, axis='columns')
    .reset_index()
)

In [None]:
# Removing the years that are not needed - only looking at 1960 onwards.
# The columns to drop are taken from the frame itself, so a year that is
# missing from the file cannot raise a KeyError; 'Country Name' (the only
# string label) is skipped.
col_list = [year for year in df.columns if not isinstance(year, str) and year < 1960]

df.drop(columns=col_list, inplace=True)
df.head()

In [None]:
# Independent copy restricted to EU members, so `df` itself stays untouched
df_EU = df[df['Country Name'].isin(EU)].copy()

In [None]:
# Basic descriptive statistics of the EU frame; describe() summarises each
# numeric column, i.e. one summary per year across the EU countries
df_EU.describe()


In [None]:
# Long format of the EU data: one row per (country, year) pair.
# (The former `df.copy()` was overwritten on the next line and has been removed.)
df_EU_yearly = df_EU.melt(id_vars='Country Name', var_name='Year', value_name='CO2 Emission')
df_EU_yearly.head()


#### Visualization of the data

In [None]:
# Creates a bar chart for each EU country, one subplot per country.
col = 3
rows = math.ceil(len(EU) / col)

fig, ax = plt.subplots(rows, col, sharex=True, figsize=(24, 15))
panels = np.atleast_1d(ax).ravel()

for index, country in enumerate(EU):
    panel = panels[index]
    # Filter once per country instead of once per use
    country_data = df_EU_yearly[df_EU_yearly['Country Name'] == country]

    panel.set_title('CO2 Emission for ' + country)
    panel.set_xlabel('Year')
    panel.set_ylabel('CO2 Emission')

    y_max = country_data['CO2 Emission'].max()
    # Only fix the axis when there is data: max() is NaN for a country that
    # is absent from the frame, and a 0..0 range is degenerate.
    if pd.notna(y_max) and y_max > 0:
        panel.set_ylim(0, y_max)
        y_interval = int(y_max / 5) if y_max > 5 else 5
        panel.set_yticks(range(0, int(y_max), y_interval))

    panel.tick_params(axis='x', labelrotation=90)
    # Bar chart with the CO2 emission for each year
    panel.bar(country_data['Year'], country_data['CO2 Emission'])

# Hide grid cells that have no country assigned
for unused in panels[len(EU):]:
    unused.set_visible(False)

# The title must be set before saving, otherwise it is missing from the PNG
fig.suptitle('CO2 Emission for each country in the EU distribuated through the years 1960-2018', fontsize=16, y=1.05)
fig.tight_layout()

# save to PNG; bbox_inches='tight' keeps the title (placed at y=1.05) in the image
fig.savefig('CO2_Emission_EU.png', bbox_inches='tight')
plt.show()

The plot above shows the CO2 development for each country in the EU and illustrates which countries have the highest and lowest CO2 emissions. It can be seen that, throughout the years, Germany has the highest emissions, while a country like Malta has the lowest emissions.

In [None]:
# Total emission for the world for each year, taken from the 'World' row.
# 'Country Name' is dropped BEFORE summing: otherwise the sum over the single
# row carries the string 'World' along as an extra ('Country Name', 'World')
# data point that ends up in the plot.
df_world = df[df['Country Name'] == 'World'].drop(columns=['Country Name'])
df_world = df_world.sum(axis=0).reset_index()
df_world.columns = ['Year', 'CO2 Emission']
# Discard helper columns should they be present in df
# NOTE(review): no visible cell creates 'max_year'/'year_max' - confirm whether this is still needed
df_world = df_world[~df_world['Year'].isin(['max_year', 'year_max'])]

# plot the total CO2 emission for the world
fig = px.line(df_world, x='Year', y='CO2 Emission', title='CO2 Emission per year for the world from 1960-2022')
fig.show()
fig.write_html("CO2_Emission_world_1960_2022.html")

This plot shows the total CO2 emission for the world from 1960-2022. The plot shows that the CO2 emission has been increasing over the years.
Worth noting are 1973-1975, 1979-1980, 1991-1992, 2009 and 2020. It seems that something happened in those years, since the curve breaks in those places.

In [None]:
# Time series of CO2 emission per country: start from a copy of the full frame
df_world = df.copy()

# Country names used further down to select rows of df_world via isin().
# NOTE(review): presumably meant to keep sovereign states and exclude aggregate
# rows such as 'World' - confirm the spellings against the dataset.
list_world = ['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola', 'Antigua and Barbuda', 'Argentina', 'Armenia', 'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bhutan', 'Bolivia', 'Bosnia and Herzegovina', 'Botswana', 'Brazil', 'Brunei', 'Bulgaria', 'Burkina Faso', 'Burundi', 'Cambodia', 'Cameroon', 'Canada', 'Cape Verde', 'Central African Republic', 'Chad', 'Chile', 'China', 'Colombia', 'Comoros', 'Congo', 'Costa Rica', 'Croatia', 'Cuba', 'Cyprus', 'Czechia', 'Denmark', 'Djibouti', 'Dominica', 'Dominican Republic', 'Ecuador', 'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia', 'Eswatini', 'Ethiopia', 'Fiji', 'Finland', 'France', 'Gabon', 'Gambia', 'Georgia', 'Germany', 'Ghana', 'Greece', 'Grenada', 'Guatemala', 'Guinea', 'Guinea-Bissau', 'Guyana', 'Haiti', 'Honduras', 'Hungary', 'Iceland', 'India', 'Indonesia', 'Iran', 'Iraq', 
              'Ireland', 'Israel', 'Italy', 'Jamaica', 'Japan', 'Jordan', 'Kazakhstan', 'Kenya', 'Kiribati', 'Kuwait', 'Kyrgyzstan', 'Laos', 'Latvia', 'Lebanon', 'Lesotho', 'Liberia', 'Libya', 'Liechtenstein', 'Lithuania', 'Luxembourg', 'Madagascar', 'Malawi', 'Malaysia', 'Maldives', 'Mali', 'Malta', 'Marshall Islands', 'Mauritania', 'Mauritius', 'Mexico', 'Micronesia', 'Moldova', 'Monaco', 'Mongolia', 'Montenegro', 'Morocco', 'Mozambique', 'Myanmar', 'Namibia', 'Nauru', 'Nepal', 'Netherlands', 'New Zealand', 'Nicaragua', 'Niger', 'Nigeria', 'North Korea', 'North Macedonia', 'Norway', 'Oman', 'Pakistan',
              'Palau', 'Panama', 'Papua New Guinea', 'Paraguay', 'Peru', 'Philippines', 'Poland', 'Portugal', 'Qatar', 'Romania', 'Russia', 'Rwanda', 'Saint Kitts and Nevis', 'Saint Lucia', 'Saint Vincent and the Grenadines', 'Samoa', 'San Marino', 'Sao Tome and Principe', 'Saudi Arabia', 'Senegal', 'Serbia', 'Seychelles', 'Sierra Leone', 'Singapore', 'Slovakia', 'Slovenia', 'Solomon Islands', 'Somalia', 'South Africa', 'South Korea', 'South Sudan', 'Spain', 'Sri Lanka', 'Sudan', 'Suriname', 'Sweden', 'Switzerland', 'Syria', 'Tajikistan', 'Tanzania', 'Thailand', 'Timor-Leste', 'Togo', 'Tonga', 'Trinidad and Tobago', 'Tunisia', 'Turkey', 'Turkmenistan', 'Tuvalu', 'Uganda', 'Ukraine', 'United Arab Emirates', 'United Kingdom', 'United States', 'Uruguay', 'Uzbekistan', 'Vanuatu', 'Vatican City', 'Venezuela', 'Vietnam', 'Yemen', 'Zambia', 'Zimbabwe']

# Keep the listed countries that are not EU members
names = df_world['Country Name']
df_world = df_world[names.isin(list_world) & ~names.isin(EU)]

# Long format (country, year, emission), then one line per country
df_world_yearly = df_world.melt(id_vars='Country Name', var_name='Year', value_name='CO2 Emission')
fig = px.line(
    df_world_yearly,
    x='Year',
    y='CO2 Emission',
    color='Country Name',
    title='CO2 Emission per year for each country in the world except EU countries',
)
fig.show()
# Store the interactive figure as HTML
fig.write_html("CO2_Emission_World.html")

This plot shows every country in the world except for the EU countries. The top 5 emitters in the world can clearly be seen, but the 5 lowest emitters are harder to identify.
To see the 5 lowest emitters, the plot needs to be zoomed all the way in.

Top 5 highest = ['China', 'United States', 'India', 'Russia', 'Japan']

Top 5 lowest =  ['Tuvalu', 'Nauru', 'Kiribati', 'Sao Tome and Principe', 'Liechtenstein']

In [None]:
# The current top 5 emitters compared against the EU total
top_5 = ['China', 'United States', 'India', 'Russia', 'Japan']
# drop=True: a plain reset_index() would add an 'index' column, which melt
# would then turn into bogus rows with Year == 'index'
highest_country = df[df['Country Name'].isin(top_5)].reset_index(drop=True)
highest_country = highest_country.melt(id_vars='Country Name', var_name='Year', value_name='CO2 Emission')

# EU total per year: sum of all member states, labelled as pseudo-country 'EU'
df_EU_total = df_EU.drop(columns=['Country Name']).sum(axis=0).reset_index()
df_EU_total.columns = ['Year', 'CO2 Emission']
df_EU_total['Country Name'] = 'EU'

# Combine the data for the EU and the top 5 countries. ignore_index gives
# every row a unique label, so filtering can never hit an unrelated row.
df_combine = pd.concat([df_EU_total, highest_country], ignore_index=True)

# Remove helper rows that are not years, should they be present
df_combine = df_combine[~df_combine['Year'].isin(['max_year', 'year_max', 'year_min'])]

# plot the EU against the top 5 emitters
fig = px.line(df_combine, x='Year', y='CO2 Emission', color='Country Name', title='CO2 Emission per year for the current top 5 emitters compared to EU from 1960-2018')
fig.show()
fig.write_html("CO2_top_5_vs-EU_emission_world_1960_2018.html")

This plot gives a good visualization of how the EU compares to the top 5 emitters in the world.
It can be seen that while the emissions of China and India are increasing, the other countries are either stable or lowering their CO2 emissions.

In [None]:
# The 5 lowest emitters compared against the EU total
lowest_top_5 = ['Tuvalu', 'Nauru', 'Kiribati', 'Sao Tome and Principe', 'Liechtenstein']
# drop=True: a plain reset_index() would add an 'index' column, which melt
# would then turn into bogus rows with Year == 'index'
lowest_country = df[df['Country Name'].isin(lowest_top_5)].reset_index(drop=True)
lowest_country = lowest_country.melt(id_vars='Country Name', var_name='Year', value_name='CO2 Emission')

# Combine the EU total (df_EU_total, built in the top-5 cell) with the lowest
# 5 countries. ignore_index gives every row a unique label, so filtering can
# never hit an unrelated row.
df_combine = pd.concat([df_EU_total, lowest_country], ignore_index=True)

# Remove helper rows that are not years, should they be present
df_combine = df_combine[~df_combine['Year'].isin(['max_year', 'year_max', 'year_min'])]

# plot the EU against the lowest 5 emitters
fig = px.line(df_combine, x='Year', y='CO2 Emission', color='Country Name', title='CO2 Emission per year for the current lowest top 5 emitters compared to EU from 1960-2018')
fig.show()
fig.write_html("CO2_lowest_top_5_vs-EU_emission_world_1960_2018.html")

This plot clearly showcases how much more the EU emits compared to the countries at the lowest end. When the EU is shown, the others are not visible - the EU has to be removed before the development of CO2 emission can be seen for the countries with the lowest CO2 emissions.

In [None]:
# Interactive line chart from the long-format EU frame:
# x = year, y = CO2 emission, one coloured line per country
fig = px.line(
    df_EU_yearly,
    x='Year',
    y='CO2 Emission',
    color='Country Name',
    title='CO2 Emission per year for each country in the EU',
)
fig.show()
# Store the interactive figure as HTML
fig.write_html("CO2_Emission_EU.html")


This plot shows that Germany is clearly the highest emitter of CO2 in the EU throughout the years. Again, it is necessary to zoom in to see the country with the lowest emissions.

In [None]:
# Grouped plotly bar chart: per EU country the lowest and highest yearly CO2
# emission, with the year in which it occurred as the bar label.
# The statistics are computed here from the year columns (every non-string
# column label of df_EU) rather than read from 'min_year'/'year_min'/
# 'max_year'/'year_max' columns on df_EU, which no visible cell creates.
# NOTE(review): missing values were filled with 0 earlier, so a minimum of 0
# may mean "no data" rather than "no emission".
year_columns = [c for c in df_EU.columns if not isinstance(c, str)]
emissions = df_EU[year_columns]
eu_extremes = pd.DataFrame({
    'Country Name': df_EU['Country Name'],
    'min_year': emissions.min(axis=1),      # lowest emission value
    'year_min': emissions.idxmin(axis=1),   # year of the lowest value
    'max_year': emissions.max(axis=1),      # highest emission value
    'year_max': emissions.idxmax(axis=1),   # year of the highest value
})

fig = go.Figure(data=[
    go.Bar(name='Min CO2 Emission', x=eu_extremes['Country Name'], y=eu_extremes['min_year'], text=eu_extremes['year_min'], textposition='auto'),
    go.Bar(name='Max CO2 Emission', x=eu_extremes['Country Name'], y=eu_extremes['max_year'], text=eu_extremes['year_max'], textposition='auto'),
])
fig.update_layout(title='Max and Min CO2 Emission and Year of Max and Min CO2 Emission for EU countries', barmode='group')
fig.write_html("Bar_chart_Max_and_Min_CO2_Emission_and_Year_of_Max_and_Min_CO2_Emission_for_EU_countries.html")
fig.show()

A bar chart that shows when each country had its lowest and highest CO2 emission. Again Germany tops the chart, while Malta is at the lowest end of the spectrum.

In [None]:
# Smallest and largest single emission value across all EU countries and years
min_value, max_value = df_EU_yearly['CO2 Emission'].agg(['min', 'max'])

In [None]:
# Animated choropleth of the EU emissions, one frame per year.
# europe.geojson holds the country outlines; its features are matched to the
# data through properties.NAME == 'Country Name'. GeoJSON is UTF-8 by
# specification, so the encoding is stated explicitly.
with open('europe.geojson', encoding='utf-8') as response:
    counties = geojson.load(response)

fig = px.choropleth(df_EU_yearly,
                    geojson=counties,
                    locations='Country Name',
                    featureidkey='properties.NAME',
                    color='CO2 Emission',
                    color_continuous_scale="turbo",
                    hover_name='Country Name',
                    hover_data=['Year', 'CO2 Emission'],
                    animation_frame='Year',
                    # one colour range for all frames so years stay comparable
                    range_color=(min_value, max_value),
                    scope='europe',
                    center={"lat": 52.52000660, "lon": 13.404954007},
                    width=1000, height=800,
                    title='CO2 for EU from 1960-2018'
                    )

# Keep a small top margin: with t=0 the figure title is not visible
fig.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0})
fig.show()
# save as html
fig.write_html("CO2_Emission_EU_Map_Online.html", include_plotlyjs='cdn')
# Other continuous colour scales that can be used instead of "turbo":
# 'tealrose', 'tempo', 'temps', 'thermal', 'tropic', 'turbid', 'turbo',
# 'twilight', 'viridis', 'ylgn', 'ylgnbu', 'ylorbr', 'ylorrd'

A great visualization of the development of CO2 for EU.

## Part 2: 
## Exploring CO2 Emission in Europe: A look into the history of Highs and Lows
The following focuses on visualisations that show the highs and lows of the CO2 emission history in Europe,
using line plots and visualisation of geodata with plotly.

In [None]:
# Import libraries
import pandas as pd
import matplotlib.pyplot as plt
from scipy.signal import find_peaks  # was imported four times in one statement

# Only two digits after the decimal point
pd.set_option('display.float_format', lambda x: '%.2f' % x)

# Filter data for the years between 1900 and 2022
df_EU_yearly = df_EU_yearly[(df_EU_yearly['Year'] >= 1900) & (df_EU_yearly['Year'] <= 2022)]

# Total CO2 emissions for each year (sum over all EU countries)
total_CO2_each_year = df_EU_yearly.groupby('Year')['CO2 Emission'].sum()

# Local maxima of the series; the minima are the maxima of the negated series
peaks, _ = find_peaks(total_CO2_each_year.values)
valleys, _ = find_peaks(-total_CO2_each_year.values)

# Plot the total CO2 emission for each year
plt.figure(figsize=(15, 6))
plt.plot(total_CO2_each_year.index, total_CO2_each_year.values, linestyle='-')

# Mark peaks (red) and valleys (green)
plt.plot(total_CO2_each_year.index[peaks], total_CO2_each_year.values[peaks], marker='o', linestyle='', color='red', markersize=8)
plt.plot(total_CO2_each_year.index[valleys], total_CO2_each_year.values[valleys], marker='o', linestyle='', color='green', markersize=8)

plt.xlabel('Year')
plt.ylabel('Total CO2 Emission')
plt.title('Total CO2 Emission for each year (EU Countries)')
plt.xticks(total_CO2_each_year.index, rotation=90)  # Set x-axis ticks every year
plt.xlim(1960, 2022)
plt.grid()
plt.savefig('Total_CO2_Emission_for_each_year_EU.png')
plt.show()


This is a line plot that showcases the total amount of CO2 emission in Europe each year. We see a high level of CO2 emission in 1979, while the lowest CO2 emission after 1979 was in 2020.

In [None]:
# Filter data for the year 1979
df_1979 = df_EU_yearly[df_EU_yearly['Year'] == 1979]

# Colour range taken over ALL years in df_EU_yearly, so this map uses the
# same scale as the animated map
max_value = df_EU_yearly['CO2 Emission'].max()
min_value = df_EU_yearly['CO2 Emission'].min()

# europe.geojson holds the country outlines; GeoJSON is UTF-8 by specification
with open('europe.geojson', encoding='utf-8') as response:
    counties = geojson.load(response)

fig = px.choropleth(df_1979,
                    geojson=counties,
                    locations='Country Name',
                    featureidkey='properties.NAME',
                    color='CO2 Emission',
                    color_continuous_scale="turbo",
                    hover_name='Country Name',
                    hover_data=['Year', 'CO2 Emission'],
                    animation_frame='Year',
                    range_color=(min_value, max_value),
                    scope='europe',
                    center={"lat": 52.52000660, "lon": 13.404954007},
                    width=700, height=500,
                    # the frame holds a single year, so the title names that year
                    title='CO2 for EU in 1979'
                    )

# Keep a small top margin: with t=0 the figure title is not visible
fig.update_layout(margin={"r": 0, "t": 40, "l": 0, "b": 0})
fig.show()
# Save under its own name; the previous file name was identical to the one
# used for the animated map, which was therefore overwritten
fig.write_html("CO2_Emission_EU_Map_1979.html", include_plotlyjs='cdn')
# Other continuous colour scales that can be used instead of "turbo":
# 'tealrose', 'tempo', 'temps', 'thermal', 'tropic', 'turbid', 'turbo',
# 'twilight', 'viridis', 'ylgn', 'ylgnbu', 'ylorbr', 'ylorrd'

The colors on the map showcase the CO2 emission in the EU in 1990. The reason for showcasing the CO2 emission for that year is that it is the year in which the EU experienced the most CO2 emission, as seen in the line plot. When zooming in, we see that Luxembourg is the only red country on the map, which means that Luxembourg is most likely the country with the highest CO2 emission in 1990. Furthermore, we see that Portugal is the darkest blue country on the map, and therefore is the country that most likely had the lowest CO2 emission in 1990. To make sure that these observations are correct, we cross-checked them by doing the following.

In [None]:
# Cross-check of the map: EU countries with the highest and lowest CO2
# emission in df_1979. The messages name 1979 because that is the year the
# frame was filtered on (they previously said 1990).

# Row with the highest CO2 emission
highest_emission_row = df_1979.loc[df_1979['CO2 Emission'].idxmax()]
highest_emission_country = highest_emission_row['Country Name']
highest_emission_value = highest_emission_row['CO2 Emission']

# Row with the lowest CO2 emission
lowest_emission_row = df_1979.loc[df_1979['CO2 Emission'].idxmin()]
lowest_emission_country = lowest_emission_row['Country Name']
lowest_emission_value = lowest_emission_row['CO2 Emission']

print("Country with the highest CO2 emission in 1979:", highest_emission_country)
print("CO2 emission value:", highest_emission_value)

print("\nCountry with the lowest CO2 emission in 1979:", lowest_emission_country)
print("CO2 emission value:", lowest_emission_value)


As seen in the printed output, Luxembourg is the country that had the highest CO2 emission in 1990, while Portugal was the country with the lowest CO2 emission. Another interesting perspective is to look into 2018 and see which country had the highest CO2 emission and which country had the lowest CO2 emission.

In [None]:
# EU countries with the highest and lowest CO2 emission in 2020.
# 'Year' holds integers (it is compared with integers everywhere else in this
# notebook); comparing with the string '2020' selected no rows, so idxmax()
# failed on an empty frame.
df_2020 = df_EU_yearly[df_EU_yearly['Year'] == 2020]

# Row with the highest CO2 emission in 2020
highest_emission_row = df_2020.loc[df_2020['CO2 Emission'].idxmax()]
highest_emission_country = highest_emission_row['Country Name']
highest_emission_value = highest_emission_row['CO2 Emission']

# Row with the lowest CO2 emission in 2020
lowest_emission_row = df_2020.loc[df_2020['CO2 Emission'].idxmin()]
lowest_emission_country = lowest_emission_row['Country Name']
lowest_emission_value = lowest_emission_row['CO2 Emission']

print("Country with the highest CO2 emission in 2020:", highest_emission_country)
print("CO2 emission value:", highest_emission_value)

print("\nCountry with the lowest CO2 emission in 2020:", lowest_emission_country)
print("CO2 emission value:", lowest_emission_value)


Luxembourg still remains the EU country with the highest CO2 emission, while Malta is now, in 2018, the EU country with the lowest CO2 emission.

In [None]:
# EU countries with the highest and lowest CO2 emission in 2015.
# 'Year' holds integers (it is compared with integers everywhere else in this
# notebook); comparing with the string '2015' selected no rows, so idxmax()
# failed on an empty frame.
df_2015 = df_EU_yearly[df_EU_yearly['Year'] == 2015]

# Row with the highest CO2 emission in 2015
highest_emission_row = df_2015.loc[df_2015['CO2 Emission'].idxmax()]
highest_emission_country = highest_emission_row['Country Name']
highest_emission_value = highest_emission_row['CO2 Emission']

# Row with the lowest CO2 emission in 2015
lowest_emission_row = df_2015.loc[df_2015['CO2 Emission'].idxmin()]
lowest_emission_country = lowest_emission_row['Country Name']
lowest_emission_value = lowest_emission_row['CO2 Emission']

print("Country with the highest CO2 emission in 2015:", highest_emission_country)
print("CO2 emission value:", highest_emission_value)

print("\nCountry with the lowest CO2 emission in 2015:", lowest_emission_country)
print("CO2 emission value:", lowest_emission_value)

As seen in the line plot earlier, the year that had the lowest CO2 emission after 1990 was year 2015. The above showcases the highest and lowest CO2 emission by a country.

Luxembourg once again had the highest CO2 emission, while Romania had the lowest CO2 emission that year.

## Part 3: Extreme weather 

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from bokeh.plotting import figure, show, output_file
from bokeh.models import Legend, HoverTool, ColumnDataSource
from bokeh.layouts import gridplot
from bokeh.models import FixedTicker

# Columns of the disaster spreadsheets that this analysis does not use.
# Both files are stripped of the same columns, so the list is defined once.
unused_disaster_columns = [
    'Historic', 'Classification Key', 'External IDs', 'Event Name', 'OFDA/BHA Response', 'Appeal', 'Declaration',
    "AID Contribution ('000 US$)", 'Magnitude', 'Magnitude Scale', 'Latitude', 'Longitude', 'River Basin',
    'Start Day', 'Total Deaths', 'No. Injured', 'No. Affected', 'No. Homeless', 'Total Affected',
    "Reconstruction Costs ('000 US$)", "Reconstruction Costs, Adjusted ('000 US$)",
    "Insured Damage ('000 US$)", "Insured Damage, Adjusted ('000 US$)",
    "Total Damage ('000 US$)", "Total Damage, Adjusted ('000 US$)",
    'CPI', "Admin Units", "Entry Date", 'Last Update', 'Origin', 'Associated Types',
]

# For EU dataset
# NOTE: from here on `df` and `df_world` hold disaster data and no longer the
# CO2 frames of Part 1 and Part 2.
excel_file_EU = 'EU_disasters.xlsx'

df = pd.read_excel(excel_file_EU)
df.drop(columns=unused_disaster_columns, inplace=True)
df['Start Year'] = pd.to_datetime(df['Start Year'], format='%Y').dt.year

# For WorldWide dataset
excel_file_World = 'WW.xlsx'

df_world = pd.read_excel(excel_file_World)
df_world.drop(columns=unused_disaster_columns, inplace=True)
# Parse the worldwide frame's OWN 'Start Year'. This previously read
# df['Start Year'], which wrote the EU years (aligned on the row index) into
# the worldwide data.
df_world['Start Year'] = pd.to_datetime(df_world['Start Year'], format='%Y').dt.year


# global values
generic_colors = ['red', 'yellow', 'purple', 'blue', 'green', 'black', 'gray', 'pink', 'brown',
                  'skyblue', 'darkorange', 'lightseagreen', 'cyan', 'magenta', 'lime', 'gold',
                  'indigo', 'teal', 'salmon', 'peru', 'olive', 'orchid', 'steelblue','black']

# One colour per disaster type, used by the plotting functions below.
# NOTE(review): "Extreme temperature" and "Mass movement (dry)" share "magenta"
# and cannot be told apart in a plot - confirm whether that is intended.
disaster_colors = {
    "Flood": "blue",
    "Storm": "red",
    "Earthquake": "green",
    "Epidemic": "orange",
    "Glacial lake outburst flood": "purple",
    "Extreme temperature" :"magenta",
    "Mass movement (wet)":"lime",
    "Volcanic activity":"indigo",
    "Wildfire":"steelblue",
    "Drought":"peru",
    "Impact": "teal",
    "Infestation":"olive",
    "Mass movement (dry)":"magenta",
}


In [None]:
###### Keep only the rows whose 'Disaster Group' is 'Natural', in both datasets ######

# EU dataset
filter_natural = df[df['Disaster Group'].eq('Natural')]
grouped_natural_disaster = filter_natural.groupby('Disaster Group')

# Worldwide dataset
filter_natural_world = df_world[df_world['Disaster Group'].eq('Natural')]
grouped_natural_disaster_world = filter_natural_world.groupby('Disaster Group')

In [None]:
# Line chart: number of natural disasters recorded in the EU per start year

grouped_start_year = filter_natural.groupby('Start Year').size()

p = figure(
    title="Number of Natural Disasters in EU in each year",
    x_axis_label='Start Year',
    y_axis_label='Number of Natural Disasters in EU',
    width=800,
    height=400,
)
p.line(grouped_start_year.index, grouped_start_year.values, line_width=2)

# One tick per year that occurs in the data, labels rotated by 45 degrees
p.xaxis.ticker = FixedTicker(ticks=grouped_start_year.index.tolist())
p.xaxis.major_label_orientation = np.pi / 4

# Hover shows the year and the number of disasters of the hovered point
hover = HoverTool()
hover.tooltips = [("Year", "@x"), ("Occurrences", "@y")]
p.add_tools(hover)

output_file("natural_disasters_all_year_plot_EU.html")
show(p)


In [None]:
# Plot occurences of all disaster type for EU and WW

# Defining the method

def plot_diaster_types(dataset, colors, string, title):
    """Draw a grouped bar chart of disaster counts per year and type.

    Args:
        dataset: table with one row per year (the index) and one column per
            disaster type, holding the number of occurrences.
        colors: mapping from disaster type to a colour name. Types without an
            entry are drawn in gray instead of raising a KeyError.
        string: output file name without the ".html" suffix.
        title: title of the figure.

    The figure is written to "<string>.html" and shown.
    """
    year_labels = dataset.index.astype(str).tolist()
    p = figure(x_range=year_labels, height=400, width=700, title=title,
               toolbar_location=None, tools="")

    disaster_types = dataset.columns.tolist()
    # Each year occupies a slot of width 1 that is shared evenly by all types;
    # an empty table must not divide by zero.
    bar_width = 1 / len(disaster_types) if disaster_types else 1
    renderers = []

    for i, disaster_type in enumerate(disaster_types):
        # On a categorical axis the n-th factor spans [n, n + 1] in numeric
        # coordinates, with its tick at n + 0.5. Placing the i-th bar's centre
        # at n + (i + 0.5) * bar_width makes the group fill exactly that slot
        # (without the 0.5 every group is shifted half a bar to the left).
        x_pos = [slot + (i + 0.5) * bar_width for slot in range(len(dataset))]
        source = ColumnDataSource(data=dict(
            x=x_pos,
            top=dataset[disaster_type],
            name=[disaster_type] * len(x_pos),
            year=year_labels,
        ))

        renderer = p.vbar(x='x', top='top', width=bar_width,
                          color=colors.get(disaster_type, 'gray'), source=source)
        renderers.append(renderer)

    p.xaxis.axis_label = 'Year'
    p.yaxis.axis_label = 'Number of Disasters'
    p.xaxis.major_label_orientation = np.pi / 4

    hover = HoverTool(tooltips=[("Year", "@year"), ("Disaster Type", "@name"), ("Occurrence", "@top")], renderers=renderers)
    p.add_tools(hover)

    # One legend entry per disaster type; clicking an entry hides its bars
    legend_items = [(disaster_type, [renderer]) for disaster_type, renderer in zip(disaster_types, renderers)]
    legend = Legend(items=legend_items, location="top_right", click_policy='hide')
    p.add_layout(legend, 'right')

    output_file(string + ".html")
    show(p)


# EU: count the natural disasters per (start year, disaster type) and spread
# the types over columns; combinations that never occur count as 0
grouped_year = (
    filter_natural
    .groupby(['Start Year', 'Disaster Type'])
    .size()
    .unstack(fill_value=0)
)
plot_diaster_types(
    grouped_year,
    disaster_colors,
    "plot_disaster_EU_all_years",
    "Number of Natural Disasters by Type and Year in EU",
)

# Worldwide counterpart, currently switched off:
# grouped_year_world = filter_natural_world.groupby(['Start Year', 'Disaster Type']).size().unstack(fill_value=0)
# plot_diaster_types(grouped_year_world, disaster_colors, "plot_disaster_worldwide_all_years", "Number of Natural Disasters by Type and Year WorldWide")


Given the previous plots, we have seen the countries and their CO2 emissions. In the next section, the countries producing the most and the least CO2 are highlighted. For the EU, we see that Germany and Italy are the countries with the highest CO2 emission, while Malta and Latvia are the countries with the lowest CO2 emission. Worldwide, we have Qatar and Kuwait as the two countries with the highest CO2 emission, while Somalia and Congo are the countries with the lowest.

A "joker" has also been selected: a country that does not spike at the high or the low end, but has a CO2 emission level somewhere in the middle of all the countries in the EU and worldwide. For the EU, Sweden is the joker, and for the whole world China has been chosen.

In [None]:
# Per-country table for the EU: rows are (country, start year) pairs, columns
# are disaster types, values are the number of natural disasters (0 if none)
grouped_country = (
    filter_natural
    .groupby(['Country', 'Start Year', 'Disaster Type'])
    .size()
    .unstack(fill_value=0)
)

# Defining the method 
def disastertypes_country(dataset, countries, colors, name_of_file):
    """Plot, for each country, the yearly number of disasters per type.

    Args:
        dataset: table indexed by (country, year) with one column per disaster
            type, holding the number of occurrences.
        countries: names of the countries to plot; each must be present in the
            first index level of ``dataset`` (otherwise a KeyError is raised).
        colors: mapping from disaster type to a colour name. Types without an
            entry are drawn in gray instead of raising a KeyError.
        name_of_file: output file name without the ".html" suffix.

    One figure is built per country; the figures are laid out two per row,
    written to "<name_of_file>.html" and shown.
    """
    plots = []
    plots_per_row = 2

    for country in countries:
        country_data = dataset.loc[country]

        # Every year between the first and last recorded one, so that years
        # without any disaster still appear on the axis
        all_years = np.arange(country_data.index.min(), country_data.index.max() + 1)

        # Disaster types that never occurred in this country are left out
        non_zero_disasters = [disaster_type for disaster_type in country_data.columns
                              if country_data[disaster_type].sum() > 0]

        p = figure(title=f'Number of Natural Disasters by Type and Year in {country}',
                   x_axis_label='Year', y_axis_label='Number of Disasters',
                   x_range=(int(all_years[0]) - 1, int(all_years[-1]) + 1), width=475, height=250)

        plotted_disaster_types = []

        # All types are drawn at the same x position; alpha=0.5 keeps
        # overlapping bars visible
        for disaster_type in non_zero_disasters:
            x = all_years
            y = country_data[disaster_type].reindex(all_years, fill_value=0)
            source = ColumnDataSource(data=dict(x=x, y=y, name=[disaster_type] * len(x)))
            bars = p.vbar(x='x', top='y', width=0.8, color=colors.get(disaster_type, 'gray'),
                          alpha=0.5, source=source)
            plotted_disaster_types.append((disaster_type, [bars]))

        p.xaxis.ticker = all_years.tolist()
        p.xaxis.major_label_orientation = np.pi / 4
        # Integer ticks from 0 to at least 5; the range grows when a country
        # has more than 5 disasters of one type in a year
        highest_count = int(country_data[non_zero_disasters].to_numpy().max()) if non_zero_disasters else 0
        p.yaxis.ticker = list(range(0, max(5, highest_count) + 1))

        legend = Legend(items=plotted_disaster_types, location="top_left", click_policy="hide")
        legend.spacing = 5
        legend.label_text_font_size = "6pt"
        p.add_layout(legend, 'right')

        plots.append(p)

    grid = gridplot(plots, ncols=plots_per_row)

    output_file(name_of_file + ".html")
    show(grid)

    
# PLOTS FOR EU
# (country, output file name) in plotting order. The former one-off variables
# had names that contradicted their contents (e.g. countries_to_include_estonia
# held 'Germany'), so the selection is kept in one table instead.
eu_country_plots = [
    ('Germany', "Germany_EU"),  # 1. highest CO2 emission
    ('Italy', "Italy_EU"),      # 2. highest CO2 emission
    ('Malta', "Malta_EU"),      # 1. lowest CO2 emission
    ('Latvia', "Latvia_EU"),    # 2. lowest CO2 emission
    ('Sweden', "Sweden_EU"),    # joker
]
for country, file_name in eu_country_plots:
    disastertypes_country(grouped_country, [country], disaster_colors, file_name)


# PLOTS FOR WW
grouped_country_WW = filter_natural_world.groupby(['Country', 'Start Year', 'Disaster Type']).size().unstack(fill_value=0)

# Qatar and Kuwait (the highest CO2 emitters) cannot be found in the disaster
# dataset, so only the lowest emitters and the joker are plotted.
ww_country_plots = [
    ('Somalia', "Somalia_WW"),  # 1. lowest CO2 emission
    ('Congo', "Congo_WW"),      # 2. lowest CO2 emission
    ('China', "China_WW"),      # joker
]
for country, file_name in ww_country_plots:
    disastertypes_country(grouped_country_WW, [country], disaster_colors, file_name)


