# A Data Story Regarding Global Warming and Penguins

In [83]:
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)
from plotly.subplots import make_subplots

# Import packages
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objs as go
import plotly.express as px
import seaborn as sns
import statsmodels.api as sm
import sys
!{sys.executable} -m pip install numpy



In [84]:
# Load the preprocessed datasets used by the plots below (preprocessing lives in the GitHub repo).

# Penguin counts; later cells read the 'year', 'common_name' and 'penguin_count' columns.
interpolated_data = pd.read_csv("interpolated_penguin_data.csv")

# Global mean sea level; later cells read the 'year' and 'GMSL_GIA' columns.
sealevels = pd.read_csv("sealevel_year.csv")

# Antarctic temperature records; later cells read 'Year', 'Year Code' and 'Value'.
temp_df = pd.read_csv("Antarctica_temperatures.csv")
# Temperature values used by the sea-level comparison; only 'Value' is read below.
new_temp_df = pd.read_csv("temp_per_year.csv")

# Iceberg measurements; later cells read 'Iceberg', 'Year' and 'Length (NM)'.
ice_df = pd.read_csv("iceberginfo_per_iceberg.csv")

# Perspective 1: Decreasing Penguin Habitat

## Correlation between global warming and Antarctica

In [85]:
# Average the numeric columns per year.
# numeric_only=True is required on pandas >= 2.0, where mean() raises a
# TypeError on text columns instead of silently dropping them; on older
# pandas the result is unchanged.
temp_df_grouped = temp_df.groupby("Year").mean(numeric_only=True)

# "Year" became the index after the groupby, so the averaged "Year Code"
# column is used for the x-axis. trendline='ols' adds a fitted
# ordinary-least-squares trace to the figure.
fig = px.scatter(temp_df_grouped, x="Year Code", y="Value", title='Temperature change in Antarctica (1961-2020)',
                 labels={
                     "Year Code": "Year",
                     "Value": "Degree (C°)",
                 },
                 trendline='ols')
# Render every trace (observations and trend) as a line instead of markers.
fig.update_traces(mode = 'lines')
# Recolor the last trace in the figure, which is the trendline.
fig.data[-1].line.color = 'red'
fig.show()

#### The plot shows how the average yearly temperature in Antarctica changed between 1961 and 2020. The blue line is the recorded annual average temperature, and the red line is the trendline fitted over time. The plot shows an overall rise in Antarctica's average temperature over the 59-year span, pointing to a warming trend.

## Melting Iceberg A68A

In [86]:
# Average the numeric measurements per iceberg and year.
# numeric_only=True keeps this working on pandas >= 2.0 (mean() no longer
# drops text columns silently). reset_index() turns the (Iceberg, Year)
# group keys back into ordinary columns, which replaces the previous
# write-to-CSV-then-read-back round trip.
ice_df = ice_df.groupby(["Iceberg", "Year"]).mean(numeric_only=True).reset_index()

# Still export the aggregated table; index=False because the group keys are
# regular columns now, so the file keeps the same column layout as before.
ice_df.to_csv("iceberg_per_year.csv", index=False)

# Keep only the rows of iceberg A68A.
spec_ice_df = ice_df[ice_df["Iceberg"] == "A68A"]

# The y-axis is the iceberg's length in nautical miles, and only A68A is
# plotted, so the title says exactly that.
fig = px.bar(spec_ice_df, x="Year", y="Length (NM)", title='Average length of iceberg A68A per year')
fig.show()

#### This bar plot shows how the iceberg labelled A68A in the dataset is melting over a 3-year period. We can see that the average length of the iceberg (in nautical miles) is decreasing.

## Correlation between melting icebergs and rising sea levels

In [87]:
# Dual-axis line chart: sea level on the primary (left) y-axis and the
# temperature values on the secondary (right) y-axis, sharing one x-axis.
#
# NOTE(review): the temperature trace takes its x values from
# sealevels["year"] but its y values from new_temp_df["Value"], so the two
# are paired purely by row position. Confirm both files cover the same years
# in the same order.
sea_level_trace = go.Scatter(
    x=sealevels["year"],
    y=sealevels["GMSL_GIA"],
    name="Global Mean Sea Level (with GIA)",
)
temperature_trace = go.Scatter(
    x=sealevels["year"],
    y=new_temp_df["Value"],
    name="Temperature Change",
)

fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(sea_level_trace, secondary_y=False)
fig.add_trace(temperature_trace, secondary_y=True)

fig.update_layout(
    title_text="Correlation between the temperature change in Antarctica and the average global sea levels (1993-2020)"
)

# Axis titles: shared x-axis first, then the left and right y-axes.
fig.update_xaxes(title_text="Year")
fig.update_yaxes(title_text="Sea level (mm)", secondary_y=False)
fig.update_yaxes(title_text="Degree (C°)", secondary_y=True)

fig.show()

#### The plot shows the correlation between changes in Antarctica's temperature and the average rise in the world's sea levels between 1993 and 2020. The left y-axis shows the global mean sea level in millimeters, the right y-axis shows the amount of temperature change in Celsius, and the x-axis represents the years. The "Global Mean Sea Level (with GIA)" line shows how the sea level has changed through time, whereas the "Temperature Change" line shows how the temperature of Antarctica has changed. The figure illustrates the trends of rising sea levels and temperatures, possibly suggesting a connection.

# Perspective 2: Endangerment Of The Penguin Population

## Correlation between rising sea levels and number of penguins

In [89]:
# Total penguin count per species, restricted to the rows for the year 2020.
total_penguin_2020 = (
    interpolated_data[interpolated_data['year'] == 2020]
    .groupby('common_name')['penguin_count']
    .sum()
)

# One colour per slice, in the order of the grouped species.
slice_colors = [
    '#11243B',
    '#EBF1F9',
    '#4682B4',
    '#B0C4DE',
    '#28548A',
    '#4D85CB',
]

# Donut chart (hole=0.4) with labels and percentages placed outside the
# slices; the first slice is pulled out of the ring.
species_pie = go.Pie(
    labels=total_penguin_2020.index,
    values=total_penguin_2020,
    textinfo='label+percent',
    textposition='outside',
    marker=dict(colors=slice_colors),
    hole=0.4,
    pull=[0.1, 0, 0, 0],
)

fig = go.Figure(data=[species_pie])
fig.update_layout(
    title="Distribution of Penguin Species",
    showlegend=False,
    height=600,
)

fig.show()

#### The donut chart reveals that the distribution of penguin species in our dataset (for the year 2020) is not equal. The Adélie penguin stands out as the most prominent species, followed by the Chinstrap, Emperor, Gentoo, Macaroni, and King penguin. Each species is labeled with its percentage, giving a clearer overview of the distribution among the different species.

In [64]:
# Total penguin count per year, summed over all rows of each year.
penguin_total = interpolated_data.groupby('year')['penguin_count'].sum()

fig = go.Figure([go.Bar(x=penguin_total.index, y=penguin_total, marker={'color': '#11243B'})])
# The x-axis tick angle is set once, inside the xaxis definition. The former
# extra `xaxis_tickangle=-45` argument contradicted it and was overridden by
# the `xaxis=` argument that follows, so it has been dropped.
fig.update_layout(title_text='Bar Plot of Total Penguin Count per Year',
                  xaxis=go.layout.XAxis(
                    title="Year",
                    tickmode='linear',  # linear tick placement on the year axis
                    tickangle=45),
                  yaxis=go.layout.YAxis(
                    title="Total Penguins"),
                  height=550,
                  width=1000
                 )
fig.show()

#### This bar plot visualizes the total count of penguins per year. The data covers the years 2002 to 2020, which were selected because there are no data gaps between those years. It is evident that the penguin population shows a slight increase each year, aligning well with the overall trend.

In [65]:
# Sea-level rows for 2002-2020 (inclusive); this frame is reused by later cells.
in_penguin_years = (sealevels['year'] >= 2002) & (sealevels['year'] <= 2020)
sealevels_range_2002_2020 = sealevels[in_penguin_years]

# Yearly total of chinstrap penguins.
is_chinstrap = interpolated_data['common_name'] == 'chinstrap penguin'
chinstrap_penguin = interpolated_data[is_chinstrap].groupby('year')['penguin_count'].sum()

# Penguin count goes on the primary (left) y-axis, sea level on the secondary (right) one.
penguin_trace = go.Scatter(
    x=chinstrap_penguin.index,
    y=chinstrap_penguin,
    name="Chinstrap Penguin Count",
    line=dict(color="#FF0000"),
)
sealevel_trace = go.Scatter(
    x=sealevels_range_2002_2020['year'],
    y=sealevels_range_2002_2020['GMSL_GIA'],
    name="Sealevels",
    line=dict(color="#11243B"),
)

fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(penguin_trace, secondary_y=False)
fig.add_trace(sealevel_trace, secondary_y=True)

# Axis titles; x tick labels are rotated by 45 degrees.
fig.update_xaxes(title_text="<b>Year</b>", tickangle=45)
fig.update_yaxes(title_text="<b>Penguin Count</b>", secondary_y=False)
fig.update_yaxes(title_text="<b>Sealevels</b>", secondary_y=True)

# Figure title, one x tick per year in the selected range, and a fixed canvas size.
fig.update_layout(
    title_text="Correlation between sealevels and the amount of penguins",
    xaxis=dict(tickvals=sealevels_range_2002_2020['year']),
    autosize=False,
    width=990,
    height=600,
)

fig.show()

#### One notable observation in this plot is the consistent rise in sea level each year, accompanied by some fluctuations. Alongside the rising sea level, there is a noticeable decline in the population of chinstrap penguins, which also fluctuates. The decline was relatively small between 2002 and 2008; after 2008, however, the chinstrap penguin population dropped significantly.

## Correlation between rising sea levels and number of Gentoo penguins

In [67]:
# Yearly total of gentoo penguins (Series indexed by 'year', named 'penguin_count').
gentoo_penguin = interpolated_data[interpolated_data['common_name'] == 'gentoo penguin'].groupby('year')['penguin_count'].sum()

# Put the three plotted variables into one frame, matched on 'year'.
# Previously x/y came from the penguin series and z from the sea-level frame,
# whose row index differs, so the points were only paired correctly when both
# happened to have the same length and order.
gentoo_vs_sealevel = gentoo_penguin.reset_index().merge(
    sealevels_range_2002_2020[['year', 'GMSL_GIA']],
    on='year',
    how='inner',  # keep only the years present in both sources
)

fig = px.scatter_3d(gentoo_vs_sealevel, x='year', y='penguin_count', z='GMSL_GIA')

fig.update_layout(scene=dict(xaxis_title='Years', yaxis_title='Penguin Count', zaxis_title='Sealevel'),
                  title='Gentoo Penguin Count and Sealevels by Year',
                  )

fig.show()

#### This figure presents an interactive 3D scatter plot illustrating the relationship between the count of Gentoo penguins and global sea levels from 2002 to 2020. The x-axis represents the years, while the y-axis denotes the count of Gentoo penguins within the range of 120-160k. The z-axis displays the global sea levels on a scale of 0-60, representing the global mean sea level (GMSL) in millimeters relative to the 20-year TOPEX/Jason collinear mean reference. The graph reveals a consistent upward trend in both sea levels and the population of Gentoo penguins, suggesting a positive relationship between the two.

## Correlation between rising sea levels and number of Emperor penguins

In [66]:
# Yearly total of emperor penguins (Series indexed by 'year', named 'penguin_count').
emperor_penguin = interpolated_data[interpolated_data['common_name'] == 'emperor penguin'].groupby('year')['penguin_count'].sum()

# Put the three plotted variables into one frame, matched on 'year'.
# Previously a bare Index was passed as the data frame and z came from the
# sea-level frame, whose row index differs, so the points were only paired
# correctly when both sources happened to have the same length and order.
emperor_vs_sealevel = emperor_penguin.reset_index().merge(
    sealevels_range_2002_2020[['year', 'GMSL_GIA']],
    on='year',
    how='inner',  # keep only the years present in both sources
)

fig = px.scatter_3d(emperor_vs_sealevel, x='year', y='penguin_count', z='GMSL_GIA')

fig.update_layout(scene=dict(xaxis_title='Years', yaxis_title='Penguin Count', zaxis_title='Sealevel'),
                  title='Emperor Penguin Count and Sealevels by Year',
                  )

fig.show()

#### This figure shows an interactive 3D scatter plot depicting the relationship between the count of Emperor penguins and global sea levels from 2002 to 2020. The x-axis represents the years, while the y-axis represents the count of Emperor penguins within the range of 240-290k. The z-axis displays the global sea levels on a scale of 0-60, representing the global mean sea level (GMSL) in millimeters relative to the 20-year TOPEX/Jason collinear mean reference. The graph demonstrates an upward trend in both sea levels and the population of Emperor penguins, indicating a positive association between the two variables.