In [1]:
import pandas as pd
from datapackage import Package
from pathlib import Path

package = Package('https://datahub.io/core/glacier-mass-balance/datapackage.json')

# print list of all resources:
print(package.resource_names)

# print processed tabular data (if exists any)
for resource in package.resources:
    if resource.descriptor['datahub']['type'] == 'derived/csv':
        # Read the CSV data from the resource into a pandas DataFrame
        glacier_mass_change_df = pd.read_csv(resource.descriptor['path'])

['validation_report', 'glaciers_csv', 'glaciers_json', 'glacier-mass-balance_zip', 'glaciers']


In [2]:
glacier_mass_change_df.dtypes

Year                              int64
Mean cumulative mass balance    float64
Number of observations          float64
dtype: object

In [3]:
sea_levels_package = Package('https://datahub.io/core/sea-level-rise/datapackage.json')

# print list of all resources:
print(sea_levels_package.resource_names)

# print processed tabular data (if exists any)
for resource in sea_levels_package.resources:
    if resource.descriptor['datahub']['type'] == 'derived/csv':
        # Read the CSV data from the resource into a pandas DataFrame
        sea_level_change_df = pd.read_csv(resource.descriptor['path'])

['validation_report', 'csiro_alt_gmsl_mo_2015_csv', 'csiro_alt_gmsl_yr_2015_csv', 'csiro_recons_gmsl_mo_2015_csv', 'csiro_recons_gmsl_yr_2015_csv', 'epa-sea-level_csv', 'csiro_alt_gmsl_mo_2015_json', 'csiro_alt_gmsl_yr_2015_json', 'csiro_recons_gmsl_mo_2015_json', 'csiro_recons_gmsl_yr_2015_json', 'epa-sea-level_json', 'sea-level-rise_zip', 'csiro_alt_gmsl_mo_2015', 'csiro_alt_gmsl_yr_2015', 'csiro_recons_gmsl_mo_2015', 'csiro_recons_gmsl_yr_2015', 'epa-sea-level']


In [4]:
sea_level_change_df.dtypes

Year                         object
CSIRO Adjusted Sea Level    float64
Lower Error Bound           float64
Upper Error Bound           float64
NOAA Adjusted Sea Level     float64
dtype: object

In [5]:
# Convert the date_col column to a datetime data type
sea_level_change_df['Year'] = pd.to_datetime(sea_level_change_df['Year'])

# Drop the month and day and keep the year
sea_level_change_df['Year'] = sea_level_change_df['Year'].dt.year

# View updated DataFrame
sea_level_change_df

Unnamed: 0,Year,CSIRO Adjusted Sea Level,Lower Error Bound,Upper Error Bound,NOAA Adjusted Sea Level
0,1880,0.000000,-0.952756,0.952756,
1,1881,0.220472,-0.732283,1.173228,
2,1882,-0.440945,-1.346457,0.464567,
3,1883,-0.232283,-1.129921,0.665354,
4,1884,0.590551,-0.283465,1.464567,
...,...,...,...,...,...
130,2010,8.901575,8.618110,9.185039,8.122973
131,2011,8.964567,8.661417,9.267717,8.053065
132,2012,9.326772,8.992126,9.661417,8.457058
133,2013,8.980315,8.622047,9.338583,8.546648


In [6]:
sea_level_change_df.dtypes

Year                          int64
CSIRO Adjusted Sea Level    float64
Lower Error Bound           float64
Upper Error Bound           float64
NOAA Adjusted Sea Level     float64
dtype: object

In [7]:
# Use pd.to_datetime() to create a datetime index with the year column as the only component
sea_level_change_df.index = pd.to_datetime(sea_level_change_df['Year'], format='%Y')

# Drop the original year column
sea_level_change_df.drop('Year', axis=1, inplace=True)

In [8]:
sea_level_change_df.drop(columns = ['NOAA Adjusted Sea Level'], inplace=True)

In [9]:

# Use pd.to_datetime() to create a datetime index with the year column as the only component
glacier_mass_change_df.index = pd.to_datetime(glacier_mass_change_df['Year'], format='%Y')

# Drop the original year column
glacier_mass_change_df.drop('Year', axis=1, inplace=True)

glacier_mass_change_df

Unnamed: 0_level_0,Mean cumulative mass balance,Number of observations
Year,Unnamed: 1_level_1,Unnamed: 2_level_1
1945-01-01,0.000,
1946-01-01,-1.130,1.0
1947-01-01,-3.190,1.0
1948-01-01,-3.190,1.0
1949-01-01,-3.820,3.0
...,...,...
2010-01-01,-25.158,37.0
2011-01-01,-26.294,37.0
2012-01-01,-26.930,36.0
2013-01-01,-27.817,31.0


In [10]:
glacier_mass_change_df.info

<bound method DataFrame.info of             Mean cumulative mass balance  Number of observations
Year                                                            
1945-01-01                         0.000                     NaN
1946-01-01                        -1.130                     1.0
1947-01-01                        -3.190                     1.0
1948-01-01                        -3.190                     1.0
1949-01-01                        -3.820                     3.0
...                                  ...                     ...
2010-01-01                       -25.158                    37.0
2011-01-01                       -26.294                    37.0
2012-01-01                       -26.930                    36.0
2013-01-01                       -27.817                    31.0
2014-01-01                       -28.652                    24.0

[70 rows x 2 columns]>

In [11]:
# Data Read-In of global_annual_mean_temp.csv
annual_mean_temp_df = pd.read_csv('./data/global_annual_mean_temp.csv')

In [12]:
annual_mean_temp_df = annual_mean_temp_df.drop(columns=['Lowess(5)'])
annual_mean_temp_df.columns = ['Year', 'Temperature']
annual_mean_temp_df.head()

Unnamed: 0,Year,Temperature
0,1880,-0.17
1,1881,-0.09
2,1882,-0.11
3,1883,-0.18
4,1884,-0.28


In [30]:
# Read-in .csv file
global_emissions_df = pd.read_csv('./data/co2-emissions.csv')

# Convert 'Year' column to datetime object
global_emissions_df['Year'] = pd.to_datetime(global_emissions_df['Year'], format='%Y')

# Drop unnecessary column: Code
global_emissions_df.drop(columns=['Code'], inplace=True)

# Rename columns
global_emissions_df.columns = ['Location', 'Year', 'Emissions']

# Filter rows
global_emissions_df = global_emissions_df.loc[global_emissions_df['Location'] == 'World']

#Set Index
global_emissions_df.set_index('Year', inplace=True)

# Drop 'Location' column
global_emissions_df.drop(columns=['Location'], inplace=True)


# Print the updated DataFrame
print(global_emissions_df.head())


            Emissions
Year                 
1750-01-01  9350528.0
1751-01-01  9350528.0
1752-01-01  9354192.0
1753-01-01  9354192.0
1754-01-01  9357856.0


In [31]:
global_emissions_df.dtypes

Emissions    float64
dtype: object

In [32]:
annual_mean_temp_df.index = pd.to_datetime(annual_mean_temp_df['Year'], format='%Y')

# Drop the original year column
annual_mean_temp_df.drop('Year', axis=1, inplace=True)

annual_mean_temp_df

Unnamed: 0_level_0,Temperature
Year,Unnamed: 1_level_1
1880-01-01,-0.17
1881-01-01,-0.09
1882-01-01,-0.11
1883-01-01,-0.18
1884-01-01,-0.28
...,...
2018-01-01,0.85
2019-01-01,0.98
2020-01-01,1.02
2021-01-01,0.84


In [33]:
# Concatenate the DataFrames vertically with the index preserved
combined_df = pd.concat([glacier_mass_change_df, sea_level_change_df, annual_mean_temp_df, global_emissions_df], axis=1)


In [34]:
# Print the resulting DataFrame
combined_df.tail(30)

Unnamed: 0_level_0,Mean cumulative mass balance,Number of observations,CSIRO Adjusted Sea Level,Lower Error Bound,Upper Error Bound,Temperature,Emissions
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1993-01-01,-14.695,37.0,6.291339,6.027559,6.555118,0.23,22812900000.0
1994-01-01,-15.276,37.0,6.5,6.23622,6.76378,0.32,22969500000.0
1995-01-01,-15.486,37.0,6.61811,6.354331,6.88189,0.45,23458710000.0
1996-01-01,-15.89,37.0,6.787402,6.523622,7.051181,0.33,24159380000.0
1997-01-01,-16.487,37.0,7.066929,6.80315,7.330709,0.46,24302580000.0
1998-01-01,-17.31,37.0,6.665354,6.393701,6.937008,0.61,24213260000.0
1999-01-01,-17.697,37.0,7.011811,6.73622,7.287402,0.38,24732390000.0
2000-01-01,-17.727,37.0,7.062992,6.791339,7.334646,0.39,25453620000.0
2001-01-01,-18.032,37.0,7.287402,7.019685,7.555118,0.53,25668050000.0
2002-01-01,-18.726,37.0,7.38189,7.110236,7.653543,0.63,26281040000.0


In [35]:
# Drop null values
combined_df = combined_df.dropna()
combined_df

Unnamed: 0_level_0,Mean cumulative mass balance,Number of observations,CSIRO Adjusted Sea Level,Lower Error Bound,Upper Error Bound,Temperature,Emissions
Year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1946-01-01,-1.130,1.0,3.251969,2.834646,3.669291,-0.07,4.638339e+09
1947-01-01,-3.190,1.0,3.374016,2.996063,3.751968,-0.02,5.138490e+09
1948-01-01,-3.190,1.0,3.562992,3.196850,3.929134,-0.10,5.415710e+09
1949-01-01,-3.820,3.0,3.511811,3.181102,3.842520,-0.11,5.245849e+09
1950-01-01,-4.887,3.0,3.598425,3.287402,3.909449,-0.17,6.003272e+09
...,...,...,...,...,...,...,...
2009-01-01,-24.459,37.0,8.586614,8.311024,8.862205,0.65,3.156403e+10
2010-01-01,-25.158,37.0,8.901575,8.618110,9.185039,0.72,3.336435e+10
2011-01-01,-26.294,37.0,8.964567,8.661417,9.267717,0.61,3.448701e+10
2012-01-01,-26.930,36.0,9.326772,8.992126,9.661417,0.65,3.500627e+10
