In [None]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt 
import calendar
import matplotlib.font_manager as fm 

In [None]:
# Register every font file found in the local Jost_Font folder with matplotlib,
# so that the 'Jost' family can be referenced by name when styling the plot
font_dir = ['./Jost_Font/']
for font_path in fm.findSystemFonts(fontpaths=font_dir):
    print(font_path)  # Show which font files were picked up
    fm.fontManager.addfont(font_path)

In [None]:
# Check if font was successfully loaded.
# By default findfont() falls back to matplotlib's default font (with only a warning) when the
# requested family is missing, which would make this check pass silently. Disabling the fallback
# makes the cell raise a ValueError instead, so a missing 'Jost' font is noticed right here.
fm.fontManager.findfont('Jost', fallback_to_default=False)

In [None]:
# Load the three temperature tables (global, northern and southern hemisphere) from CSV
data_global, data_nh, data_sh = map(
    pd.read_csv,
    ('global_temps.csv', 'nh_temps.csv', 'sh_temps.csv'),
)

In [None]:
# Preview the first rows to confirm the global data was loaded
data_global.head(n=5)

In [None]:
# Preview the first rows to confirm the northern hemisphere data was loaded
data_nh.head(n=5)

In [None]:
# Preview the first rows to confirm the southern hemisphere data was loaded
data_sh.head(n=5)

In [None]:
# Reshape each table from wide to long: every non-"Year" column name ends up in a
# "Month" column and the corresponding cell content in a "value" column
data = {
    name: frame.melt(id_vars="Year", var_name="Month", value_name="value")
    for name, frame in (
        ('global', data_global),
        ('nh', data_nh),
        ('sh', data_sh),
    )
}

In [None]:
# Clean up and refine melted data

# Lower-cased month abbreviations ('jan' ... 'dec'); entry 0 of calendar.month_abbr is skipped
month_abbrs = [abbr.lower() for abbr in calendar.month_abbr[1:]]


def clean_temperature_frame(melted):
    """Return a cleaned copy of one melted temperature table.

    Steps:
      * lower-case the 'Month' labels and keep only rows whose label is a month abbreviation
      * drop rows without a 'value'
      * add 'date' (first day of the month, parsed from 'Year' and 'Month')
      * add 'date_float' ('date' expressed as float seconds since 1970-01-01)

    The month filter is case-insensitive, so feeding an already cleaned frame back in
    yields the same result. This keeps the notebook cell safe to re-run.

    Parameters
    ----------
    melted : pandas.DataFrame
        Frame with at least the columns 'Year', 'Month' and 'value'.

    Returns
    -------
    pandas.DataFrame
        A new frame; the input is not modified.
    """
    month = melted['Month'].str.lower()
    cleaned = (
        melted.assign(Month=month)
        .loc[month.isin(month_abbrs)]
        .dropna(subset=['value'])
        .copy()  # explicit copy so the column assignments below never target a slice
    )

    # Combine 'Year' and 'Month' into a date string such as '1880-jan-1' and parse it
    cleaned['date'] = pd.to_datetime(
        cleaned['Year'].astype(str) + '-' + cleaned['Month'] + '-1',
        format='%Y-%b-%d',
    )

    # Vectorised float representation of the dates (seconds since 1970-01-01)
    cleaned['date_float'] = (cleaned['date'] - pd.Timestamp(0)) / pd.Timedelta(seconds=1)

    return cleaned


# Store the cleaned frames back in the data dict
for key, df in data.items():
    data[key] = clean_temperature_frame(df)

In [None]:
# Calculations for polynomial regression fitting

# Degree of the fitted polynomial; eight seems to fit the original the best
degree = 8

data_mean = {}
for key, df in data.items():
    x_values = df['date_float'].to_numpy()

    # Fit a polynomial curve to the data.
    # 'date_float' holds values on the order of 1e9, and raising those to the 8th power in a
    # plain power-basis fit (np.polyfit) is prone to ill-conditioning. Polynomial.fit maps the
    # x-range onto [-1, 1] before fitting and applies the same mapping again on evaluation.
    polynomial = np.polynomial.Polynomial.fit(x_values, df['value'].to_numpy(), degree)

    # Create x values for the smooth curve (1000 evenly spaced points over the data range)
    x_smooth = np.linspace(x_values.min(), x_values.max(), 1000)

    # Evaluate the polynomial at the x_smooth values to get the corresponding y_smooth values
    y_smooth = polynomial(x_smooth)

    # Store the curve in the 'data_mean' dict as (list of timestamps, array of fitted values)
    data_mean[key] = ([pd.to_datetime(date, unit='s') for date in x_smooth], y_smooth)

In [None]:
# Per-series plot colours, sampled with an eyedropper from the original plot
styleguide = {
    series: {'color': hex_code}
    for series, hex_code in (
        ('global', '#fc6f06'),
        ('nh', '#c11700'),
        ('sh', '#008ea9'),
    )
}

# Constant offset used to draw a band around each regression curve.
# The error bands of the original plot could not be recreated with reasonable effort
# (the R ggplot API is not very well documented; about 1.5 hours went into trying),
# so a value that visibly fits best was chosen instead.
mock_error = 0.03

In [None]:
# Set up one large, high-resolution figure with a single axis
f, ax = plt.subplots(figsize=(36, 27), dpi=300)

# First pass: the monthly values as dots, with the regression curve of each series on top
for key, df in data.items():
    series_style = styleguide[key]
    curve_dates, curve_values = data_mean[key]
    ax.scatter(x='date', y='value', data=df, s=50, edgecolors='white', zorder=90, **series_style)
    ax.plot(curve_dates, curve_values, linewidth=5, zorder=99, **series_style)

# Second pass: a translucent band of +/- mock_error around each regression curve
# (zorder places it between the dots and the curve)
for key in data:
    curve_dates, curve_values = data_mean[key]
    ax.fill_between(
        curve_dates,
        curve_values - mock_error,
        curve_values + mock_error,
        alpha=0.4,
        linewidth=0,
        zorder=98,
        **styleguide[key],
    )

# No horizontal padding -> the plot starts where the data starts
ax.margins(x=0)

# x-axis: one major tick on the 1st of January of every tenth year (1880, 1890, ..., 2020),
# labelled with the plain year
xticks = [pd.Timestamp(year=decade, month=1, day=1) for decade in range(1880, 2030, 10)]
xticks_labels = [str(tick.year) for tick in xticks]
ax.set_xticks(xticks, labels=xticks_labels, minor=False)

# y-axis: the four values used in the original, each labelled with a "°C" suffix
yticks = [-1, 0, 1, 2]
yticks_labels = [f'{tick} °C' for tick in yticks]
ax.set_yticks(yticks, labels=yticks_labels, minor=False)

# Tick styling: labels first, then the tick marks of each axis
tick_marks = {'which': 'major', 'length': 7, 'width': 1.5}
ax.tick_params(which='major', labelsize=21, labelfontfamily='Jost', labelcolor='dimgrey')
ax.tick_params(axis='y', pad=15, color='darkgrey', **tick_marks)
ax.tick_params(axis='x', color='black', **tick_marks)

# y-axis tick labels are rendered in bold
for label in ax.get_yticklabels():
    label.set_fontweight('bold')

# Dashed grey grid lines (5 on / 5 off) along both axes
ax.grid(axis='both', color='darkgrey', linestyle=(0, (5, 5)), linewidth=1.5)

# Only the bottom spine stays visible, drawn in black; the other three are hidden
ax.spines.bottom.set(color='black', linewidth=1.5)
ax.spines[['top', 'left', 'right']].set_visible(False)

def jost_font(size, weight):
    """Return the font-properties dict for the 'Jost' family at the given size and weight."""
    return {'family': 'Jost', 'size': size, 'weight': weight}


# Set title
f.suptitle('Global surface temperatures', x=0.33, y=0.97, fontproperties=jost_font(80, 'bold'))

# Set subtitle: one grey base sentence with gaps, plus one separately placed coloured word per
# series (a single text object cannot hold differently coloured parts here).
f.text(x=0.126, y=0.91, s="Monthly average surface temperatures at a           scale, as well as in the               and               hemispheres.",
       color='dimgrey', fontproperties=jost_font(27, 'regular'))

# The word colours are taken from the styleguide so they always match the plotted series
for key, x_pos, word in (
    ('global', 0.3145, "global"),
    ('nh', 0.447, "northern"),
    ('sh', 0.5105, "southern"),
):
    f.text(x=x_pos, y=0.91, s=word, color=styleguide[key]['color'],
           fontproperties=jost_font(27, 'semibold'))

# Set caption: alternating regular/bold fragments, each placed as its own text object
for x_pos, fragment, weight in (
    (0.6565, "Source:", 'regular'),
    (0.6758, "NASA GISS Surface Temperature Analysis (GISTEMP v4)", 'bold'),
    (0.8225, "| Graphic:", 'regular'),
    (0.848, "Nikolaos Pechlivanis", 'bold'),
):
    f.text(x=x_pos, y=0.072, s=fragment, fontproperties=jost_font(14, weight))

# Write the figure to disk with a tight bounding box and some padding.
# The saved "plot.png" looks a lot closer to the original than the inline notebook rendering.
f.savefig('plot.png', bbox_inches='tight', pad_inches=0.7)
