In [124]:
!pip install numpy jinja2 six requests tornado pyyaml python-dateutil bokeh



In [125]:
import pandas as pd
import numpy as np
from bokeh.plotting import figure, show
from bokeh.models import Range1d, HoverTool

In [126]:
# Load the two temperature tables from CSV files in the working directory:
# the local (city-level) series first, then the global series.
local_weather, global_weather = (
    pd.read_csv(csv_name) for csv_name in ('local_weather.csv', 'global_weather.csv')
)


In [127]:
# Preview the first ten rows of the local table to see its columns
# and whether the earliest years contain missing temperatures.
local_weather.head(n=10)

Unnamed: 0,avg_temp,year,city
0,6.33,1743,Berlin
1,10.36,1744,Berlin
2,1.43,1745,Berlin
3,,1746,Berlin
4,,1747,Berlin
5,,1748,Berlin
6,,1749,Berlin
7,9.83,1750,Berlin
8,9.75,1751,Berlin
9,4.84,1752,Berlin


In [128]:
# Preview the first ten rows of the global table for comparison with the local one.
global_weather.head(n=10)

Unnamed: 0,year,avg_temp
0,1750,8.72
1,1751,7.98
2,1752,5.78
3,1753,8.39
4,1754,8.47
5,1755,8.36
6,1756,8.85
7,1757,9.02
8,1758,6.74
9,1759,7.99


In [129]:
# Inspect the last five rows of the global table to see the final year it covers.
global_weather.tail(n=5)

Unnamed: 0,year,avg_temp
261,2011,9.52
262,2012,9.51
263,2013,9.61
264,2014,9.57
265,2015,9.83


In [130]:
# Inspect the last five rows of the local table to see the final year it covers.
local_weather.tail(n=5)

Unnamed: 0,avg_temp,year,city
266,10.06,2009,Berlin
267,8.61,2010,Berlin
268,10.56,2011,Berlin
269,9.96,2012,Berlin
270,10.12,2013,Berlin


In [131]:
# Restrict both tables to the span of years they have in common, then renumber the rows from 0.
# The local table starts earlier than the global one (and its earliest rows contain gaps),
# while the global table runs later than the local one. Deriving the window from the `year`
# columns avoids hard-coding how many rows to cut from either end.
first_year = max(local_weather['year'].min(), global_weather['year'].min())
last_year = min(local_weather['year'].max(), global_weather['year'].max())

local_weather_clean = (
    local_weather[local_weather['year'].between(first_year, last_year)]
    .reset_index(drop=True)  # fresh 0..n-1 index; the old index is not kept as a column
)
global_weather_clean = (
    global_weather[global_weather['year'].between(first_year, last_year)]
    .reset_index(drop=True)
)

# Later cells combine the two tables row by row (through the index),
# so every row must describe the same year in both tables.
assert local_weather_clean['year'].tolist() == global_weather_clean['year'].tolist(), \
    "local and global tables do not cover the same years row by row"

In [132]:
# Confirm that the cleaned global table now ends at the same year as the local one.
global_weather_clean.tail(n=5)

Unnamed: 0,year,avg_temp
259,2009,9.51
260,2010,9.7
261,2011,9.52
262,2012,9.51
263,2013,9.61


In [133]:
# Report, for each cleaned table, the total number of NaN cells (summed over all columns)
# together with the number of rows.
nan_report = "Global weather data has: {} missing values and {} data points\nBerlin weather data has: {} missing values and {} data points"

global_missing = global_weather_clean.isnull().sum().sum()
global_rows = global_weather_clean.shape[0]
local_missing = local_weather_clean.isnull().sum().sum()
local_rows = local_weather_clean.shape[0]

print(nan_report.format(global_missing, global_rows, local_missing, local_rows))

Global weather data has: 0 missing values and 264 data points
Berlin weather data has: 0 missing values and 264 data points


In [138]:
# Append moving averages of the yearly temperature to both cleaned tables using
# pandas' rolling() method. Each new column is the rolling mean of `avg_temp` over
# the given number of rows; the keys below become the column names.
ma_windows = {'5Y-MA': 5, '10Y-MA': 10, '20Y-MA': 20}

# local table first, then the global one
for weather_table in (local_weather_clean, global_weather_clean):
    yearly_temp = weather_table['avg_temp']
    for column_name, window_size in ma_windows.items():
        weather_table[column_name] = yearly_temp.rolling(window = window_size).mean()


In [139]:
# Measure how closely the global and local series move together: once on the raw
# yearly temperatures and once on the 20-year moving averages.
yearly_corr = global_weather_clean['avg_temp'].corr(local_weather_clean['avg_temp'])
smoothed_corr = global_weather_clean['20Y-MA'].corr(local_weather_clean['20Y-MA'])

print("Correlation fo the yearly data {} and the \n20Y moving average {}".format(yearly_corr, smoothed_corr))

Correlation fo the yearly data 0.5159464209833218 and the 
20Y moving average 0.9061348758162008


In [140]:
# Summarise the gap between the two series (local minus global) by its mean and
# standard deviation.

# gap between the 20-year moving averages
ma20_gap = local_weather_clean['20Y-MA'] - global_weather_clean['20Y-MA']
print('20Y-MA Average difference: {}°C\nStandard deviation: {}°C\n'.format(ma20_gap.mean(), ma20_gap.std()))

# gap between the raw yearly values
yearly_gap = local_weather_clean['avg_temp'] - global_weather_clean['avg_temp']
print('Average difference: {}°C\nStandard deviation: {}°C'.format(yearly_gap.mean(), yearly_gap.std()))


20Y-MA Average difference: 0.5560775510204099°C
Standard deviation: 0.1611324997813315°C

Average difference: 0.5583333333333333°C
Standard deviation: 0.76625370483766°C


In [141]:
# Build an interactive Bokeh line chart comparing the local and global temperature series.
# `width`/`height` and `legend_label` are used because `plot_width`/`plot_height` and the
# `legend` glyph keyword are no longer accepted by current Bokeh releases.
weather_plot = figure(
    width=1600,
    height=800,
    title="Global and local temperature development over the past 250 years",
)

# title style
weather_plot.title.text_color = "red"
weather_plot.title.text_font_style = "bold"
weather_plot.title.text_font_size = '18px'

# visible window: the x axis starts at 1770 because the 20-year moving average
# has no values for the first 19 rows of the data
weather_plot.x_range = Range1d(1770, 2013)
weather_plot.y_range = Range1d(6, 11)

# show values when hovering over a line
weather_plot.add_tools(HoverTool())

# light red background
weather_plot.background_fill_color = "red"
weather_plot.background_fill_alpha = 0.2

# axis labels
weather_plot.xaxis.axis_label = "Year"
weather_plot.yaxis.axis_label = "Average Temperature in °C"

# semi-transparent black frame around the plot area
weather_plot.outline_line_width = 8
weather_plot.outline_line_alpha = 0.5
weather_plot.outline_line_color = "black"

# For each data set draw the raw yearly series (thin, dashed) and its 20-year
# moving average (thick, solid) in the same colour: local first, then global.
for series_label, weather_table, line_color in (
    ('Berlin', local_weather_clean, "blue"),
    ('Global', global_weather_clean, "green"),
):
    weather_plot.line(
        x='year', y='avg_temp', source=weather_table,
        color=line_color, line_dash='dashed', legend_label=series_label,
    )
    weather_plot.line(
        x='year', y='20Y-MA', source=weather_table,
        color=line_color, line_width=3, legend_label=series_label + ' 20Y-MA',
    )

# render the figure
show(weather_plot)