In [8]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the "../input/" directory.
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found
for root, _, files in os.walk('/kaggle/input'):
    for path in (os.path.join(root, name) for name in files):
        print(path)

# Any results you write to the current directory are saved as output.

In [9]:
# Load the GlobalTemperatures table from the Kaggle dataset and display it
GLOBAL_TEMPERATURES_CSV = '/kaggle/input/climate-change-earth-surface-temperature-data/GlobalTemperatures.csv'
temperatures = pd.read_csv(GLOBAL_TEMPERATURES_CSV)
temperatures

In [10]:
# Start with a line plot of how the land temperature evolves over time
import matplotlib.pyplot as plt

# read_csv was called without date parsing, so convert 'dt' here: plotting the raw
# date strings would make matplotlib treat each one as a separate category
# (one tick per row) instead of drawing a continuous time axis.
dates = pd.to_datetime(temperatures['dt'])

# Land average temperature and its reported uncertainty
plt.figure(figsize=(15,15))
plt.plot(dates, temperatures['LandAverageTemperature'])
plt.plot(dates, temperatures['LandAverageTemperatureUncertainty'])
plt.legend(['Average temperature','Average temperature uncertainty'])
plt.title('Land average temperature evolution over almost 300 years')
plt.xlabel('Date time')
plt.ylabel('Average Temperature (celsius degrees)')
plt.show()

# Observations:
# - The average land temperature increases slightly overall, while its variance stays
#   roughly unchanged throughout the years.
# - The slightly larger variance in the earliest recorded years appears to be explained
#   by the higher measurement uncertainty at that time.
# - That, in turn, supports the first observation: the average land temperature has
#   been rising over the years.

In [11]:
# Preview the first five rows (head() returns five rows by default)
temperatures.head()

In [12]:
# Drop the columns that are not used by the rest of the notebook.
# Rebinding the name (instead of inplace=True) together with errors='ignore' lets this
# cell be re-run without raising KeyError once the columns are already gone.
unused_columns = [
    'LandAverageTemperatureUncertainty',
    'LandMaxTemperature',
    'LandMaxTemperatureUncertainty',
    'LandMinTemperature',
    'LandMinTemperatureUncertainty',
    'LandAndOceanAverageTemperature',
    'LandAndOceanAverageTemperatureUncertainty',
]
temperatures = temperatures.drop(columns=unused_columns, errors='ignore')

In [13]:
# Work on an independent copy: a plain assignment would only alias `temperatures`,
# so every column added to `dataset` afterwards would also appear on `temperatures`.
dataset = temperatures.copy()
dataset.head(5)

In [14]:
# Parse the date column, then extract year and month with the vectorised .dt accessor
# (avoids calling a Python lambda once per row).
dataset['date'] = pd.to_datetime(dataset['dt'])
dataset['y'] = dataset['date'].dt.year
dataset['m'] = dataset['date'].dt.month

In [15]:
# Distinct years present in the data, in ascending order
# (a bare set() gives no ordering guarantee, so sort explicitly).
years = sorted(dataset['y'].unique().tolist())

In [16]:
# Span of calendar years covered by the data, both endpoints included
year_min = dataset['y'].min()
year_max = dataset['y'].max()

length = year_max - year_min + 1

# One entry per year, from year_min through year_max
years = [year_min + offset for offset in range(length)]

In [17]:
# Average the readings of each year, since the model works on yearly values.
# A year whose mean is undefined (no valid reading) is filled by linear interpolation
# from its neighbours; filling with 0.0 would inject a physically meaningless 0 degree
# year and distort the min/max scaling applied later. The series length is unchanged.
meantemperature = (
    dataset.groupby('y')['LandAverageTemperature']
    .mean()
    .interpolate(limit_direction='both')
)

In [18]:
# Wrap the yearly means in a one-column DataFrame.
# NOTE: the column is labelled 'years' but it holds the mean temperatures;
# the years themselves are the index inherited from `meantemperature`.
average = meantemperature.to_frame(name='years')

average

In [19]:
# Yearly means as a 2-D NumPy column vector of shape (n_years, 1)
data_lstm = average['years'].to_numpy().reshape(-1, 1)
data_lstm.shape

In [20]:


from sklearn.preprocessing import MinMaxScaler

# Create training and testing sets
ratio = 0.9          # fraction of the yearly series assigned to training
input_length = 5     # each sample is a window of 5 consecutive years
output_length = 1    # the model predicts the single year that follows the window

# Always start from the unscaled yearly means in `average`. Re-scaling `data_lstm`
# itself would, on a second run of this cell, refit the scaler on already-scaled
# values and make scaler.inverse_transform return scaled numbers instead of degrees.
raw_values = average.to_numpy().reshape(-1, 1)

# Index of the first value that belongs to the testing part
split = int(np.ceil(ratio * len(raw_values)))

# Fit the scaler on the training part only, so the min/max of the held-out years does
# not leak into training; then apply it to the whole series. Training values land in
# [0, 1]; later values may fall slightly outside that range.
scaler = MinMaxScaler()
scaler.fit(raw_values[:split])
data_lstm = scaler.transform(raw_values)

# Sliding windows: x holds `input_length` consecutive years, y the year right after
x_train = [data_lstm[i:i+input_length] for i in range(split-input_length)]
y_train = [data_lstm[i+input_length][0] for i in range(split-input_length)]
x_test = [data_lstm[i+split:i+split+input_length] for i in range(len(data_lstm)-split-input_length)]
y_test = [data_lstm[i+split+input_length][0] for i in range(len(data_lstm)-split-input_length)]

# Check shapes and look at some of the values
print(np.shape(x_train),np.shape(y_train))
print(np.shape(x_test),np.shape(y_test))
print(x_train[0])
print(y_train[0])
print(x_test[0])
print(y_test[0])

# Reshape x_train and x_test to (samples, timesteps, features) for the LSTM layers
x_train_lstm = np.reshape(x_train, (np.shape(x_train)[0], np.shape(x_train)[1], 1))
x_test_lstm = np.reshape(x_test, (np.shape(x_test)[0], np.shape(x_test)[1], 1))

print(np.shape(x_train_lstm))

In [22]:
# Sanity check: show which container type holds the training targets
type(y_train)

In [85]:
# Train a small LSTM that predicts the next yearly average land temperature from the
# preceding `input_length` years.

# Import deep learning libraries. Layers are imported from `keras.layers`: the
# `keras.layers.recurrent` / `keras.layers.core` sub-module paths are not available
# in current Keras releases.
from keras.models import Sequential
from keras.layers import LSTM, Dense, Activation, Dropout
from keras.optimizers import SGD

# Declare the layers: two stacked LSTM layers followed by a linear output layer
layers = [LSTM(units=8, input_shape=(input_length,1), activation='relu',return_sequences=True),
          LSTM(units=2, activation='relu'),
          Dense(output_length)]

# Build our model from the layer list
lstm = Sequential(layers)

# Compile our model. This is a regression task, so track mean absolute error;
# classification 'accuracy' carries no meaning for continuous targets.
lstm.compile(loss='mean_squared_error', optimizer='adam', metrics=['mae'])

# Fit the model. Targets are converted from Python lists to NumPy arrays so Keras
# receives one array of targets rather than a list of scalars.
history_lstm = lstm.fit(
    x_train_lstm,
    np.asarray(y_train),
    validation_data=(x_test_lstm, np.asarray(y_test)),
    epochs=20,
    batch_size=16,
)

In [86]:
# Training and validation loss per epoch
fig, ax = plt.subplots()
for curve in ('loss', 'val_loss'):
    ax.plot(history_lstm.history[curve])
ax.set_title('model loss')
ax.set_ylabel('loss')
ax.set_xlabel('epochs')
ax.legend(['train','val'], loc='upper left')

In [94]:
# One-step-ahead predictions over the recorded series: every window of `input_length`
# consecutive values in data_lstm is fed to the model, which estimates the value that
# follows it. These are predictions for years already in the data, not future years.
n_windows = len(data_lstm) - input_length
x_pred = np.array([data_lstm[start:start + input_length] for start in range(n_windows)])
x_pred = x_pred.reshape(n_windows, input_length, 1)

# Run the model on all windows
pred = lstm.predict(x_pred)

# Last 35 predictions converted back from the scaled range
t = scaler.inverse_transform(pred[-35:])

In [92]:
# Shape of the prediction array returned by lstm.predict
pred.shape

In [88]:
# Plot the recorded series together with a forecast for the years after the data ends.
#
# The forecast is built recursively: the model predicts the year following the last
# `input_length` values, that prediction is appended to the window, the oldest value
# is dropped, and the step is repeated. (Plotting the predictions made for the last
# recorded years against future dates would present in-sample estimates as a forecast.)
forecast_horizon = 35   # number of future years to predict

window = np.array(data_lstm[-input_length:], dtype=float).reshape(1, input_length, 1)
forecast_scaled = []
for _ in range(forecast_horizon):
    next_value = float(lstm.predict(window, verbose=0)[0, 0])
    forecast_scaled.append(next_value)
    # Slide the window one year forward, feeding the prediction back in
    window = np.concatenate([window[:, 1:, :], [[[next_value]]]], axis=1)

# Back to degrees, and the calendar years the forecast refers to
forecast = scaler.inverse_transform(np.reshape(forecast_scaled, (-1, 1)))
first_forecast_year = years[-1] + 1
forecast_years = [first_forecast_year + k for k in range(forecast_horizon)]

plt.figure(figsize=(10,10))
plt.plot(years,scaler.inverse_transform(data_lstm),color='b')
plt.plot(forecast_years,forecast,color='r')
plt.legend(['recorded data','prediction'])
plt.xlabel('Years')
plt.ylabel('Average temperature (celsius degree)')
# Only the land average temperature is modelled (the land-and-ocean columns were dropped)
plt.title(f'Average land temperature forecast over next {forecast_horizon} years.')
plt.show()

In [105]:
from sklearn.metrics import mean_squared_error

# Evaluate on the held-out years only. pred[i] is the model's estimate for
# data_lstm[i + input_length], so the targets starting at split + input_length pair up
# with pred[split:]. Deriving both offsets from `split` and `input_length` keeps the
# two arrays aligned if either setting changes (hard-coded 255/250 would not).
actual_degrees = scaler.inverse_transform(data_lstm)[split + input_length:]
predicted_degrees = scaler.inverse_transform(pred[split:])

mse = mean_squared_error(actual_degrees, predicted_degrees)
rmse = np.sqrt(mse)   # typical error, in degrees Celsius

# Kept for the cell that displays it. NOTE: 1 - MSE is not an accuracy in the
# classification sense; `mse` / `rmse` above are the meaningful regression metrics.
accuray = 1 - mse

In [106]:
# Display the score computed in the previous cell (the variable is spelled 'accuray' there)
accuray