In [1]:
import torch
import numpy as np
import pandas as pd
import sklearn
from sklearn import model_selection, preprocessing

In [2]:
# --- Explanatory series: one CSV per feature -------------------------------
df_energy = pd.read_csv("Annual Global Energy Consumption (1750-2017)")
df_volcano = pd.read_csv("Annual Number of Volcanic Eruptions (1750-2017)")
df_dam = pd.read_csv("Cumulative Number of Dams Built Yearly (1750-2017)")
df_temperature = pd.read_csv("Annual Temperature Anomalies (1750-2017)")
df_forest = pd.read_csv("Annual Global Forest Coverage (1750-2017)")

# --- Series to be predicted -------------------------------------------------
df_earthquake = pd.read_csv("Annual Number of Earthquakes (1750-2017)")

# Row counts of all six frames, printed on one line so a mismatch is easy to
# spot; later cells combine the series row by row and need equal lengths.
print(*map(len, (df_energy, df_volcano, df_dam, df_temperature, df_forest, df_earthquake)))

268 268 268 268 268 268


In [3]:
def _column_vector(frame, column):
    """Return ``frame[column]`` as a 2-D NumPy array of shape (n_rows, 1)."""
    # Indexing with np.newaxis appends a length-1 axis, turning the flat
    # column into the 2-D layout used by the scaling cell further down.
    return frame[column].values[:, np.newaxis]


# Five feature columns followed by the target column.
input1 = _column_vector(df_energy, 'Energy Consumption (exajoules)')
input2 = _column_vector(df_volcano, 'Number of Volcanic Eruptions')
input3 = _column_vector(df_dam, 'Total Dams Built')
input4 = _column_vector(df_temperature, 'Annual Average Temperature Anomaly (celsius)')
input5 = _column_vector(df_forest, 'Global Forest Coverage (square kilometers)')
target = _column_vector(df_earthquake, 'Number of Earthquakes')

In [9]:
# Sanity check: the feature should be a 2-D column vector, i.e. (n_rows, 1).
input1.shape

(268, 1)

In [4]:
# Rescale every feature column independently into the range [0.1, 1].
# The same scaler object is refit for each column in turn, so once this cell
# has run it only holds the min/max learned from input5.
# NOTE(review): each fit sees all rows, including the rows a later cell holds
# out as the test set -- confirm that this leakage is acceptable.
scaler = preprocessing.MinMaxScaler(feature_range=(0.1, 1))
input1, input2, input3, input4, input5 = (
    scaler.fit_transform(column)
    for column in (input1, input2, input3, input4, input5)
)

In [5]:
# Inspect the energy-consumption feature after min-max scaling to [0.1, 1].
input1

array([[0.1       ],
       [0.10012742],
       [0.10025484],
       [0.10038226],
       [0.10050968],
       [0.1006371 ],
       [0.10076451],
       [0.10089193],
       [0.10101935],
       [0.10114677],
       [0.10127419],
       [0.1016809 ],
       [0.10208761],
       [0.10249432],
       [0.10290102],
       [0.10330773],
       [0.10371444],
       [0.10412115],
       [0.10452786],
       [0.10493456],
       [0.10534127],
       [0.10574798],
       [0.10615469],
       [0.1065614 ],
       [0.10696811],
       [0.10737481],
       [0.10778152],
       [0.10818823],
       [0.10859494],
       [0.10900165],
       [0.10940835],
       [0.10981506],
       [0.11022177],
       [0.11062848],
       [0.11103519],
       [0.11144189],
       [0.1118486 ],
       [0.11225531],
       [0.11266202],
       [0.11306873],
       [0.11347544],
       [0.11388214],
       [0.11428885],
       [0.11469556],
       [0.11510227],
       [0.11550898],
       [0.11591568],
       [0.116

In [6]:
# Combine the five scaled (n_rows, 1) feature columns into a single array of
# shape (n_rows, 5, 1): one (5, 1) block of feature values per row.
# np.stack with axis=1 inserts the new "feature" axis between the row axis and
# the trailing length-1 axis, which is exactly what the previous per-row
# Python loop produced, but in one vectorised call and without leaving loop
# variables behind in the notebook namespace.
feature_data = np.stack([input1, input2, input3, input4, input5], axis=1)

# Guard against a feature column that was not shaped (n_rows, 1).
assert feature_data.shape == (len(input1), 5, 1), feature_data.shape

In [7]:
# Inspect the assembled feature array: one block of five scaled values per row.
feature_data

array([[[0.1       ],
        [0.1       ],
        [0.1       ],
        [0.40568336],
        [1.        ]],

       [[0.10012742],
        [0.1       ],
        [0.1       ],
        [0.35876471],
        [0.99817337]],

       [[0.10025484],
        [0.1       ],
        [0.1       ],
        [0.54235224],
        [0.99634674]],

       ...,

       [[0.97382055],
        [0.73      ],
        [0.99403372],
        [0.94932456],
        [0.1487925 ]],

       [[0.98263547],
        [0.55      ],
        [0.99688716],
        [1.        ],
        [0.10104815]],

       [[1.        ],
        [0.82      ],
        [1.        ],
        [0.96878971],
        [0.1       ]]])

In [8]:
# Give every target row its own leading axis so that each sample is a (1, 1)
# array, then stack the samples into one array of shape (n_rows, 1, 1).
target_array = [row.reshape(1, -1).copy() for row in target]
target_data = np.stack(target_array)

In [10]:
# Row index 250 corresponds to the year 2000: everything before it is kept for
# training/validation, everything from it onward is held out as the test set.
TEST_START_INDEX = 250
nontest_feature, test_feature = feature_data[:TEST_START_INDEX], feature_data[TEST_START_INDEX:]
nontest_target, test_target = target_data[:TEST_START_INDEX], target_data[TEST_START_INDEX:]

# Divide the non-test rows 80/20 into a training part and a validation part
# (the validation part is used to check the accuracy of the model).
# shuffle=False keeps the rows in their original order, so the first 80% of
# the non-test rows train the model and the remaining 20% validate it.
(
    train_feature,
    val_feature,
    train_target,
    val_target,
) = model_selection.train_test_split(
    nontest_feature,
    nontest_target,
    train_size=0.8,
    shuffle=False,
)

# Arrays produced by this cell: train_feature, train_target, val_feature,
# val_target, test_feature and test_target.

In [11]:
# Persist every split as its own .npy file in the current working directory.
# The mapping keeps insertion order, so files are written in the listed order.
arrays_by_filename = {
    "Train(Features).npy": train_feature,
    "Train(Targets).npy": train_target,
    "Val(Features).npy": val_feature,
    "Val(Targets).npy": val_target,
    "Test(Features).npy": test_feature,
    "Test(Targets).npy": test_target,
}
for filename, array in arrays_by_filename.items():
    np.save(filename, array)