# Importing packages and datasets:

In [None]:
# `display` and `HTML` are part of the public IPython.display API; importing
# them from IPython.core.display is deprecated.
from IPython.display import HTML, display

# Let the notebook container use the full browser width so wide tables and
# plots are not clipped.
display(HTML("<style>.container{max-width:100%!important;width:auto!important;}</style>"))

In [None]:
# Load (or reload) the autoreload extension and use mode 2, which re-imports
# edited modules before each cell is executed.
%reload_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pandas.tseries.offsets import BDay
import seaborn as sns

import pandas_bokeh
from bokeh.models import ColumnDataSource
from bokeh.io import output_notebook
from bokeh.plotting import figure, output_file, show
from bokeh.palettes import Spectral6
from bokeh.layouts import row


output_notebook()

import xgboost as xgb
from xgboost import XGBRegressor

from sklearn.svm import SVR
from sklearn.preprocessing import MinMaxScaler
from sklearn.linear_model import Lasso
from sklearn import linear_model
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor  
from sklearn.preprocessing import StandardScaler
from collections import OrderedDict
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn import metrics
from sklearn.linear_model import SGDRegressor
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
from sklearn import neighbors

from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM

%matplotlib inline
sns.set()


### Importing Currency exchange rates dataset:

In [None]:
# Read the currency exchange-rate CSV from its local path and show it.
exchange_rates_csv = "C:/Users/johns/desktop/AI/dataset with factors.csv"
df1 = pd.read_csv(exchange_rates_csv)
df1

### Importing factors influencing FOREX rates dataset :

In [None]:
# Read the FOREX-factors CSV from its local path and show it.
forex_factors_csv = "C:/Users/johns/desktop/AI/FOREX factors.csv"
df2 = pd.read_csv(forex_factors_csv)
df2

### Importing US natural disaster dataset:

In [None]:
# Read the US natural-disaster events CSV from its local path and show it.
disaster_events_csv = "C:/Users/johns/downloads/events-US-1980-2020.csv"
df3 = pd.read_csv(disaster_events_csv)
df3

# Data Wrangling:

### Cleaning Currency exchange rates dataset:

In [None]:
# info() prints its report and returns None, so putting it in a tuple with
# .shape displayed `(shape, None)`. Print the report, then show the shape.
df1.info()
df1.shape

In [None]:
# Convert the 'Date' column from day/month/year text to datetime values.
rate_dates = pd.to_datetime(df1['Date'], format="%d/%m/%Y")
df1['Date'] = rate_dates

In [None]:
# Tag every row with the weekday name of its date, then keep only
# Monday-Friday rows (weekend rows are mostly null).
weekend_days = ['Saturday', 'Sunday']

df1['day_of_week'] = df1['Date'].dt.day_name()
is_weekend = df1['day_of_week'].isin(weekend_days)
df1 = df1[~is_weekend]


In [None]:
# Total number of missing cells across the whole frame.
df1.isna().sum().sum()

In [None]:
# df1 is the result of a boolean filter, so take an explicit copy before
# assigning into it. Interpolate only the numeric columns: the frame also
# holds a datetime column and the text 'day_of_week' column, and interpolating
# object-dtype columns is deprecated in recent pandas.
df1 = df1.copy()
numeric_columns = df1.select_dtypes(include='number').columns
df1[numeric_columns] = df1[numeric_columns].interpolate(method='linear', limit_direction='both')

In [None]:
# Preview the first 20 rows after interpolation.
df1.head(20)

In [None]:
# Count the missing values remaining in each column.
df1.isnull().sum()

### Cleaning FOREX factors dataset:

In [None]:
# Convert the factors' 'Date' column from day/month/year text to datetime,
# then print the column summary.
factor_dates = pd.to_datetime(df2['Date'], format="%d/%m/%Y")
df2['Date'] = factor_dates
df2.info()

### Merging currency exchange rates dataset and FOREX factors datasets:

In [None]:
# Outer-join rates and factors on 'Date' so that dates present in only one of
# the two tables are kept.
forex_df = df1.merge(df2, how='outer', on='Date')
forex_df

In [None]:
# Print dtypes and non-null counts of the merged frame.
forex_df.info()

In [None]:
# Save the merged rates + factors table to a local CSV.
merged_dataset_csv = r'C:/users/johns/desktop/AI/actual_dataset.csv'
forex_df.to_csv(merged_dataset_csv)

### Cleaning US natural disaster dataset:

In [None]:
# drop() returns a new frame, so the result has to be assigned for the column
# to actually be removed. errors='ignore' keeps the cell re-runnable once the
# column is gone.
df3 = df3.drop(columns=['End Date'], errors='ignore')

# Parse the begin date and keep it as a midnight-normalised datetime column so
# it can be joined against the datetime 'Date' key of the FOREX table.
df3['Begin Date'] = pd.to_datetime(df3['Begin Date'], format='%Y-%m-%d').dt.normalize()

### Merging US Natural disaster dataset with FOREX dataset:

In [None]:
# Give the disaster table the same join-key name as the FOREX table.
df3 = df3.rename(columns={'Begin Date' : 'Date'})

# Left join: keep every FOREX row and attach disaster details on dates where a
# disaster began.
disaster_df = forex_df.merge(df3, how='left', on='Date')

### Cleaning the final FOREX dataset:

In [None]:
# Binary flag: 1 where a disaster is recorded for the date, 0 otherwise.
# Assign whole columns instead of writing through `df[col].loc[mask] = ...`:
# that chained form writes to an intermediate object and is not guaranteed to
# update disaster_df (pandas raises SettingWithCopyWarning for it).
disaster_df['disaster_event'] = disaster_df['Disaster'].notna().astype(int)

# Dates without a disaster carry no damage and no deaths.
disaster_df['Damage Cost (Millions of Dollars)'] = disaster_df['Damage Cost (Millions of Dollars)'].fillna(0)
disaster_df['Deaths'] = disaster_df['Deaths'].fillna(0)

# Text placeholders for dates without a disaster.
disaster_df['Disaster'] = disaster_df['Disaster'].fillna("No disaster")
disaster_df['Name'] = disaster_df['Name'].fillna("No disaster")

# Keep only the columns used downstream, in a fixed order.
disaster_df = disaster_df[['Date','Name','disaster_event', 'Disaster', 'Damage Cost (Millions of Dollars)', 'Deaths', 'Goldprices', 'Debt', 'GDP', 'CPI', 'PPI','EUR:Euro', 'GBP:Pound Sterling', 'AUD:Australian Dollar', 'CAD:Canadian Dollar', 'CHF:Swiss Franc'
               , 'JPY:Japanese Yen', 'NZD:New Zealand Dollar']]

disaster_df.head(50)

# Exploratory Data Analysis on Time-Series Data:

In [None]:
# Line chart of six currency columns against the date. The JPY column is not
# in the plotted list.
plotted_currencies = [
    'EUR:Euro',
    'GBP:Pound Sterling',
    'AUD:Australian Dollar',
    'CAD:Canadian Dollar',
    'CHF:Swiss Franc',
    'NZD:New Zealand Dollar',
]
disaster_df.plot_bokeh.line(
    x='Date',
    y=plotted_currencies,
    ylabel='Value of currency for 1 USD',
    title='Comparision of currencies against USD',
    figsize=(1500, 800),
    panning=False,
)

In [None]:
# Scatter plot of Debt against GDP.
disaster_df.plot_bokeh(
    kind='scatter',
    x='GDP',
    y='Debt',
    ylabel='Debt',
    title='GDP vs Debt',
    figsize=(1500, 800),
    panning=False,
)

In [None]:
# Non-null count per column, restricted to rows flagged as disaster dates.
has_disaster = disaster_df['disaster_event'] == 1
disaster_df[has_disaster].count(axis=0)

In [None]:
# Rows on which a disaster is recorded.
disaster_count1 = disaster_df[disaster_df.disaster_event==1]

# Occurrences per disaster type as a one-column frame. The counts column is
# named 'Disaster' explicitly (and the index name cleared) because the name
# value_counts() gives its result differs between pandas versions, while the
# plot call below selects the column by the name 'Disaster'.
disaster_count2 = (
    disaster_count1['Disaster']
    .value_counts()
    .rename('Disaster')
    .rename_axis(None)
    .to_frame()
)
disaster_count2.plot_bokeh(kind='bar', y = 'Disaster', xlabel = 'Disaster type', ylabel = 'Number of occurences', title = 'Number of instances of each disaster type in the past 19 years', figsize = (1500,600), legend = 'top_right')


In [None]:
from sklearn.preprocessing import LabelEncoder

# Integer-encode the disaster type on a copy, so disaster_df keeps its text
# labels for the plots.
lbl = LabelEncoder()

featured_data = disaster_df.copy()
disaster_codes = lbl.fit(featured_data['Disaster']).transform(featured_data['Disaster'])
featured_data['Disaster'] = disaster_codes

featured_data.head(50)

In [None]:
# Correlate numeric columns only: the frame also holds text columns
# ('Name', 'Disaster') and a datetime column, and DataFrame.corr() raises on
# non-numeric data in recent pandas versions.
corrmat = disaster_df.select_dtypes(include='number').corr()
top_corr_features = corrmat.index

plt.figure(figsize=(20,20))
# Plot the heat map from the matrix computed above instead of recomputing it.
g=sns.heatmap(corrmat,annot=True,cmap="RdYlGn")

In [None]:
# Break the date into numeric calendar parts.
featured_data['year'] = featured_data['Date'].dt.year
featured_data['month'] = featured_data['Date'].dt.month
featured_data['day'] = featured_data['Date'].dt.day
# Series.dt.week was removed from pandas; isocalendar().week is its
# replacement. It returns a nullable UInt32 column, so cast to a plain integer
# dtype (assumes 'Date' has no missing values - TODO confirm).
featured_data['week'] = featured_data['Date'].dt.isocalendar().week.astype('int64')

# Preview the frame without the non-numeric 'Date' and 'Name' columns. drop()
# returns a new frame and featured_data itself is left unchanged here.
featured_data.drop(columns=['Date', 'Name']).head()

In [None]:
# Fix the column order: calendar parts, disaster fields, macro factors, then
# the seven currency columns (positions 13-19).
model_columns = [
    'year', 'month', 'day', 'week',
    'disaster_event', 'Disaster', 'Damage Cost (Millions of Dollars)', 'Deaths',
    'Goldprices', 'Debt', 'GDP', 'CPI', 'PPI',
    'EUR:Euro', 'GBP:Pound Sterling', 'AUD:Australian Dollar', 'CAD:Canadian Dollar',
    'CHF:Swiss Franc', 'JPY:Japanese Yen', 'NZD:New Zealand Dollar',
]
featured_data = featured_data[model_columns]

In [None]:
# For every column add a '<name>_Lags' copy shifted down by two rows, then drop
# the rows that are left incomplete (including the first two, which have no
# lagged value).
lagged_copy = featured_data.shift(2).add_suffix('_Lags')
features_lagged = pd.concat([featured_data, lagged_copy], axis=1)

features_lagged = features_lagged.dropna()
features_lagged

## Linear Regression Algorithm:

In [None]:
def linear_model(dataset):
    """Fit and report one linear-regression model per target column.

    For each column at positions 13-19 of ``dataset`` the remaining columns are
    used as predictors, the rows are split 75/25 with a fixed random state, a
    ``LinearRegression`` is fitted, and a prediction table, two plots and
    error metrics are printed/shown.

    Note: defining this function rebinds the name ``linear_model``, which the
    notebook also imports from scikit-learn (``from sklearn import
    linear_model``).

    Parameters
    ----------
    dataset : pandas.DataFrame
        Feature table; the targets are taken by position (columns 13-19).

    Returns
    -------
    None
    """
    for col in dataset.columns[13:20]:
        # NOTE(review): the notebook builds its lag columns with the suffix
        # '_Lags' (capital L), so `col + '_lags'` matches no column and the
        # target's own lag stays in the feature set. Confirm this is intended.
        feature_columns = dataset.columns.difference([col, col + '_lags'])
        x = dataset[feature_columns].values
        y = dataset[col].values
        x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=0)

        print('USD vs', col)
        print(' \n ')

        regressor = LinearRegression()
        regressor.fit(x_train, y_train)
        y_pred = regressor.predict(x_test)
        pred_df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
        pred_df_head = pred_df.head(20)
        print(pred_df_head)

        # Draw on explicit figures so both can be closed afterwards; a bare
        # plt.close() only closes the most recent figure and leaves the line
        # plot open on every loop iteration.
        line_fig, line_ax = plt.subplots()
        line_ax.plot(pred_df_head)

        bar_ax = pred_df_head.plot(kind='bar', figsize=(10, 8))
        bar_ax.grid(which='major', linestyle='-', linewidth=0.5, color='green')
        bar_ax.grid(which='minor', linestyle=':', linewidth=0.5, color='black')
        plt.show()
        plt.close(line_fig)
        plt.close(bar_ax.figure)

        # Absolute percentage error; the absolute value covers the denominator
        # as well so the result cannot be reduced by negative targets.
        mape = np.mean(100 * np.abs((y_pred - y_test) / y_test))
        accuracy = 100 - mape

        print('Mean Absolute Percentage Error:', mape)
        print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
        print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred))
        print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
        print('Accuracy:', accuracy)

        print(' \n ')
        print('---------------------------------------------')
        print(' \n ')

In [None]:
# Run the linear-regression evaluation on the lagged feature table.
linear_model(features_lagged)

## Decision Trees with Regression Algorithm:

In [None]:
def decisiontree_regressor(dataset):
    """Fit and report one decision-tree regressor per target column.

    For each column at positions 13-19 of ``dataset`` the remaining columns are
    used as predictors, the rows are split 75/25 with a fixed random state, the
    predictors are min-max scaled, a depth-10 ``DecisionTreeRegressor`` is
    fitted, and a prediction table, two plots and error metrics are
    printed/shown.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Feature table; the targets are taken by position (columns 13-19).

    Returns
    -------
    None
    """
    for col in dataset.columns[13:20]:
        # NOTE(review): the notebook builds its lag columns with the suffix
        # '_Lags' (capital L), so `col + '_lags'` matches no column and the
        # target's own lag stays in the feature set. Confirm this is intended.
        feature_columns = dataset.columns.difference([col, col + '_lags'])
        x = dataset[feature_columns].values
        y = dataset[col].values

        # Split first, then fit the scaler on the training rows only, so no
        # statistics of the held-out rows leak into the model inputs.
        x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=0)
        sc_x = MinMaxScaler()
        x_train = sc_x.fit_transform(x_train)
        x_test = sc_x.transform(x_test)

        print('USD vs', col)
        print(' \n ')

        # The default split criterion is squared error; the explicit 'mse'
        # spelling is rejected by recent scikit-learn, so it is left at the
        # default. random_state makes tie-breaking reproducible.
        DT_regressor = DecisionTreeRegressor(max_depth=10, random_state=0)

        DT_regressor.fit(x_train, y_train)
        y_pred = DT_regressor.predict(x_test)
        pred_df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
        pred_df_head = pred_df.head(20)
        print(pred_df_head)

        # Draw on explicit figures so both can be closed afterwards, and pass
        # the labels to legend() because lines plotted from a DataFrame carry
        # no labels of their own.
        line_fig, line_ax = plt.subplots()
        line_ax.plot(pred_df_head)
        line_ax.legend(list(pred_df_head.columns))

        bar_ax = pred_df_head.plot(kind='bar', figsize=(10, 8))
        bar_ax.grid(which='major', linestyle='-', linewidth=0.5, color='green')
        bar_ax.grid(which='minor', linestyle=':', linewidth=0.5, color='black')
        plt.show()
        plt.close(line_fig)
        plt.close(bar_ax.figure)

        mape = np.mean(100 * np.abs((y_pred - y_test) / y_test))
        accuracy = 100 - mape

        print('Mean Absolute Percentage Error:', mape)
        print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
        print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred))
        print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
        print('Accuracy:', accuracy)

        print(' \n ')
        print('---------------------------------------------')
        print(' \n ')

In [None]:
# Run the decision-tree evaluation on the lagged feature table.
decisiontree_regressor(features_lagged)

## KNN Algorithm:

In [None]:
def knn_algorithm(dataset):
    """Fit and report one 4-nearest-neighbours regressor per target column.

    For each column at positions 13-19 of ``dataset`` the remaining columns are
    used as predictors, the rows are split 75/25 with a fixed random state, the
    predictors are min-max scaled, a ``KNeighborsRegressor`` is fitted, and a
    prediction table, two plots and error metrics are printed/shown.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Feature table; the targets are taken by position (columns 13-19).

    Returns
    -------
    None
    """
    for col in dataset.columns[13:20]:
        # NOTE(review): the notebook builds its lag columns with the suffix
        # '_Lags' (capital L), so `col + '_lags'` matches no column and the
        # target's own lag stays in the feature set. Confirm this is intended.
        feature_columns = dataset.columns.difference([col, col + '_lags'])
        x = dataset[feature_columns].values
        y = dataset[col].values

        # Split first, then fit the scaler on the training rows only, so no
        # statistics of the held-out rows leak into the distance computation.
        x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=0)
        sc_x = MinMaxScaler()
        x_train = sc_x.fit_transform(x_train)
        x_test = sc_x.transform(x_test)

        print('USD vs', col)
        print(' \n ')

        knn_model = neighbors.KNeighborsRegressor(n_neighbors = 4)
        knn_model.fit(x_train, y_train)
        y_pred = knn_model.predict(x_test)

        pred_df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
        pred_df_head = pred_df.head(20)
        print(pred_df_head)

        # Draw on explicit figures so both can be closed afterwards; a bare
        # plt.close() only closes the most recent figure.
        line_fig, line_ax = plt.subplots()
        line_ax.plot(pred_df_head)

        bar_ax = pred_df_head.plot(kind='bar', figsize=(10, 8))
        bar_ax.grid(which='major', linestyle='-', linewidth=0.5, color='green')
        bar_ax.grid(which='minor', linestyle=':', linewidth=0.5, color='black')
        plt.show()
        plt.close(line_fig)
        plt.close(bar_ax.figure)

        mape = np.mean(100 * np.abs((y_pred - y_test) / y_test))
        accuracy = 100 - mape

        print('Mean Absolute Percentage Error:', mape)
        print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
        print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred))
        print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
        print('Accuracy:', accuracy)

        print(' \n ')
        print('---------------------------------------------')
        print(' \n ')

In [None]:
# Run the k-nearest-neighbours evaluation on the lagged feature table.
knn_algorithm(features_lagged)

## XG Boost Algorithm:

In [None]:
def xgb_model(dataset):
    """Fit and report one XGBoost regressor per target column.

    For each column at positions 13-19 of ``dataset`` the remaining columns are
    used as predictors, the rows are split 75/25 with a fixed random state, the
    predictors are min-max scaled, an ``XGBRegressor`` (100 trees, depth 3,
    learning rate 0.1) is fitted, and a prediction table, two plots and error
    metrics are printed/shown.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Feature table; the targets are taken by position (columns 13-19).

    Returns
    -------
    None
    """
    for col in dataset.columns[13:20]:
        # NOTE(review): the notebook builds its lag columns with the suffix
        # '_Lags' (capital L), so `col + '_lags'` matches no column and the
        # target's own lag stays in the feature set. Confirm this is intended.
        feature_columns = dataset.columns.difference([col, col + '_lags'])
        x = dataset[feature_columns].values
        y = dataset[col].values

        # Split first, then fit the scaler on the training rows only, so no
        # statistics of the held-out rows leak into the model inputs.
        x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=0)
        scaler = MinMaxScaler(feature_range=(0, 1))
        x_train = scaler.fit_transform(x_train)
        x_test = scaler.transform(x_test)

        print('USD vs', col)
        print(' \n ')

        model = XGBRegressor(objective ='reg:squarederror',
                     seed=100,
                     n_estimators=100,
                     max_depth=3,
                     learning_rate=0.1,
                     min_child_weight=1,
                     subsample=1,
                     colsample_bytree=1,
                     colsample_bylevel=1,
                     gamma=0)

        # Train the regressor
        model.fit(x_train, y_train)
        y_pred = model.predict(x_test)

        pred_df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
        pred_df_head = pred_df.head(20)
        print(pred_df_head)

        # Draw on explicit figures so both can be closed afterwards; a bare
        # plt.close() only closes the most recent figure.
        line_fig, line_ax = plt.subplots()
        line_ax.plot(pred_df_head)

        bar_ax = pred_df_head.plot(kind='bar', figsize=(10, 8))
        bar_ax.grid(which='major', linestyle='-', linewidth=0.5, color='green')
        bar_ax.grid(which='minor', linestyle=':', linewidth=0.5, color='black')
        plt.show()
        plt.close(line_fig)
        plt.close(bar_ax.figure)

        mape = np.mean(100 * np.abs((y_pred - y_test) / y_test))
        accuracy = 100 - mape

        print('Mean Absolute Percentage Error:', mape)
        print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
        print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred))
        print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
        print('Accuracy:', accuracy)

        print(' \n ')
        print('---------------------------------------------')
        print(' \n ')

In [None]:
# Run the XGBoost evaluation on the lagged feature table.
xgb_model(features_lagged)

## Support Vector Regression:

In [None]:
def svr_algorithm(dataset):
    """Fit and report one polynomial-kernel SVR per target column.

    For each column at positions 13-19 of ``dataset`` the remaining columns are
    used as predictors, the rows are split 75/25 with a fixed random state, the
    predictors are min-max scaled, an ``SVR(kernel='poly')`` is fitted, and a
    prediction table, two plots and error metrics are printed/shown.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Feature table; the targets are taken by position (columns 13-19).

    Returns
    -------
    None
    """
    for col in dataset.columns[13:20]:
        # NOTE(review): the notebook builds its lag columns with the suffix
        # '_Lags' (capital L), so `col + '_lags'` matches no column and the
        # target's own lag stays in the feature set. Confirm this is intended.
        feature_columns = dataset.columns.difference([col, col + '_lags'])
        x = dataset[feature_columns].values
        y = dataset[col].values

        # Split first, then fit the scaler on the training rows only, so no
        # statistics of the held-out rows leak into the kernel inputs.
        x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=0)
        sc_x = MinMaxScaler()
        x_train = sc_x.fit_transform(x_train)
        x_test = sc_x.transform(x_test)

        svr_model = SVR(kernel='poly')

        print('USD vs', col)
        print(' \n ')

        svr_model.fit(x_train, y_train)
        y_pred = svr_model.predict(x_test)

        pred_df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
        pred_df_head = pred_df.head(20)
        print(pred_df_head)

        # Draw on explicit figures so both can be closed afterwards; a bare
        # plt.close() only closes the most recent figure.
        line_fig, line_ax = plt.subplots()
        line_ax.plot(pred_df_head)

        bar_ax = pred_df_head.plot(kind='bar', figsize=(10, 8))
        bar_ax.grid(which='major', linestyle='-', linewidth=0.5, color='green')
        bar_ax.grid(which='minor', linestyle=':', linewidth=0.5, color='black')
        plt.show()
        plt.close(line_fig)
        plt.close(bar_ax.figure)

        mape = np.mean(100 * np.abs((y_pred - y_test) / y_test))
        accuracy = 100 - mape

        print('Mean Absolute Percentage Error:', mape)
        print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
        print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred))
        print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))
        print('Accuracy:', accuracy)

        print(' \n ')
        print('---------------------------------------------')
        print(' \n ')

In [None]:
# Run the support-vector-regression evaluation on the lagged feature table.
svr_algorithm(features_lagged)

## Artificial Neural Network:

In [None]:
def ann_algorithm(dataset):
    """Fit and report one small feed-forward network per target column.

    For each column at positions 13-19 of ``dataset`` the remaining columns are
    used as predictors, the rows are split 75/25 with a fixed random state, the
    predictors are min-max scaled, a Dense(18)-Dense(12)-Dense(1) network is
    trained for one epoch, and a prediction table, two plots and error metrics
    are printed/shown.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Feature table; the targets are taken by position (columns 13-19).

    Returns
    -------
    None
    """
    for col in dataset.columns[13:20]:
        # NOTE(review): the notebook builds its lag columns with the suffix
        # '_Lags' (capital L), so `col + '_lags'` matches no column and the
        # target's own lag stays in the feature set. Confirm this is intended.
        feature_columns = dataset.columns.difference([col, col + '_lags'])
        x = dataset[feature_columns].values
        y = dataset[col].values

        # Split first, then fit the scaler on the training rows only, so no
        # statistics of the held-out rows leak into the network inputs.
        x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=0)
        min_max_scaler = MinMaxScaler()
        x_train = min_max_scaler.fit_transform(x_train)
        x_test = min_max_scaler.transform(x_test)

        print('USD vs', col)
        print(' \n ')

        ann_model = Sequential()
        # Take the input width from the data instead of hard-coding 39, so the
        # network still builds if the number of feature columns changes.
        ann_model.add(Dense(18, input_dim=x_train.shape[1], activation='relu'))
        ann_model.add(Dense(12, activation='relu'))
        ann_model.add(Dense(1))
        ann_model.compile(loss='mean_squared_error', optimizer='adam')
        ann_model.fit(x_train, y_train, epochs=1, batch_size=16, verbose = 0, validation_split=0.2, shuffle=False)

        # predict() returns shape (n, 1) while y_test has shape (n,). Flatten
        # it, otherwise `y_pred - y_test` broadcasts to an (n, n) matrix and
        # the percentage error is averaged over every prediction/target pair.
        y_pred = ann_model.predict(x_test).ravel()

        pred_df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
        pred_df_head = pred_df.head(20)
        print(pred_df_head)

        # Draw on explicit figures so both can be closed afterwards; a bare
        # plt.close() only closes the most recent figure.
        line_fig, line_ax = plt.subplots()
        line_ax.plot(pred_df_head)

        bar_ax = pred_df_head.plot(kind='bar', figsize=(10, 8))
        bar_ax.grid(which='major', linestyle='-', linewidth=0.5, color='green')
        bar_ax.grid(which='minor', linestyle=':', linewidth=0.5, color='black')
        plt.show()
        plt.close(line_fig)
        plt.close(bar_ax.figure)

        mape = np.mean(100 * np.abs((y_pred - y_test) / y_test))
        accuracy = 100 - mape

        print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
        print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred))
        print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

        print(' \n ')
        print('---------------------------------------------')

        print('Mean Absolute Percentage Error:', mape)
        print('Accuracy:', accuracy)
        print(' \n ')
        print('---------------------------------------------')
        print(' \n ')

In [None]:
# Run the feed-forward network evaluation on the lagged feature table.
ann_algorithm(features_lagged)

## LSTM:

In [None]:
def lstm_algorithm(dataset):
    """Fit and report one two-layer LSTM network per target column.

    For each column at positions 13-19 of ``dataset`` the remaining columns are
    used as predictors, the rows are split 75/25 with a fixed random state, the
    predictors are min-max scaled and reshaped to one time step per sample, a
    stacked LSTM(72)-LSTM(72)-Dense(1) network is trained for one epoch, and a
    prediction table, two plots and error metrics are printed/shown.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Feature table; the targets are taken by position (columns 13-19).

    Returns
    -------
    None
    """
    for col in dataset.columns[13:20]:
        # NOTE(review): the notebook builds its lag columns with the suffix
        # '_Lags' (capital L), so `col + '_lags'` matches no column and the
        # target's own lag stays in the feature set. Confirm this is intended.
        feature_columns = dataset.columns.difference([col, col + '_lags'])
        x = dataset[feature_columns].values
        y = dataset[col].values

        # Split first, then fit the scaler on the training rows only, so no
        # statistics of the held-out rows leak into the network inputs.
        x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=0)
        min_max_scaler = MinMaxScaler()
        x_train = min_max_scaler.fit_transform(x_train)
        x_test = min_max_scaler.transform(x_test)

        # LSTM expects 3D input: (samples, time steps, features). Each row is
        # fed as a sequence of length one.
        x_train = x_train.reshape(x_train.shape[0], 1, x_train.shape[1])
        x_test = x_test.reshape(x_test.shape[0], 1, x_test.shape[1])

        model = Sequential()

        model.add(LSTM(units=72, dropout=0.2, recurrent_dropout=0.2,
                       input_shape=(x_train.shape[1], x_train.shape[2]),
                       return_sequences=True))

        model.add(LSTM(units=72, dropout=0.2, recurrent_dropout=0.2,
                       return_sequences=False))

        model.add(Dense(1, activation='relu'))
        model.compile(loss='mean_squared_error', optimizer='adam' )
        model.summary()

        # Fit the network. The held-out rows are passed as validation data, so
        # the per-epoch validation loss is reported on the test set.
        model.fit(x_train, y_train, epochs=1, batch_size=16, validation_data=(x_test, y_test), verbose=2, shuffle=True)

        # predict() returns shape (n, 1) while y_test has shape (n,). Flatten
        # it, otherwise `y_pred - y_test` broadcasts to an (n, n) matrix and
        # the percentage error is averaged over every prediction/target pair.
        y_pred = model.predict(x_test).ravel()

        pred_df = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
        pred_df_head = pred_df.head(20)
        print(pred_df_head)

        # Draw on explicit figures so both can be closed afterwards; a bare
        # plt.close() only closes the most recent figure.
        line_fig, line_ax = plt.subplots()
        line_ax.plot(pred_df_head)

        bar_ax = pred_df_head.plot(kind='bar', figsize=(10, 8))
        bar_ax.grid(which='major', linestyle='-', linewidth=0.5, color='green')
        bar_ax.grid(which='minor', linestyle=':', linewidth=0.5, color='black')
        plt.show()
        plt.close(line_fig)
        plt.close(bar_ax.figure)

        mape = np.mean(100 * np.abs((y_pred - y_test) / y_test))
        accuracy = 100 - mape

        print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
        print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred))
        print('Root Mean Squared Error:', np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

        print('Mean Absolute Percentage Error:', mape)
        print('Accuracy:', accuracy)

        print(' \n ')
        print('---------------------------------------------')
        print(' \n ')
      

In [None]:
# Run the LSTM evaluation on the lagged feature table.
lstm_algorithm(features_lagged)