#### Importing Dependencies

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn import metrics

#### Data Pre-processing

In [None]:
# Load the gold price CSV into a pandas DataFrame.
gold_dataset = pd.read_csv('data/gold_price_data.csv')

# Preview the first five rows (5 is the default for head()).
gold_dataset.head(5)

In [None]:
# Report the (rows, columns) shape of the dataset.
print(gold_dataset.shape)

# Count missing values per column (isna is the canonical name of isnull).
gold_dataset.isna().sum()

In [None]:
# Summary statistics of the dataset. Printed explicitly: a bare expression
# that is not the last statement of a notebook cell is silently discarded.
print(gold_dataset.describe())

# Drop the 'Date' column so the remaining columns can be used for the
# correlation analysis and modelling below. `columns=` already selects the
# axis, so the redundant `axis=1` is omitted.
gold_dataset = gold_dataset.drop(columns='Date')

##### Correlation

In [None]:
# Pairwise correlation between the dataset columns, shown as a heatmap.
correlation = gold_dataset.corr()

plt.figure(figsize=(5, 5))
sns.heatmap(
    correlation,
    cmap='Blues',
    annot=True,             # write the coefficient inside each cell
    fmt='.1f',              # one decimal place for the annotations
    annot_kws={'size': 8},
    square=True,
    cbar=True,
)

In [None]:
# Correlation of every column with the gold price column (GLD).
correlation.loc[:, 'GLD']

In [None]:
# Distribution of the gold price (GLD) column.
plt.figure(figsize=(5, 5))
# `sns.distplot` is deprecated and scheduled for removal; `histplot` with a
# KDE overlay on a density scale is the documented replacement.
sns.histplot(gold_dataset['GLD'], kde=True, stat='density', color='green')

In [None]:
# Separate the target (GLD, the gold price) from the feature columns.
y = gold_dataset['GLD']
x = gold_dataset.drop(columns=['GLD'])

#### Model Training and Evaluation

In [None]:
# Split into train and test data (80% train / 20% test).
# train_test_split returns (X_train, X_test, y_train, y_test) in that order;
# unpacking in any other order silently swaps the train and test sets.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=3)

In [None]:
# Random forest with 100 trees. The seed is fixed (same value as the
# train/test split) so that the fitted model and its metrics are reproducible
# across runs.
regressor = RandomForestRegressor(n_estimators=100, random_state=3)
regressor.fit(x_train, y_train)

In [None]:
# Evaluate the fitted model on the held-out test data.
test_data_prediction = regressor.predict(x_test)

# R-squared (coefficient of determination). This is a score, not an error:
# higher is better, with 1.0 being a perfect fit.
# r2_score's signature is (y_true, y_pred); the metric is not symmetric, so
# the ground truth must be passed first.
error_score = metrics.r2_score(y_test, test_data_prediction)
print('R squared score is: ', error_score)

In [None]:
# Overlay the actual and the predicted gold prices for the test samples.

# Materialise the actual values as a plain list so that they are plotted
# against their position (0..n-1), the same x-axis the predictions use.
y_test = [*y_test]

plt.figure(figsize=(5, 5))
plt.plot(y_test, label='Actual Value', color='blue')
plt.plot(test_data_prediction, label='Predicted value', color='green')
plt.xlabel('Number of values')
plt.ylabel('Gold price')
plt.title('Actual Price vs Predicted price')
plt.legend()