In [None]:
# Importing the dependencies
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor 

Data collection and processing

In [None]:
# Reading the gold price CSV file into a pandas DataFrame
dataset = pd.read_csv(
    filepath_or_buffer='../input/gold-price-data/gld_price_data.csv',
)

In [None]:
# Displaying the first 5 rows of the dataset
dataset.head(n=5)

In [None]:
# Displaying the last 5 rows of the dataset
dataset.tail(n=5)

In [None]:
# Checking the number of rows and columns; `shape` is a (rows, columns) tuple
dataset.shape

In [None]:
# Counting the missing (null) values in each column of the dataset
dataset.isna().sum()

In [None]:
# Getting a summary of the dataset: column names, non-null counts, dtypes and memory usage
dataset.info()

In [None]:
# Getting descriptive statistics (count, mean, std, min, quartiles, max) for the numeric columns
dataset.describe()

In [None]:
# Counting duplicated rows (every repeat after the first occurrence is counted)
dataset.duplicated().sum()

In [None]:
# Reporting how many distinct values each column holds
for column_name, unique_count in dataset.nunique().items():
    print('Number of ', column_name, ' unique values: ', unique_count)

In [None]:
# Converting the 'Date' column to pandas datetime values
dataset['Date'] = pd.to_datetime(
    arg=dataset['Date'],
)

In [None]:
# Sorting the rows chronologically by the 'Date' column
dataset = dataset.sort_values(by='Date')

In [None]:
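# Indexing by the Date column (intentionally left disabled):
# 'Date' must stay a regular column because the feature/target split below
# drops it by column name, which would fail if it were moved into the index.
# dataset.set_index('Date', inplace = True) 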

In [None]:
# Computing the pairwise correlation between the numeric columns.
# 'Date' was converted to datetime above, and under pandas >= 2.0 corr() no
# longer drops non-numeric columns by default, so the numeric columns are
# selected explicitly to keep the result the same across pandas versions.
corr = dataset.select_dtypes(include='number').corr()

In [None]:
# Drawing an annotated heatmap of the correlation matrix
sns.heatmap(corr, square=True, cmap='plasma', annot=True)

In [None]:
# Drawing a pairplot to show the distribution of each column and the
# pairwise relationships between columns
# (alternative kept for reference: sns.pairplot(data=dataset, hue='EUR/USD'))
sns.pairplot(dataset)

In [None]:
# Plotting the distribution of the Gold price as a histogram with a KDE curve.
# sns.distplot is deprecated since seaborn 0.11 and scheduled for removal;
# histplot with kde=True and stat='density' is the documented replacement and
# draws a comparable density-scaled figure.
sns.histplot(dataset['GLD'], kde=True, stat='density', color='Blue')

In [None]:
# Separating the target (GLD price) from the feature columns;
# 'Date' is excluded from the features along with the target itself
y = dataset['GLD']
x = dataset.drop(columns=['Date', 'GLD'])

In [None]:
# Holding out 20% of the rows as a test set; the fixed random_state makes
# the split reproducible
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=2,
)

In [None]:
# Creating the Random Forest Regressor model.
# random_state is fixed (same seed as the train/test split) so that the
# bootstrap sampling and feature selection inside the forest, and therefore
# the reported scores, are reproducible across runs.
model = RandomForestRegressor(n_estimators=100, random_state=2)

In [None]:
# Training the model on the training features and target
model.fit(X=x_train, y=y_train)

In [None]:
# Evaluation of the model
# Predicting the target for the training features
train_data_pred = model.predict(X=x_train)

In [None]:
# Computing the R squared score (coefficient of determination) on the training data.
# R squared is a goodness-of-fit score (1.0 is a perfect fit), not an error,
# and the label names the split so the printed value is unambiguous.
train_score = metrics.r2_score(y_train, train_data_pred)
print('R squared score (train): ', train_score)

In [None]:
# Predicting the target for the held-out test features
test_data_pred = model.predict(X=x_test)

In [None]:
# Computing the R squared score (coefficient of determination) on the testing data.
# R squared is a goodness-of-fit score (1.0 is a perfect fit), not an error,
# and the label names the split so the printed value is unambiguous.
test_score = metrics.r2_score(y_test, test_data_pred)
print('R squared score (test): ', test_score)

In [None]:
# Plotting the actual and predicted GLD prices of the test samples on one
# set of axes; y_test is turned into a plain list so it is plotted by position
y_test = list(y_test)
ax = plt.axes()
ax.set_title('Actual price vs. Predicted price')
ax.set_xlabel('Number of values')
ax.set_ylabel('GLD price')
ax.plot(y_test, color='blue', label='Actual value')
ax.plot(test_data_pred, color='red', label='Predicted value')
ax.legend()
plt.show()