# Imports

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the dataset

In [2]:
df = pd.read_csv('/Users/anilthapa/datasets/goldstock.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,Date,Close,Volume,Open,High,Low
0,0,2024-01-19,2029.3,166078.0,2027.4,2041.9,2022.2
1,1,2024-01-18,2021.6,167013.0,2009.1,2025.6,2007.7
2,2,2024-01-17,2006.5,245194.0,2031.7,2036.1,2004.6
3,3,2024-01-16,2030.2,277995.0,2053.4,2062.8,2027.6
4,4,2024-01-12,2051.6,250946.0,2033.2,2067.3,2033.1


# Dataset Inspection

In [3]:
df.drop('Unnamed: 0', axis = 1, inplace=True)

In [4]:
df.shape

(2511, 6)

In [5]:
df.isna().any()

Date      False
Close     False
Volume    False
Open      False
High      False
Low       False
dtype: bool

In [6]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2511 entries, 0 to 2510
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Date    2511 non-null   object 
 1   Close   2511 non-null   float64
 2   Volume  2511 non-null   float64
 3   Open    2511 non-null   float64
 4   High    2511 non-null   float64
 5   Low     2511 non-null   float64
dtypes: float64(5), object(1)
memory usage: 117.8+ KB


In [7]:
df.describe()

Unnamed: 0,Close,Volume,Open,High,Low
count,2511.0,2511.0,2511.0,2511.0,2511.0
mean,1498.726085,185970.770609,1498.725528,1508.451454,1488.869932
std,298.824811,97600.769382,299.118187,301.262244,296.417703
min,1049.6,1.0,1051.5,1062.7,1045.4
25%,1249.85,126693.5,1249.5,1257.3,1242.35
50%,1332.8,175421.0,1334.0,1342.4,1326.6
75%,1805.85,234832.0,1805.6,1815.45,1793.05
max,2093.1,787217.0,2094.4,2098.2,2074.6


In [8]:
df = df[['Date', 'Open', 'Close', 'Volume', 'Low', 'High', 'Volume']]

In [9]:
df.head()

Unnamed: 0,Date,Open,Close,Volume,Low,High,Volume.1
0,2024-01-19,2027.4,2029.3,166078.0,2022.2,2041.9,166078.0
1,2024-01-18,2009.1,2021.6,167013.0,2007.7,2025.6,167013.0
2,2024-01-17,2031.7,2006.5,245194.0,2004.6,2036.1,245194.0
3,2024-01-16,2053.4,2030.2,277995.0,2027.6,2062.8,277995.0
4,2024-01-12,2033.2,2051.6,250946.0,2033.1,2067.3,250946.0


In [10]:
df['Date'] = pd.to_datetime(df['Date'])

In [11]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2511 entries, 0 to 2510
Data columns (total 7 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   Date    2511 non-null   datetime64[ns]
 1   Open    2511 non-null   float64       
 2   Close   2511 non-null   float64       
 3   Volume  2511 non-null   float64       
 4   Low     2511 non-null   float64       
 5   High    2511 non-null   float64       
 6   Volume  2511 non-null   float64       
dtypes: datetime64[ns](1), float64(6)
memory usage: 137.4 KB


## Add two new columns day and month

In [12]:
df['day'] = df['Date'].dt.day
df['month'] = df['Date'].dt.month

In [13]:
df

Unnamed: 0,Date,Open,Close,Volume,Low,High,Volume.1,day,month
0,2024-01-19,2027.4,2029.3,166078.0,2022.2,2041.9,166078.0,19,1
1,2024-01-18,2009.1,2021.6,167013.0,2007.7,2025.6,167013.0,18,1
2,2024-01-17,2031.7,2006.5,245194.0,2004.6,2036.1,245194.0,17,1
3,2024-01-16,2053.4,2030.2,277995.0,2027.6,2062.8,277995.0,16,1
4,2024-01-12,2033.2,2051.6,250946.0,2033.1,2067.3,250946.0,12,1
...,...,...,...,...,...,...,...,...,...
2506,2014-01-28,1254.9,1250.5,81426.0,1248.0,1261.9,81426.0,28,1
2507,2014-01-27,1269.9,1263.5,63419.0,1252.0,1280.1,63419.0,27,1
2508,2014-01-24,1264.3,1264.5,34998.0,1256.9,1273.2,34998.0,24,1
2509,2014-01-23,1235.1,1262.5,41697.0,1230.8,1267.1,41697.0,23,1


# Now preprocess the cyclic nature of day and month

In [14]:
df['day_sin'] = np.sin(2 * np.pi * df.day / 31)
df['day_cos'] = np.cos(2 * np.pi * df.day / 31)

In [15]:
df['month_sin'] = np.sin( 2 * np.pi * df['month'] / 12)
df['month_cos'] = np.cos( 2 * np.pi * df['month'] / 12)

In [16]:
df.drop(['day', 'month'], axis = 1, inplace=True)

In [17]:
df.head()

Unnamed: 0,Date,Open,Close,Volume,Low,High,Volume.1,day_sin,day_cos,month_sin,month_cos
0,2024-01-19,2027.4,2029.3,166078.0,2022.2,2041.9,166078.0,-0.651372,-0.758758,0.5,0.866025
1,2024-01-18,2009.1,2021.6,167013.0,2007.7,2025.6,167013.0,-0.485302,-0.874347,0.5,0.866025
2,2024-01-17,2031.7,2006.5,245194.0,2004.6,2036.1,245194.0,-0.299363,-0.954139,0.5,0.866025
3,2024-01-16,2053.4,2030.2,277995.0,2027.6,2062.8,277995.0,-0.101168,-0.994869,0.5,0.866025
4,2024-01-12,2033.2,2051.6,250946.0,2033.1,2067.3,250946.0,0.651372,-0.758758,0.5,0.866025
