# Task: Predict the gold rate

## Data Exploration

In [7]:
import pandas as pd
import numpy as np
import seaborn as sns
import os

In [8]:
# Load the annual gold-rate table and preview its first five rows.
df_ann = pd.read_csv(filepath_or_buffer="annual_gold_rate.csv")
df_ann.head(n=5)

Unnamed: 0,Date,USD,EUR,GBP,INR,AED,CNY
0,1980-12-31,614.5,378.19461,263.80259,4831.74943,2278.36795,
1,1981-12-31,459.26,360.90588,227.01948,3966.6479,1691.03194,
2,1982-12-31,375.3,345.59742,215.53232,3553.6268,1378.72895,
3,1983-12-30,423.66,439.89318,279.24936,4270.42821,1557.4579,
4,1984-12-31,360.78,425.91,269.81553,4067.60433,1324.72272,


In [9]:
# Load the daily gold-rate table and preview its first five rows.
df_day = pd.read_csv(filepath_or_buffer="daily_gold_rate.csv")
df_day.head(n=5)

Unnamed: 0,Date,USD,EUR,GBP,INR,AED,CNY
0,1985-01-01,308.3,402.45,266.01,3825.85,1134.42,
1,1985-01-02,305.5,401.49,266.0,3787.05,1134.42,
2,1985-01-03,302.3,397.08,262.76,3718.88,1107.67,
3,1985-01-04,303.15,397.15,262.58,3759.64,1118.21,
4,1985-01-07,298.25,392.55,261.17,3741.23,1091.58,835.8


In [10]:
# Summary statistics of the numeric columns, shown with one row per column.
df_day.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
USD,10114.0,799.552704,544.858424,252.8,356.6125,440.525,1270.45,2067.15
EUR,10114.0,669.997853,463.177911,222.2,307.29,375.07,1063.8375,1863.68
GBP,10114.0,540.951871,418.160576,156.89,221.5325,265.35,884.7625,1634.97
INR,10114.0,43928.065703,43583.24479,3687.17,11748.69,18305.155,79378.0625,167324.36
AED,10114.0,2936.556642,2001.379712,928.48,1310.965,1617.855,4666.365,7592.64
CNY,10110.0,5307.108772,3659.619165,809.63,2220.95,3376.265,8326.245,14373.93


In [11]:
# Number of missing values in each column.
df_day.isna().sum()

Date    0
USD     0
EUR     0
GBP     0
INR     0
AED     0
CNY     4
dtype: int64

In [12]:
# Summary statistics of the GBP column alone (transposing a Series returns the
# Series itself, so no transpose is needed here).
df_day["GBP"].describe()

count    10114.000000
mean       540.951871
std        418.160576
min        156.890000
25%        221.532500
50%        265.350000
75%        884.762500
max       1634.970000
Name: GBP, dtype: float64

In [None]:
# read_csv loaded "Date" as plain text; plotting text on the x-axis treats every
# distinct date as its own category rather than a point on a time axis, so the
# dates are parsed to datetimes for the plot (df_day itself is left unchanged).
gbp_ax = sns.lineplot(x=pd.to_datetime(df_day["Date"]), y=df_day["GBP"])
gbp_ax.set_title("Daily gold price in GBP");

## Create Dataloader

### Use GBP ONLY

In [13]:
# Discard every currency column except GBP; "Date" and "GBP" remain.
drop = ['EUR', 'USD', 'INR', 'AED', 'CNY']
df = df_day.drop(columns=drop)
df.head()

Unnamed: 0,Date,GBP
0,1985-01-01,266.01
1,1985-01-02,266.0
2,1985-01-03,262.76
3,1985-01-04,262.58
4,1985-01-07,261.17


### Create Dataset and Dataloader

In [14]:
# Convert each date to an integer day offset from the earliest date in the data.
# The offsets are computed from the untouched df_day['Date'] column (df was built
# from df_day and keeps its row index), so re-running this cell is safe.
# Converting df['Date'] in place is not: on a second run the integer offsets
# would be parsed as nanosecond timestamps and every offset would become 0.
dates = pd.to_datetime(df_day['Date'])
df['Date'] = (dates - dates.min()).dt.days
df.head()

Unnamed: 0,Date,GBP
0,0,266.01
1,1,266.0
2,2,262.76
3,3,262.58
4,6,261.17


In [15]:
# Define the Dataset wrapper (the DataLoaders are created in the split step below)
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader

class GoldPriceDataset(Dataset):
    """Map-style dataset that yields one (feature, label) pair per dataframe row.

    Both values are returned as 0-dimensional float32 tensors.

    Args:
        dataframe: Table containing at least the feature and target columns.
        transform: Optional callable; when given it is applied separately to the
            feature tensor and to the label tensor.
        feature_col: Name of the column used as the model input.
        target_col: Name of the column used as the regression target.
    """

    def __init__(self, dataframe, transform=None, feature_col='Date', target_col='GBP'):
        self.dataframe = dataframe
        self.transform = transform
        self.feature_col = feature_col
        self.target_col = target_col

    def __len__(self):
        """Return the number of rows (samples) in the wrapped dataframe."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Return ``(features, labels)`` for the row at integer position ``idx``."""
        # Look the row up once; every iloc call materialises a new row Series.
        row = self.dataframe.iloc[idx]

        features = torch.tensor(row[self.feature_col], dtype=torch.float32)
        labels = torch.tensor(row[self.target_col], dtype=torch.float32)

        if self.transform:
            features = self.transform(features)
            labels = self.transform(labels)

        return features, labels

In [16]:
# Wrap the (Date, GBP) frame so it can be indexed sample by sample.
dataset = GoldPriceDataset(dataframe=df)

### Split train, test dataset

In [17]:
# Normal split
from torch.utils.data import Subset

# Sequential hold-out: the first 95% of rows (in file order) are used for
# training and the remaining rows for testing.
dataset_size = len(dataset)
train_size = int(0.95 * dataset_size)
test_size = dataset_size - train_size

train_dataset = Subset(dataset, range(train_size))
test_dataset = Subset(dataset, range(train_size, dataset_size))

# Only the training batches are shuffled; test batches keep the row order.
train_loader = DataLoader(dataset=train_dataset, batch_size=2, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=2, shuffle=False)

## Create Neural Network Model

In [28]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Three-layer fully connected regressor producing one value per sample.

    The first layer previously expected 2 input features, although each sample
    carries a single scalar feature. With a batch size of 2, a flat ``(2,)``
    batch was therefore consumed as ONE two-feature sample and produced a single
    output for two targets. The default is now one input feature, and flat
    batches are handled explicitly in ``forward``.

    Args:
        in_features: Number of input values per sample.
        hidden_size: Width of the two hidden layers.
    """

    def __init__(self, in_features=1, hidden_size=64):
        super().__init__()
        self.fc1 = nn.Linear(in_features, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, 1)

    def forward(self, x):
        """Run the network.

        Args:
            x: Either a ``(batch, in_features)`` tensor, or -- when the model has
                a single input feature -- a flat ``(batch,)`` tensor of scalars
                (a 0-dimensional tensor is treated as a batch of one).

        Returns:
            ``(batch, 1)`` predictions for 2-D input; for flat input the result
            is flattened to ``(batch,)`` so it lines up element-wise with a flat
            target tensor instead of broadcasting against it.
        """
        flat_batch = self.fc1.in_features == 1 and x.dim() <= 1
        if flat_batch:
            # One scalar per sample -> one row per sample.
            x = x.reshape(-1, 1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x.squeeze(-1) if flat_batch else x

## Train and Test the model

### Loss function and optimizer

In [29]:
import torch.optim as optim

# Model, mean-squared-error objective, and plain SGD over all model parameters.
net = Net()
criterion = nn.MSELoss(reduction='mean')
optimizer = optim.SGD(params=net.parameters(), lr=1e-3)

In [30]:
num_epochs = 100
for epoch in range(num_epochs):
    net.train()
    running_loss = 0.0
    num_batches = 0
    for feature, targets in train_loader:
        inputs = feature.float()
        targets = targets.float()

        optimizer.zero_grad()
        outputs = net(inputs)
        # NOTE(review): MSELoss broadcasts when outputs and targets differ in
        # shape -- confirm that net returns exactly one value per sample.
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    if (epoch + 1) % 10 == 0:
        # Report the mean over all mini-batches of the epoch; the loss of the
        # final mini-batch alone is too noisy to track progress (and would be
        # undefined if the loader yielded no batches).
        epoch_loss = running_loss / max(num_batches, 1)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')

KeyboardInterrupt: 

In [None]:
# Evaluate the trained network on the held-out batches. The network lives in
# `net`; no `model` variable is defined anywhere in this notebook.
net.eval()
test_loss_sum = 0.0
test_batches = 0
with torch.no_grad():
    for feature, targets in test_loader:
        inputs = feature.float()
        targets = targets.float()

        outputs = net(inputs)
        test_loss = criterion(outputs, targets)
        test_loss_sum += test_loss.item()
        test_batches += 1

# Mean loss over every test batch rather than the loss of the last batch only.
print(f'Test Loss: {test_loss_sum / max(test_batches, 1):.4f}')

## Use day input to predict the gold rate

In [None]:
from datetime import datetime

def days_since_1985(timestamp):
    """Return the whole-day offset of ``timestamp`` from 1985-01-01.

    Args:
        timestamp: Date string in ``YYYY-MM-DD`` form.

    Returns:
        Integer number of days between 1985-01-01 and ``timestamp``; negative
        for earlier dates.

    Raises:
        ValueError: If ``timestamp`` does not match the ``YYYY-MM-DD`` format.
    """
    fmt = "%Y-%m-%d"
    origin, target = (datetime.strptime(text, fmt) for text in ("1985-01-01", timestamp))
    return (target - origin).days

In [None]:
# Build a (1, 1) input: one sample holding one day-offset feature. The tensor is
# created as float32 because the default for a Python int is int64, which the
# float-weighted linear layers cannot consume.
inputs = torch.tensor(days_since_1985("2021-01-01"), dtype=torch.float32).unsqueeze(0)
new_tensor = inputs.unsqueeze(0)

In [None]:
# The trained network is stored in `net` (no `model` variable exists). Inference
# needs no gradients, so it runs in eval mode under no_grad.
net.eval()
with torch.no_grad():
    out = net(new_tensor)
out