In [46]:
import torch
import torch.nn as nn
import torchvision #for dataset 
import torchvision.transforms as transforms 
import matplotlib.pyplot as plt

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [47]:
input_size = 28*28
hidden_size = 100
num_classes = 10
num_epochs = 2
batch_size = 100
lr = 0.01

In [48]:
class PortfolioRNN(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(PortfolioRNN, self).__init__()
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.rnn = nn.RNN(input_size=input_size, hidden_size=hidden_size, num_layers=num_layers, batch_first=True)
        # x -> (batch_size, timesteps/seq, input_size/feature_size)
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, X, r):
        h0 = torch.zeros(self.num_layers, X.size(0), self.hidden_size).to(device)
        out, _ = self.rnn(X,h0)
        # out -> (batch_size, seq_length, hidden_size)
        #out = out[:,-1,:] only take the last time step NO WE NEED ALL TIME STEPS
        out = self.linear(out)
        out = torch.softmax(out, dim=-1)
        portfolio_returns = torch.sum(out[:, :, :] * r[:, :, :], dim=-1)
        # portfolio_returns -> (batch_size)
        sharpe = torch.mean(portfolio_returns, dim = -1) / torch.std(portfolio_returns, dim = -1)
        return -sharpe.mean()
    
    def get_allocations(self, X):
        with torch.no_grad():
            h0 = torch.zeros(self.num_layers, X.size(0), self.hidden_size).to(device)
            out, _ = self.rnn(X,h0)
            out = self.linear(out)
            out = torch.softmax(out, dim=-1)
        return out[:, -1, :]

In [49]:
model = PortfolioRNN(5, 1, 1, 2).to(device)

# Generate random input data
# X -> (batch_size, seq_length, input_size)
X = torch.rand(2, 10, 5).to(device)

# r -> (batch_size, seq_length, output_size)
r = torch.rand(2, 10, 2).to(device)

# Call the forward method
loss = model(X, r)

# Print the output
print("Output loss (negative Sharpe ratio):", loss.item())

Output loss (negative Sharpe ratio): -1.6340763568878174


In [50]:
import numpy as np
import yfinance as yf
import pandas as pd

tickers = ['VTI', 'AGG', 'DBC', '^VIX']
data = yf.download(tickers, start="2006-01-01", end="2020-12-31", interval="1d")['Adj Close']
data_na = data.dropna(axis = 0)
for column in data_na.columns:
    data_na[f'{column}_R'] = data_na[f'{column}'].pct_change()
    data_na[f'{column}_y'] = data_na[f'{column}_R'].shift(-1)
data_na.dropna(axis=0, inplace=True)
data_na.reset_index(inplace=True)
data_na['Date'] = data_na['Date'].dt.date
data_na

[*********************100%***********************]  4 of 4 completed
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_na[f'{column}_R'] = data_na[f'{column}'].pct_change()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_na[f'{column}_y'] = data_na[f'{column}_R'].shift(-1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_na[f'{column}_

Ticker,Date,AGG,DBC,VTI,^VIX,AGG_R,AGG_y,DBC_R,DBC_y,VTI_R,VTI_y,^VIX_R,^VIX_y
0,2006-02-07,56.438553,20.285254,44.219505,13.590000,-0.000699,-0.000498,-0.028926,-0.004255,-0.009736,0.007194,0.042178,-0.055923
1,2006-02-08,56.410431,20.198935,44.537621,12.830000,-0.000498,0.000599,-0.004255,0.009402,0.007194,-0.001904,-0.055923,0.022603
2,2006-02-09,56.444218,20.388840,44.452812,13.120000,0.000599,-0.002097,0.009402,-0.018205,-0.001904,0.002067,0.022603,-0.019055
3,2006-02-10,56.325867,20.017662,44.544685,12.870000,-0.002097,0.000701,-0.018205,-0.015524,0.002067,-0.004523,-0.019055,0.037296
4,2006-02-13,56.365341,19.706909,44.343220,13.350000,0.000701,-0.001601,-0.015524,-0.008322,-0.004523,0.009485,0.037296,-0.082397
...,...,...,...,...,...,...,...,...,...,...,...,...,...
3745,2020-12-22,106.759186,13.624199,181.857056,24.230000,0.001443,-0.000678,-0.011004,0.012517,0.000207,0.001710,-0.036963,-0.037969
3746,2020-12-23,106.686790,13.794738,182.168045,23.309999,-0.000678,0.000933,0.012517,0.002747,0.001710,0.001673,-0.037969,-0.076362
3747,2020-12-24,106.786339,13.832635,182.472778,21.530001,0.000933,0.000170,0.002747,-0.006164,0.001673,0.006327,-0.076362,0.007896
3748,2020-12-28,106.804451,13.747366,183.627319,21.700001,0.000170,0.000169,-0.006164,0.002757,0.006327,-0.004175,0.007896,0.063594


In [66]:
data_na.head(10)

Ticker,Date,AGG,DBC,VTI,^VIX,AGG_R,AGG_y,DBC_R,DBC_y,VTI_R,VTI_y,^VIX_R,^VIX_y
0,2006-02-07,56.438553,20.285254,44.219505,13.59,-0.000699,-0.000498,-0.028926,-0.004255,-0.009736,0.007194,0.042178,-0.055923
1,2006-02-08,56.410431,20.198935,44.537621,12.83,-0.000498,0.000599,-0.004255,0.009402,0.007194,-0.001904,-0.055923,0.022603
2,2006-02-09,56.444218,20.38884,44.452812,13.12,0.000599,-0.002097,0.009402,-0.018205,-0.001904,0.002067,0.022603,-0.019055
3,2006-02-10,56.325867,20.017662,44.544685,12.87,-0.002097,0.000701,-0.018205,-0.015524,0.002067,-0.004523,-0.019055,0.037296
4,2006-02-13,56.365341,19.706909,44.34322,13.35,0.000701,-0.001601,-0.015524,-0.008322,-0.004523,0.009485,0.037296,-0.082397
5,2006-02-14,56.275108,19.5429,44.763836,12.25,-0.001601,0.001102,-0.008322,-0.014576,0.009485,0.00379,-0.082397,0.004898
6,2006-02-15,56.337143,19.258043,44.933498,12.31,0.001102,-0.001001,-0.014576,0.012102,0.00379,0.007788,0.004898,-0.067425
7,2006-02-16,56.280758,19.49111,45.283451,11.48,-0.001001,0.004105,0.012102,0.014172,0.007788,-0.001171,-0.067425,0.046167
8,2006-02-17,56.511803,19.767332,45.230442,12.01,0.004105,-0.003689,0.014172,0.016594,-0.001171,-0.002813,0.046167,0.033306
9,2006-02-21,56.303352,20.095352,45.103188,12.41,-0.003689,0.004003,0.016594,-0.016323,-0.002813,0.007837,0.033306,-0.042707


In [63]:
def create_batches(dataframe, start_date, end_date, window_size=50):
    dataframe['Date'] = pd.to_datetime(dataframe['Date'])
    
    filtered_data = dataframe[(dataframe['Date'] >= start_date) & (dataframe['Date'] <= end_date)]
    
    x_batches = []
    y_batches = []
    dates_batches = []
    dates_per_feature = []
    y = []

    for i in range(len(filtered_data) - window_size):
        window_returns = filtered_data.iloc[i:i+window_size][[col for col in dataframe.columns if col.endswith('_R')]].values
        window_prices = filtered_data.iloc[i:i+window_size][[col for col in dataframe.columns if col in ['AGG', 'DBC', 'VTI', '^VIX']]].values
        
        window_x = np.concatenate([window_returns, window_prices], axis=1)
        window_y = filtered_data.iloc[i:i+window_size][[col for col in dataframe.columns if col.endswith('_y')]].values
        
        window_dates_batches = filtered_data.iloc[i:i+window_size]['Date'].values
        dates_per_feature.append(filtered_data.iloc[i+window_size]['Date'].date()) 

        y.append(filtered_data.iloc[i+window_size][[col for col in dataframe.columns if col.endswith('_y')]].values)

        x_batches.append(window_x)
        y_batches.append(window_y)
        dates_batches.append(window_dates_batches)

    return np.array(x_batches), np.array(y_batches), np.array(y, dtype=np.float32), np.array(dates_batches), dates_per_feature

x_batches, y_batches, y, dates_batches, dates_per_feature = create_batches(data_na, start_date="2006-01-01", end_date="2020-12-29")
y = y.reshape(y.shape[0], 1, y.shape[1])

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataframe['Date'] = pd.to_datetime(dataframe['Date'])


In [71]:
x_batches.shape

(3700, 50, 8)

In [70]:
len(dates_per_feature)

3700

In [65]:
data_na.iloc[10]['Date'].date()

datetime.date(2006, 2, 22)

In [64]:
data_na.iloc[:10]['Date'].values


array(['2006-02-07T00:00:00.000000000', '2006-02-08T00:00:00.000000000',
       '2006-02-09T00:00:00.000000000', '2006-02-10T00:00:00.000000000',
       '2006-02-13T00:00:00.000000000', '2006-02-14T00:00:00.000000000',
       '2006-02-15T00:00:00.000000000', '2006-02-16T00:00:00.000000000',
       '2006-02-17T00:00:00.000000000', '2006-02-21T00:00:00.000000000'],
      dtype='datetime64[ns]')

In [60]:
window_returns = data_na.iloc[:10][['AGG_R', 'DBC_R']].values
window_prices = data_na.iloc[:10][[col for col in data_na.columns if col in ['AGG', 'DBC']]].values

window_returns.shape, window_prices.shape

concatenaed = np.concatenate([window_returns, window_prices], axis=1)
concatenaed.shape

(10, 4)

In [52]:
import datetime


def get_idx_dates(start_date, end_date):
    if start_date in dates_per_feature and end_date in dates_per_feature:
        k_start = dates_per_feature.index(start_date)
        k_end = dates_per_feature.index(end_date)
        return k_start, k_end
    
    if start_date not in dates_per_feature: 
        if any(dates >= start_date for dates in dates_per_feature):    
            k_start = min(j for j in range(len(dates_per_feature)) if dates_per_feature[j] >= start_date)
        else:
            k_start = 0
    else:
        k_start = dates_per_feature.index(start_date)

    if end_date not in dates_per_feature:
        if any(dates <= end_date for dates in dates_per_feature):
            k_end = max(j for j in range(len(dates_per_feature)) if dates_per_feature[j] <= end_date) + 1
        else:
            k_end = len(dates_per_feature) -1
    else: 
        k_end = dates_per_feature.index(end_date)

    return k_start, k_end


def training(x_batches, y_batches, model, batch_size):

    x_batches_tensor = torch.tensor(x_batches, dtype=torch.float32)
    y_batches_tensor = torch.tensor(y_batches, dtype=torch.float32)
    num_batches = x_batches.shape[0]
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    num_epochs = 100


    for epoch in range(num_epochs):
        model.train()
        loss_epoch = 0
        
        for i in range(0, num_batches, batch_size):
            x_mini_batch = x_batches_tensor[i:i+batch_size]
            y_mini_batch = y_batches_tensor[i:i+batch_size]
            
            optimizer.zero_grad()
            
            loss = model(x_mini_batch, y_mini_batch)
            loss_epoch += loss.item()
            loss.backward()
            optimizer.step()
        
        print(f"epoch [{epoch+1}/{num_epochs}], loss: {(loss_epoch/batch_size)}")

    return model



def investing(x_batches, y, model):

    x_tensor = torch.tensor(x_batches, dtype=torch.float32)
    allocations = model.get_allocations(x_tensor)
    # pourquoi ce shape ?
    allocations = allocations.view(allocations.shape[0], 1, allocations.shape[1])
    rdt = torch.sum(allocations*y, dim=2)

    # return du portfeuille à t final du rnn, les allocations
    return rdt, allocations


In [53]:
input_size = 8
hidden_size = 64
output_size = 4
batch_size = 64

rdt_all = torch.empty(0,0)
alloc_all = []
idx_invest = []
dates_invest = []

# first training goes from 2006 to 2010
first_date = dates_per_feature[0]
end_date_1st_training = first_date + datetime.timedelta(days=365*4)
# ?????
idx_start, idx_end = get_idx_dates(first_date, end_date_1st_training)

model = PortfolioRNN(input_size=input_size, hidden_size=hidden_size, num_layers = 2,output_size=output_size)


model = training(x_batches = x_batches[idx_start:idx_end, :, :],
                      y_batches= y_batches[idx_start:idx_end, :, :],
                      model=model,
                      batch_size=64)

# un an c pas 365 jours mais 252 jours de trading
date_end_invest = end_date_1st_training + datetime.timedelta(days=365*2)
idx_start_invest, idx_end_invest = get_idx_dates(end_date_1st_training, date_end_invest)

print(f"Idx start: {idx_start_invest}, Idx end: {idx_end_invest}")
print(dates_per_feature[idx_start_invest], dates_per_feature[idx_end_invest])

idx_invest.append((idx_start_invest, idx_end_invest))
dates_invest.append((end_date_1st_training, date_end_invest))

x_investing = x_batches[idx_start_invest:idx_end_invest, :, :]
y_investing = y_batches[idx_start_invest:idx_end_invest, :, :]
y_real = y[idx_start_invest:idx_end_invest, :, :]

#faut commencer de x_batches - size_window, car ça donne les allcations pour le dernier jour du batch/window out[:,-1,:]
first_rdt, first_alloc = investing(x_batches= x_investing, y=y_real, model=model)

print(first_rdt.shape, len(dates_per_feature[idx_start_invest:idx_end_invest]))

print(f"first_rdt shape: {first_rdt.shape}, rdt_all shape before concat: {rdt_all.shape}")


rdt_all = first_rdt
alloc_all = first_alloc


for i in range(5):

    model = training(x_batches = x_batches[idx_start_invest:idx_end_invest, :, :],
                      y_batches= y_batches[idx_start_invest:idx_end_invest, :, :],
                      model=model,
                      batch_size=64)
    
    date_end_invest = date_end_invest + datetime.timedelta(days=365*2)
    end_date_1st_training = end_date_1st_training + datetime.timedelta(days=365*2)
    idx_start_invest, idx_end_invest = get_idx_dates(end_date_1st_training, date_end_invest)

    x_investing = x_batches[idx_start_invest:idx_end_invest, :, :]
    y_investing = y_batches[idx_start_invest:idx_end_invest, :, :]
    y_real = y[idx_start_invest:idx_end_invest, :, :]


    first_rdt, first_alloc = investing(x_batches= x_investing, y=y_real, model=model)

    print(first_rdt.shape, len(dates_per_feature[idx_start_invest:idx_end_invest]))


    rdt_all = torch.cat([rdt_all, first_rdt], dim=0)
    print('len des rdt', rdt_all.shape[0])
    alloc_all = torch.cat([alloc_all, first_alloc], dim=0)
    idx_invest.append((idx_start_invest, idx_end_invest))
    print('len des dates', len(dates_per_feature[idx_invest[0][0]:idx_invest[-1][1]]))
    dates_invest.append((end_date_1st_training, date_end_invest))


alloc_all = alloc_all.numpy()
alloc_all = alloc_all.reshape(alloc_all.shape[0], alloc_all.shape[2])

epoch [1/100], loss: -0.017145784542663023
epoch [2/100], loss: -0.031068118376424536
epoch [3/100], loss: -0.032567627684329636
epoch [4/100], loss: -0.03829250670969486
epoch [5/100], loss: -0.030205582857888658
epoch [6/100], loss: -0.044016216779709794
epoch [7/100], loss: -0.038969211251242086
epoch [8/100], loss: -0.043501102803929825
epoch [9/100], loss: -0.035723119603062514
epoch [10/100], loss: -0.041922470030840486
epoch [11/100], loss: -0.03999450944320415
epoch [12/100], loss: -0.04672367420789669
epoch [13/100], loss: -0.04587261657616182
epoch [14/100], loss: -0.046662430537253385
epoch [15/100], loss: -0.04547698769601993
epoch [16/100], loss: -0.04166386016004253
epoch [17/100], loss: -0.05181401543086395
epoch [18/100], loss: -0.04006809197016992
epoch [19/100], loss: -0.052555408685293514
epoch [20/100], loss: -0.05317333483253606
epoch [21/100], loss: -0.05676594477699837
epoch [22/100], loss: -0.052053666120627895
epoch [23/100], loss: -0.05445723296543292
epoch [2

In [72]:

dataframe_result = pd.DataFrame({
    'Date': pd.Series(dates_per_feature[idx_invest[0][0]:idx_invest[-1][1]]),
    'Return': pd.Series(rdt_all.numpy().flatten()),
    'Alloc_AGG': pd.Series(alloc_all[:, 0]),
    'Alloc_DBC': pd.Series(alloc_all[:, 1]),
    'Alloc_VTI': pd.Series(alloc_all[:, 2]),
    'Alloc_^VIX': pd.Series(alloc_all[:, 3])
})

dataframe_result['Date'] = pd.to_datetime(dataframe_result['Date'])
data_na['Date'] = pd.to_datetime(data_na['Date'])
dataframe_result = pd.merge(dataframe_result, data_na[['Date', 'AGG_y', 'DBC_y', 'VTI_y', '^VIX_y']], on='Date', how='left')

dataframe_result['Final_Return'] = (
    dataframe_result['Alloc_AGG'] * dataframe_result['AGG_y'] +
    dataframe_result['Alloc_DBC'] * dataframe_result['DBC_y'] +
    dataframe_result['Alloc_VTI'] * dataframe_result['VTI_y'] +
    dataframe_result['Alloc_^VIX'] * dataframe_result['^VIX_y']
)

dataframe_result

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_na['Date'] = pd.to_datetime(data_na['Date'])


Unnamed: 0,Date,Return,Alloc_AGG,Alloc_DBC,Alloc_VTI,Alloc_^VIX,AGG_y,DBC_y,VTI_y,^VIX_y,Final_Return
0,2010-04-19,0.000069,0.386544,0.006146,0.545118,0.062192,0.001632,0.007936,0.009474,-0.092849,0.000069
1,2010-04-20,0.002311,0.560603,0.005818,0.390092,0.043487,0.001724,0.004973,-0.000809,0.037508,0.002311
2,2010-04-21,0.001444,0.543324,0.009969,0.368171,0.078537,-0.002008,0.002474,0.004858,0.009191,0.001444
3,2010-04-22,0.004745,0.286030,0.009746,0.588584,0.115641,-0.001246,0.006582,0.006769,0.009108,0.004745
4,2010-04-23,0.001490,0.206501,0.006997,0.707580,0.078922,-0.000096,-0.004904,-0.003522,0.051143,0.001490
...,...,...,...,...,...,...,...,...,...,...,...
2690,2020-12-22,-0.000683,0.996060,0.000088,0.003473,0.000380,-0.000678,0.012517,0.001710,-0.037969,-0.000683
2691,2020-12-23,0.000775,0.992632,0.000129,0.005147,0.002093,0.000933,0.002747,0.001673,-0.076362,0.000775
2692,2020-12-24,0.000299,0.979257,0.000121,0.018896,0.001727,0.000170,-0.006164,0.006327,0.007896,0.000299
2693,2020-12-28,0.000286,0.988904,0.000417,0.008284,0.002395,0.000169,0.002757,-0.004175,0.063594,0.000286


In [73]:
trading_days_per_year = 252

mean_daily_return = dataframe_result['Final_Return'].mean()
annualized_return = (1+mean_daily_return) ** trading_days_per_year -1

std_daily_return = dataframe_result['Final_Return'].std()
std_dd_return = dataframe_result[dataframe_result['Final_Return'] < 0]['Final_Return'].std()

annualized_volatility = std_daily_return * np.sqrt(trading_days_per_year)
annualized_volatility_dd = std_dd_return * np.sqrt(trading_days_per_year)

sharpe_ratio = annualized_return / annualized_volatility
sortino_ratio = annualized_return / annualized_volatility_dd

print(f"annualized mean return: {annualized_return:.4f}")
print(f"annualized std: {annualized_volatility:.4f}")
print(f"annualized dd: {annualized_volatility_dd:.4f}")
print(f"Sharpe Ratio: {sharpe_ratio:.4f}")
print(f"Sortino Ratio: {sortino_ratio:.4f}")

annualized mean return: 0.2298
annualized std: 0.1574
annualized dd: 0.0952
Sharpe Ratio: 1.4604
Sortino Ratio: 2.4155


In [30]:
mini_batch_size = 64

x_batches_tensor = torch.tensor(x_batches, dtype=torch.float32)
y_batches_tensor = torch.tensor(y_batches, dtype=torch.float32)

num_batches = x_batches_tensor.shape[0]

input_size = 8
hidden_size = 64
output_size = 4
model_rnn = PortfolioRNN(input_size=input_size, hidden_size=hidden_size, num_layers=2, output_size=output_size)

In [34]:
optimizer = torch.optim.Adam(model_rnn.parameters(), lr=0.001)
num_epochs = 100


for epoch in range(num_epochs):
    model_rnn.train()
    loss_epoch = 0
    
    for i in range(0, num_batches - mini_batch_size + 1, mini_batch_size):
        x_mini_batch = x_batches_tensor[i:i+mini_batch_size]
        y_mini_batch = y_batches_tensor[i:i+mini_batch_size]
        
        # Check the shapes
        print(f"Batch {i // mini_batch_size + 1}:")
        print(f"x_mini_batch shape: {x_mini_batch.shape}")
        print(f"y_mini_batch shape: {y_mini_batch.shape}")
        
        optimizer.zero_grad()
        
        loss = model_rnn(x_mini_batch, y_mini_batch)
        loss_epoch += loss.item()
        loss.backward()
        optimizer.step()
    
    print(f"epoch [{epoch+1}/{num_epochs}], loss: {(loss_epoch/mini_batch_size)}")

Batch 1:
x_mini_batch shape: torch.Size([64, 50, 8])
y_mini_batch shape: torch.Size([64, 50, 0])


RuntimeError: The size of tensor a (4) must match the size of tensor b (0) at non-singleton dimension 2

In [12]:
model = PortfolioRNN(input_size, hidden_size, num_layers, num_classes).to(device)

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

In [13]:
sequence_length = 28
input_size = 28

for epoch in range(len(train_loader)):
    for i, (images, labels) in enumerate(train_loader):
        images = images.reshape(-1, sequence_length, input_size).to(device)
        labels = labels.to(device)

        outputs = model(images)
        loss = criterion(outputs, labels)

        optimizer.zero_grad
        loss.backward()
        optimizer.step()

        if (i+1)% 100 ==0:
            print(f'Epoch {epoch+1}/{num_epochs}, Step {i+1}/{len(train_loader)}, Loss = {loss.item()}')

Epoch 1/2, Step 100/600, Loss = 1.7561472654342651
Epoch 1/2, Step 200/600, Loss = 1.1913491487503052
Epoch 1/2, Step 300/600, Loss = 0.4377589523792267
Epoch 1/2, Step 400/600, Loss = 1.1186847686767578
Epoch 1/2, Step 500/600, Loss = 1.1297461986541748
Epoch 1/2, Step 600/600, Loss = 1.624846339225769
Epoch 2/2, Step 100/600, Loss = 0.710981011390686
Epoch 2/2, Step 200/600, Loss = 0.7087716460227966
Epoch 2/2, Step 300/600, Loss = 1.1653952598571777
Epoch 2/2, Step 400/600, Loss = 1.0352438688278198
Epoch 2/2, Step 500/600, Loss = 0.6385011076927185
Epoch 2/2, Step 600/600, Loss = 0.748671293258667
Epoch 3/2, Step 100/600, Loss = 1.1222331523895264
Epoch 3/2, Step 200/600, Loss = 0.8393617272377014
Epoch 3/2, Step 300/600, Loss = 0.7829921245574951
Epoch 3/2, Step 400/600, Loss = 0.897032618522644
Epoch 3/2, Step 500/600, Loss = 0.9496373534202576
Epoch 3/2, Step 600/600, Loss = 0.8489352464675903
Epoch 4/2, Step 100/600, Loss = 0.5647134780883789
Epoch 4/2, Step 200/600, Loss = 1.4

KeyboardInterrupt: 

In [None]:
with torch.no_grad():
    n_correct = 0
    n_samples = 0
    for images, labels in test_loader:
        images = images.reshape(-1, 784).to(device)
        labels = labels.to(device)
        outputs = model(images)

        _, predictions = torch.max(outputs, 1)
        n_samples += labels.shape(0)
        n_correct += (predictions == labels).sum().item()

    acc = 100.0 * n_correct / n_samples
    print(f'Accuracy = {acc}') 