In [1]:
import os
import requests
import json
import numpy as np
import pandas as pd

import seaborn as sns
import matplotlib.pylab as plt

import torch
from torch import nn
from torch.utils.data import DataLoader, TensorDataset, random_split

In [2]:
DATA_PATH = "../data"

BATCH_SIZE = 4

In [3]:
grades = []
grade_dict = {}
inv_grade_dict = {}
grade_ct = 0
for num in range(6, 9):
    for let in ["A","B","C"]:
        for pls in ["","+"]:
            grades.append(str(num)+let+pls)
            grade_dict[str(num)+let+pls] = grade_ct
            inv_grade_dict[grade_ct] = str(num)+let+pls
            grade_ct += 1

print(len(grades))

18


In [4]:
def load_raw_data():
    ids = [file_name.replace(".json", "") for file_name in os.listdir(DATA_PATH) if file_name.find(".json") != -1] 
    
    data_dict = {}
    for id in ids:
        with open(os.path.join(DATA_PATH, id + ".json"), "r") as f:
            js = json.load(f)
    
        boulder = np.load(os.path.join(DATA_PATH, id + ".npz"))["arr_0"]
    
        data_dict[id] = {
            "meta_info": js,
            "boulder": boulder,
        }
    return data_dict

In [5]:
raw_data = load_raw_data()

In [6]:
X = []
y = []
for key in raw_data.keys():
    grade = grade_dict[raw_data[key]["meta_info"]["grade"]]
    boulder = raw_data[key]["boulder"]

    X.append(np.sum(boulder, axis=0))
    y.append([float(grade)])

y = np.array(y)
y = y/18
X = torch.Tensor(np.array(X).astype(float))
y = torch.Tensor(np.array(y).astype(float))

train_tensor,test_tensor = random_split(TensorDataset(X, y),[0.8, 0.2])
train_loader =  DataLoader(train_tensor, batch_size=BATCH_SIZE, shuffle=True)
test_loader = DataLoader(test_tensor, batch_size=BATCH_SIZE, shuffle=True)


In [72]:
def pred_to_grade(pred):
    try:
        return [inv_grade_dict[max(round(p[0]), 0)] for p in (pred.to("cpu").numpy()*18).astype("float")]
    except:
        return [inv_grade_dict[max(round(p), 0)] for p in (pred.to("cpu").numpy()*18).astype("float")]

In [8]:
# Get cpu, gpu or mps device for training.
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps"
    if torch.backends.mps.is_available()
    else "cpu"
)
print(f"Using {device} device")




Using cuda device


In [9]:
def train(dataloader, model, loss_fn, optimizer, epochs=3):
    size = len(dataloader.dataset)
    model.train()
    for epoch in range(epochs):
        print("Epoch: ", epoch)
        for batch, (X, y) in enumerate(dataloader):
            X, y = X.to(device), y.to(device)
    
            # Compute prediction error
            pred = model(X)
            loss = loss_fn(pred, y)
    
            # Backpropagation
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
    
            if batch % 100 == 0:
                loss, current = loss.item(), (batch + 1) * len(X)
                print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [10]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")


In [41]:
# Define model
class NeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(198, 150),
            nn.Tanh(),
            nn.Linear(150, 100),
            nn.Tanh(),
            nn.Linear(100, 50),
            nn.Tanh(),
            nn.Linear(50, 1)
        )

    def forward(self, x):
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits


In [48]:

model = NeuralNetwork().to(device)
print(model)
loss_fn = nn.L1Loss()
optimizer = torch.optim.SGD(model.parameters(), lr=3* 1e-2)
train(train_loader, model, loss_fn, optimizer, epochs=15)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=198, out_features=150, bias=True)
    (1): Tanh()
    (2): Linear(in_features=150, out_features=100, bias=True)
    (3): Tanh()
    (4): Linear(in_features=100, out_features=50, bias=True)
    (5): Tanh()
    (6): Linear(in_features=50, out_features=1, bias=True)
  )
)
Epoch:  0
loss: 0.239681  [    4/ 4000]
loss: 0.128987  [  404/ 4000]
loss: 0.067587  [  804/ 4000]
loss: 0.055125  [ 1204/ 4000]
loss: 0.160400  [ 1604/ 4000]
loss: 0.022042  [ 2004/ 4000]
loss: 0.048728  [ 2404/ 4000]
loss: 0.056889  [ 2804/ 4000]
loss: 0.039484  [ 3204/ 4000]
loss: 0.052616  [ 3604/ 4000]
Epoch:  1
loss: 0.053737  [    4/ 4000]
loss: 0.100268  [  404/ 4000]
loss: 0.058877  [  804/ 4000]
loss: 0.062364  [ 1204/ 4000]
loss: 0.076304  [ 1604/ 4000]
loss: 0.027758  [ 2004/ 4000]
loss: 0.068248  [ 2404/ 4000]
loss: 0.091422  [ 2804/ 4000]
loss: 0.090538  [ 3204/ 4000]
loss: 0.062398  

In [78]:
lst = []
with torch.no_grad():
    for X, y in test_loader:
        X, y = X.to(device), y.to(device)
        pred = model(X).flatten()
        print("pred:", pred * 18, "true:", y * 18)
        #print(X)
        for pred_, y_ in zip(pred * 18, y * 18):
            lst.append({
                "pred":  pred_.to("cpu").numpy(),
                "true": y_.to("cpu").numpy()[0],
                "pred_cls": inv_grade_dict[round(pred_.to("cpu").numpy())],
                "true_cls": inv_grade_dict[round(y_.to("cpu").numpy()[0])],
                "diff": np.abs(pred_.to("cpu").numpy() - y_.to("cpu").numpy()[0])
            })

pred: tensor([5.5868, 4.9379, 3.9703, 5.3974], device='cuda:0') true: tensor([[3.],
        [6.],
        [3.],
        [6.]], device='cuda:0')


TypeError: type numpy.ndarray doesn't define __round__ method

In [67]:
df = pd.DataFrame.from_records(lst)
len(df[df["pred_cls"] == df["true_cls"]])/len(df)

0.264

In [68]:
df.head()

Unnamed: 0,pred,true,pred_cls,true_cls,diff
0,3.8366418,4.0,6B+,6C,0.163358
1,1.7416227,1.0,6A+,6A+,0.741623
2,4.4101424,5.0,6C,6C+,0.589858
3,7.655112,8.0,7A+,7B,0.344888
4,2.8900294,2.0,6B,6B,0.890029


In [71]:
len(df[df["diff"] < 1]) / 1000

0.515

In [70]:
len(df)

1000