In [1]:
import os
import sys

# Put the parent of the current working directory on the import path;
# presumably this is where the local `lc` package lives (it is imported below).
module_path = os.path.abspath('..')
# Guard the append so that re-running this cell does not keep adding
# duplicate entries to sys.path.
if module_path not in sys.path:
    sys.path.append(module_path)

from lc.measurements import CurveMeasurements

Error measurements used to compute learning curves

In [2]:
import json

# Read the raw measurements file. The context manager closes the file handle
# as soon as parsing finishes instead of leaving it open until garbage collection.
with open('../data/no_pretr_linear.json', 'r') as fp:
    data = json.load(fp)

# Pretty-print the parsed structure for inspection.
print(json.dumps(data, indent=4))

[
    {
        "num_train_samples": 25,
        "test_errors": [
            87.2,
            85.9,
            86.63,
            86.61,
            87.61,
            86.61,
            86.44,
            85.75,
            85.35,
            85.98
        ]
    },
    {
        "num_train_samples": 50,
        "test_errors": [
            83.76,
            82.41,
            81.39,
            83.25,
            84.86,
            82.18,
            84.23,
            84.55
        ]
    },
    {
        "num_train_samples": 100,
        "test_errors": [
            82.03,
            81.86,
            81.5,
            80.72
        ]
    },
    {
        "num_train_samples": 200,
        "test_errors": [
            79.14,
            78.48
        ]
    },
    {
        "num_train_samples": 400,
        "test_errors": [
            79.72
        ]
    }
]


Load the error measurements for different train set sizes into a `CurveMeasurements` object; these measurements are what a learning curve is estimated from. To load directly from a `.json` file, use `load_from_json`; to supply a dictionary that is already loaded in memory, use `load_from_dict`.

In [3]:
# Create the measurements container, then populate it from the JSON file on disk.
curvems = CurveMeasurements()
curvems.load_from_json('../data/no_pretr_linear.json')
# Per the recorded output, printing lists each train set size together with
# its test errors and the number of measurements.
print(curvems)

--
num_train_samples: 25
test_errors: [87.2, 85.9, 86.63, 86.61, 87.61, 86.61, 86.44, 85.75, 85.35, 85.98]
num_ms: 10
--
num_train_samples: 50
test_errors: [83.76, 82.41, 81.39, 83.25, 84.86, 82.18, 84.23, 84.55]
num_ms: 8
--
num_train_samples: 100
test_errors: [82.03, 81.86, 81.5, 80.72]
num_ms: 4
--
num_train_samples: 200
test_errors: [79.14, 78.48]
num_ms: 2
--
num_train_samples: 400
test_errors: [79.72]
num_ms: 1
--



`curvems` is a list of `ErrorMeasurements`. Each `ErrorMeasurements` object stores all errors measured for a single train set size.

In [4]:
# Integer indexing picks out a single entry; in the recorded output,
# index 3 is the entry for 200 train samples.
errms = curvems[3]
print(errms)

num_train_samples: 200
test_errors: [79.14, 78.48]
num_ms: 2



Get train set sizes used in the measurements

In [5]:
# Collect the train set sizes present in the loaded measurements
# (the recorded output shows them as a list of ints).
dataset_sizes = curvems.get_train_dataset_sizes()
print(dataset_sizes)

[25, 50, 100, 200, 400]


Get all error measurements for a specific train set size

In [6]:
# Look up the entry by train set size (100 samples) rather than by position.
# NOTE: this rebinds `errms`, replacing the value assigned by the earlier
# `curvems[3]` cell.
errms = curvems.get_errms(100)
print(errms)

num_train_samples: 100
test_errors: [82.03, 81.86, 81.5, 80.72]
num_ms: 4

