In [8]:
import torch
import numpy as np
from kneed import KneeLocator




In [3]:
def smooth(values, window_size):
    """Smooth a sequence of values with a simple (unweighted) moving average.

    Only complete windows are averaged, so the result is shorter than the
    input by ``window_size - 1`` elements.

    Args:
        values: Sliceable sequence of numbers (e.g. a list or a 1-D array).
        window_size: Number of consecutive values averaged per output element;
            must be at least 1.

    Returns:
        A list with ``len(values) - window_size + 1`` window averages, or an
        empty list when ``values`` has fewer than ``window_size`` elements.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        # A zero window divides by zero and a negative one would silently
        # average the wrong slices, so reject both up front.
        raise ValueError(f"window_size must be >= 1, got {window_size}")
    return [
        sum(values[start:start + window_size]) / window_size
        for start in range(len(values) - window_size + 1)
    ]

In [10]:
# Print, for each saved checkpoint, the knee of a weight matrix's smoothed singular-value spectrum
def print_knee(matrixname, checkpoint_dir="./out-shakespeare-char", steps=range(0, 2001, 250)):
    """Print the knee of one weight matrix's singular-value spectrum per checkpoint.

    For every training step in ``steps`` the file ``ckpt<step>.pt`` is loaded
    from ``checkpoint_dir``, the singular values of the selected matrix are
    smoothed with a window of 5 via ``smooth``, and the knee reported by
    ``KneeLocator`` is printed.

    Args:
        matrixname: Key of the weight matrix inside ``checkpoint['model']``.
        checkpoint_dir: Directory that holds the ``ckpt<step>.pt`` files.
        steps: Training iterations whose checkpoints are analysed.
    """
    for step in steps:
        # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
        # map_location keeps GPU-saved checkpoints loadable on CPU-only machines.
        checkpoint = torch.load(f"{checkpoint_dir}/ckpt{step}.pt", map_location="cpu")
        matrix = checkpoint['model'][matrixname].cpu().numpy()

        # Only the singular values are needed, so skip computing U and V.
        singular_values = np.linalg.svd(matrix, compute_uv=False)

        smoothed = smooth(singular_values, 5)
        # The knee is an index into the smoothed spectrum, not the raw one.
        kneedle = KneeLocator(range(len(smoothed)), smoothed, S=0.1, curve="convex", direction="decreasing")

        print(f"Iterationen {step} Knee: {kneedle.knee}")
       

In [11]:
# Weight matrices to analyse: the attention and MLP projections of layers h.0 to h.3.
tables = [
    f'transformer.h.{layer}.{sublayer}.weight'
    for layer in range(4)
    for sublayer in ('attn.c_attn', 'attn.c_proj', 'mlp.c_fc', 'mlp.c_proj')
]

# Report the knee per checkpoint for every matrix, preceded by the matrix name.
for name in tables:
    print(name)
    print_knee(name)

transformer.h.0.attn.c_attn.weight
Iterationen 0Knee: 21
Iterationen 250Knee: 16
Iterationen 500Knee: 20
Iterationen 750Knee: 20
Iterationen 1000Knee: 18
Iterationen 1250Knee: 19
Iterationen 1500Knee: 19
Iterationen 1750Knee: 17
Iterationen 2000Knee: 17
transformer.h.0.attn.c_proj.weight
Iterationen 0Knee: 12
Iterationen 250Knee: 15
Iterationen 500Knee: 18
Iterationen 750Knee: 24
Iterationen 1000Knee: 29
Iterationen 1250Knee: 33
Iterationen 1500Knee: 33
Iterationen 1750Knee: 33
Iterationen 2000Knee: 33
transformer.h.0.mlp.c_fc.weight
Iterationen 0Knee: 13
Iterationen 250Knee: 19
Iterationen 500Knee: 27
Iterationen 750Knee: 32
Iterationen 1000Knee: 34
Iterationen 1250Knee: 35
Iterationen 1500Knee: 36
Iterationen 1750Knee: 36
Iterationen 2000Knee: 36
transformer.h.0.mlp.c_proj.weight
Iterationen 0Knee: 30
Iterationen 250Knee: 24
Iterationen 500Knee: 29
Iterationen 750Knee: 30
Iterationen 1000Knee: 36
Iterationen 1250Knee: 36
Iterationen 1500Knee: 37
Iterationen 1750Knee: 39
Iterationen 2

In [24]:
def print_maxknee(matrixname, checkpoint_path="./out-shakespeare-char_128/ckpt2000.pt"):
    """Print the knee of one weight matrix's singular-value spectrum for a single checkpoint.

    The singular values of the selected matrix are smoothed with a window of 5
    via ``smooth`` and the knee reported by ``KneeLocator`` is printed.

    Args:
        matrixname: Key of the weight matrix inside ``checkpoint['model']``.
        checkpoint_path: Path of the checkpoint file to analyse.
    """
    # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
    # map_location keeps GPU-saved checkpoints loadable on CPU-only machines.
    checkpoint = torch.load(checkpoint_path, map_location="cpu")
    matrix = checkpoint['model'][matrixname].cpu().numpy()

    # Only the singular values are needed, so skip computing U and V.
    singular_values = np.linalg.svd(matrix, compute_uv=False)

    smoothed = smooth(singular_values, 5)
    # The knee is an index into the smoothed spectrum, not the raw one.
    kneedle = KneeLocator(range(len(smoothed)), smoothed, S=0.1, curve="convex", direction="decreasing")

    print(kneedle.knee)

In [25]:
# Print each matrix name followed by the knee that print_maxknee reports for it.
for matrix_name in tables:
    print(matrix_name)
    print_maxknee(matrix_name)

transformer.h.0.attn.c_attn.weight
17
transformer.h.0.attn.c_proj.weight
33
transformer.h.0.mlp.c_fc.weight
36
transformer.h.0.mlp.c_proj.weight
39
transformer.h.1.attn.c_attn.weight
21
transformer.h.1.attn.c_proj.weight
27
transformer.h.1.mlp.c_fc.weight
27
transformer.h.1.mlp.c_proj.weight
39
transformer.h.2.attn.c_attn.weight
25
transformer.h.2.attn.c_proj.weight
30
transformer.h.2.mlp.c_fc.weight
27
transformer.h.2.mlp.c_proj.weight
43
transformer.h.3.attn.c_attn.weight
27
transformer.h.3.attn.c_proj.weight
27
transformer.h.3.mlp.c_fc.weight
29
transformer.h.3.mlp.c_proj.weight
41
