import time

from model_gpt import (
    get_device,
    get_optimizer,
    load_model,
    train_model,
    write_model,
    setup_logging,
    start_log_training_session,
    finish_log_training_session,
)

device = get_device()
# Hyperparameters.
# These affect memory usage.
batch_size = 128 # Change for GPU. (4 test, 128 train)
block_size = 64 # Change for GPU. (v1 8 test, 64 train) - (v2 32 test, x train)
# These do not affect memory usage.
max_iterations = 20000 # Change for GPU. (v1 1000 test, 3000 train) - (v2 200 test, x train)
learning_rate = 3e-4 # Candidate values: 3e-3, 3e-4, 1e-3, 1e-4.
eval_iterations = 4000 # Change for purpose. (v1 250 test, 500 train) - (v2 100 test, x train)
# These affect memory usage.
n_embed = 384 # Number of neurons in the embedding layer.
n_head = 8 # Number of attention heads (run in parallel). (v1 4 for mps, 8 for cuda) - (v2 1 test)
n_layer = 8 # Number of layers (one per decoder block). (v1 4 for mps, 8 for cuda) - (v2 1 test)
# This does not affect memory usage.
dropout = 0.2 # Dropout rate: 20% of neurons are randomly zeroed during training.
# 127000
# Read the vocabulary text file.
with open('vocab.txt', 'r', encoding='utf-8') as file:
    text = file.read()
# Build the sorted list of unique characters in the text.
chars = sorted(set(text))
vocab_size = len(chars)

# Create a tokenizer that converts between characters and integer indices
# via an encoder and a decoder.
strings_to_ints = {c: i for i, c in enumerate(chars)}
ints_to_strings = {i: c for i, c in enumerate(chars)}

def encode(s):
    # Map each character to its integer index.
    return [strings_to_ints[c] for c in s]

def decode(x):
    # Map integer indices back to characters and join them into a string.
    return ''.join([ints_to_strings[i] for i in x])
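
# Illustrative sanity check (assumption: not part of the original script):
# the tokenizer should round-trip any slice of `text`, since every character
# in `text` appears in `chars` by construction.
assert decode(encode(text[:100])) == text[:100]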
# Text files holding the training and validation splits.
training_data_filemap = {
    "train": "output_train.txt",
    "val": "output_val.txt"
}
# Load the model checkpoint with the current architecture settings.
model = load_model(
    "model-02.pkl",
    vocab_size,
    device,
    n_embed,
    block_size,
    n_head,
    n_layer,
    dropout
)
# Create the optimizer.
optimizer = get_optimizer(model, learning_rate)
# Collect the hyperparameters so they can be written to the training log.
hyperparameters = {
    "batch_size": batch_size,
    "block_size": block_size,
    "max_iterations": max_iterations,
    "learning_rate": learning_rate,
    "eval_iterations": eval_iterations,
    "n_embed": n_embed,
    "n_head": n_head,
    "n_layer": n_layer,
    "dropout": dropout
}
model_name = "model-02"
log_file_path = setup_logging(model_name)
# Track when training starts.
start_time = time.time()
start_log_training_session(log_file_path, hyperparameters)
# Train the model.
train_model(
    model,
    max_iterations,
    optimizer,
    eval_iterations,
    training_data_filemap,
    block_size,
    batch_size,
    encode,
    device,
    log_file_path,
    multiplier=1
)
# Calculate the training duration.
training_duration = time.time() - start_time
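# Illustrative addition (assumption: not part of the original script):
# format the elapsed time as HH:MM:SS for quick inspection.
hours, rem = divmod(int(training_duration), 3600)
minutes, seconds = divmod(rem, 60)
print(f"Training took {hours:02d}:{minutes:02d}:{seconds:02d}")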
finish_log_training_session(log_file_path, training_duration)
write_model(model_name + ".pkl", model)