-
Notifications
You must be signed in to change notification settings - Fork 30
/
llm-miner-starter.sh
executable file
·386 lines (325 loc) · 13.9 KB
/
llm-miner-starter.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
#!/bin/bash
# Print an informational message ($1) in blue, prefixed with "INFO: ".
# The message is written to stderr so it never mixes with data on stdout.
log_info() {
  local blue='\033[0;34m'
  local reset='\033[0m'
  echo -e "${blue}INFO: $1${reset}" >&2
}
# Print a warning message ($1) in yellow, prefixed with "WARNING: ".
# The message is written to stderr.
log_warning() {
  local yellow='\033[0;33m'
  local reset='\033[0m'
  echo -e "${yellow}WARNING: $1${reset}" >&2
}
# Print an error message ($1) in red, prefixed with "ERROR: ".
# The message is written to stderr; the caller decides whether to exit.
log_error() {
  local red='\033[0;31m'
  local reset='\033[0m'
  echo -e "${red}ERROR: $1${reset}" >&2
}
# Check that every tool and package this script relies on is installed.
# All missing items are reported (one error line each, with an install hint)
# before the script exits with status 1; nothing is installed automatically.
# Globals:   none
# Arguments: none
# Outputs:   log messages on stderr (via log_info / log_error)
# Returns:   0 when everything is present; exits 1 otherwise
check_prerequisites() {
  local missing_prerequisites=()
  # Executables used elsewhere in this script. curl is listed because
  # fetchModelDetails downloads models.json with it.
  local prerequisites=("jq" "wget" "bc" "curl")
  local prerequisite missing pkg_status
  local version_output python_version=""
  local version_regex='Python ([0-9]+\.[0-9]+)'

  # Determine the default Python "major.minor" version. It stays empty when
  # python3 is absent or prints something unexpected.
  version_output=$(python3 --version 2>&1)
  if [[ "$version_output" =~ $version_regex ]]; then
    python_version="${BASH_REMATCH[1]}"
  fi

  # Decide whether to check for python3-venv or python3.8-venv based on Python version
  if [[ "$python_version" =~ ^3\.(8|9|10|11)$ ]]; then
    prerequisites+=("python3-venv")
  else
    prerequisites+=("python3.8-venv")
  fi

  for prerequisite in "${prerequisites[@]}"; do
    if [[ "$prerequisite" == "python3-venv" || "$prerequisite" == "python3.8-venv" ]]; then
      # The venv packages are Debian packages, not executables. Query the exact
      # package and require the fully-installed state, so that similarly named
      # packages or removed-but-not-purged ones do not count as installed.
      pkg_status=$(dpkg-query -W -f='${Status}' "$prerequisite" 2>/dev/null)
      if [[ "$pkg_status" != *"install ok installed"* ]]; then
        missing_prerequisites+=("$prerequisite")
      fi
    elif ! command -v "$prerequisite" &>/dev/null; then
      # Everything else is an executable that must be on PATH.
      missing_prerequisites+=("$prerequisite")
    fi
  done

  if [[ ${#missing_prerequisites[@]} -eq 0 ]]; then
    log_info "All prerequisites are satisfied."
    return 0
  fi

  for missing in "${missing_prerequisites[@]}"; do
    if [[ "$missing" == "python3-venv" || "$missing" == "python3.8-venv" ]]; then
      log_error "$missing is not installed but is required. Please install $missing with the following command: sudo apt update && sudo apt upgrade && sudo apt install software-properties-common && sudo add-apt-repository ppa:deadsnakes/ppa && sudo apt install $missing"
    else
      log_error "$missing is not installed but is required. Please install $missing with the following command: sudo apt update && sudo apt install $missing"
    fi
  done
  exit 1
}
# Confirm that each remote service the script depends on can be reached.
# Every URL is probed with "wget --spider"; the first unreachable one is
# reported and terminates the script with status 1.
validate_connectivity() {
  # Services that must be reachable before continuing
  local endpoints=("https://huggingface.co")
  for url in "${endpoints[@]}"; do
    if wget --spider -q "$url"; then
      log_info "Connectivity to $url verified."
      continue
    fi
    log_error "Unable to connect to $url. Check your internet connection or access to the site."
    exit 1
  done
}
# Create (or reuse) the "llm-venv" Python virtual environment in the current
# directory and activate it in the running shell.
#
# An existing environment is reused only when its pyvenv.cfg reports Python
# 3.8 - 3.11; otherwise it is deleted and recreated with the default python3.
# A directory that has no pyvenv.cfg is treated as a broken environment and is
# recreated as well.
# Globals:   none (defines the helper get_python_version_from_venv)
# Arguments: none
# Outputs:   log messages on stderr
# Returns:   0 on success; exits 1 when the environment cannot be created
#            or activated
setup_venv_environment() {
  local env_dir="llm-venv"
  local cfg_file="$env_dir/pyvenv.cfg"
  local venv_python_version=""
  # Message logged after a successful (re)creation; empty means "reuse as is".
  local created_message=""

  # Function to parse the Python version from pyvenv.cfg
  # (prints the value of the first "version = ..." line).
  get_python_version_from_venv() {
    sed -n 's/^version = //p' "$cfg_file" | head -n 1
  }

  if [[ -d "$env_dir" && -f "$cfg_file" ]]; then
    venv_python_version=$(get_python_version_from_venv)
    if [[ "$venv_python_version" =~ ^3\.(8|9|10|11)(\.[0-9]+)?$ ]]; then
      log_info "Virtual environment exists with Python version $venv_python_version, which is within the desired range."
    else
      log_info "Virtual environment detected with Python version $venv_python_version, which is outside the desired range. Recreating environment..."
      rm -rf -- "${env_dir:?}"
      created_message="Virtual environment re-created with the current Python version."
    fi
  elif [[ -d "$env_dir" ]]; then
    # Directory without pyvenv.cfg: there is nothing valid to activate.
    log_warning "Directory $env_dir exists but contains no pyvenv.cfg. Recreating environment..."
    rm -rf -- "${env_dir:?}"
    created_message="Virtual environment re-created with the current Python version."
  else
    log_info "Creating a new virtual environment with venv..."
    created_message="Virtual environment created."
  fi

  if [[ -n "$created_message" ]]; then
    if ! python3 -m venv "$env_dir"; then
      log_error "Failed to create virtual environment in $env_dir."
      exit 1
    fi
    log_info "$created_message"
  fi

  # shellcheck disable=SC1091 — the activate script only exists at runtime
  if ! source "$env_dir/bin/activate"; then
    log_error "Failed to activate virtual environment in $env_dir."
    exit 1
  fi
  log_info "Virtual environment activated."
}
# Install Miniconda under $HOME/miniconda when it is not there yet, make sure
# a conda environment named "llm-venv" (Python 3.11) exists, and activate it
# in the running shell.
# Globals:   HOME (read), PATH (extended after a fresh install)
# Arguments: none
# Outputs:   log messages on stderr
# Returns:   0 on success; exits 1 when the download, the installer, the
#            environment creation or the activation fails
setup_conda_environment() {
  local conda_root="$HOME/miniconda"
  local installer="$HOME/miniconda.sh"
  local env_name="llm-venv"
  local create_log

  log_info "Updating package lists..."
  # Best effort: stale package lists are not fatal for the steps below.
  if ! sudo apt-get update -qq >/dev/null 2>&1; then
    log_warning "apt-get update failed; continuing with the existing package lists."
  fi

  if [[ -d "$conda_root" ]]; then
    log_info "Miniconda already installed at $HOME/miniconda. Proceed to create a conda environment."
  else
    log_info "Installing Miniconda..."
    if ! wget --quiet --show-progress --progress=bar:force:noscroll https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O "$installer"; then
      log_error "Failed to download the Miniconda installer."
      rm -f -- "$installer"
      exit 1
    fi
    if ! bash "$installer" -b -p "$conda_root"; then
      log_error "Miniconda installation failed."
      rm -f -- "$installer"
      exit 1
    fi
    export PATH="$conda_root/bin:$PATH"
    rm -f -- "$installer"
  fi

  # Ensure Conda is correctly initialized
  # shellcheck disable=SC1091 — the activate script only exists at runtime
  if ! source "$conda_root/bin/activate"; then
    log_error "Failed to load Miniconda from $conda_root."
    exit 1
  fi
  # Best effort: a failure here is only reported, not treated as fatal.
  if ! "$conda_root/bin/conda" init bash >/dev/null 2>&1; then
    log_warning "conda init bash failed; continuing with the current shell setup."
  fi

  # Source .bashrc to update the path for conda, if it exists
  if [[ -f "$HOME/.bashrc" ]]; then
    log_info "Sourcing .bashrc to update the path for conda"
    # shellcheck disable=SC1091
    source "$HOME/.bashrc"
  elif [[ -f "$HOME/.bash_profile" ]]; then
    # Fallback for systems that use .bash_profile instead of .bashrc
    log_info "Sourcing .bash_profile to update the path for conda"
    # shellcheck disable=SC1091
    source "$HOME/.bash_profile"
  else
    log_error "Could not find a .bashrc or .bash_profile file to source."
  fi

  # Check if the Conda environment already exists. The name column is compared
  # exactly so that e.g. "my-llm-venv" does not count as a match.
  if conda info --envs | awk -v name="$env_name" '$1 == name { found = 1 } END { exit !found }'; then
    log_info "Conda environment 'llm-venv' already exists. Skipping creation."
  else
    log_info "Creating a virtual environment with Miniconda..."
    # Output stays hidden on success but is reported when creation fails.
    if ! create_log=$(conda create -n "$env_name" python=3.11 -y --quiet 2>&1); then
      log_error "Failed to create Conda environment 'llm-venv': $create_log"
      exit 1
    fi
    log_info "Conda virtual environment 'llm-venv' created."
  fi

  if ! conda activate "$env_name"; then
    log_error "Failed to activate Conda environment 'llm-venv'."
    exit 1
  fi
  log_info "Conda virtual environment 'llm-venv' activated."
}
# Install one pip package while showing a small text spinner on stdout.
# Globals:   none
# Arguments: $1 - pip requirement specifier (e.g. "toml" or "triton==2.1.0")
# Outputs:   "Installing <dep>..." followed by " Done." or " Failed." on stdout;
#            pip's own output is discarded
# Returns:   0 when pip succeeds, 1 otherwise
install_with_spinner() {
  local dep=$1
  local spinner='/-\|'
  local pid exit_status i

  # Run pip in the background so the spinner can be animated meanwhile.
  pip install "$dep" >/dev/null 2>&1 &
  pid=$! # PID of the pip install process

  # Use printf for consistent formatting
  printf "Installing %-20s" "$dep..."
  while kill -0 "$pid" 2>/dev/null; do
    for ((i = 0; i < ${#spinner}; i++)); do
      # The spinner character is passed as data, never as the format string.
      printf '\b%s' "${spinner:i:1}"
      sleep 0.2
    done
  done

  # wait reports the exit status of the background job, so no temporary
  # status file is needed.
  wait "$pid"
  exit_status=$?

  if ((exit_status == 0)); then
    printf '\b Done.\n'
  else
    printf '\b Failed.\n'
    return 1
  fi
}
# Install every Python package the miner needs, one after another, using
# install_with_spinner. The first package that fails to install is reported
# and terminates the script with status 1.
install_dependencies() {
  log_info "Installing Python dependencies..."
  local packages=(
    "vllm"
    "python-dotenv"
    "toml"
    "openai"
    "triton==2.1.0"
    "wheel"
    "packaging"
    "psutil"
    "web3"
    "mnemonic"
    "prettytable"
  )
  for dep in "${packages[@]}"; do
    install_with_spinner "$dep" && continue
    log_error "Failed to install $dep."
    exit 1
  done
  log_info "All dependencies installed successfully."
}
# Retrieve model size, quantization and name information for a Heurist model
# from the published models.json.
# Globals:   none
# Arguments: $1 - Heurist model ID (matched against the "name" field)
# Outputs:   one line on stdout: "<size_gb> <quantization> <hf_id> <revision>"
#            (quantization is "None" when the model type contains "16b",
#            otherwise "gptq"; revision is "None" when hf_branch is absent);
#            log messages on stderr
# Returns:   0 on success; exits 1 when the download fails or the model ID
#            is not listed
fetchModelDetails() {
  local heurist_model_id="$1"
  local models_json_url="https://raw.githubusercontent.com/heurist-network/heurist-models/main/models.json"
  local models_json model_found size_gb quantization hf_model_id revision

  log_info "Fetching model details for $heurist_model_id..."
  # -f makes curl fail on HTTP errors instead of handing back an error page
  # that would later be parsed as if it were the model list.
  if ! models_json=$(curl -fs "$models_json_url") || [[ -z "$models_json" ]]; then
    log_error "Failed to fetch model details from $models_json_url"
    exit 1
  fi

  model_found=$(jq -r --arg heurist_model_id "$heurist_model_id" \
    '.[] | select(.name == $heurist_model_id)' <<<"$models_json")
  if [[ -z "$model_found" ]]; then
    log_error "Heurist Model ID '$heurist_model_id' not found in models.json."
    exit 1
  fi

  # Extracting necessary details
  size_gb=$(jq -r '.size_gb' <<<"$model_found")
  if jq -r '.type' <<<"$model_found" | grep -q '16b'; then
    quantization="None"
  else
    quantization="gptq"
  fi
  hf_model_id=$(jq -r '.hf_id' <<<"$model_found")
  revision=$(jq -r '.hf_branch // "None"' <<<"$model_found")

  log_info "Model details: HF_ID=$hf_model_id, Size_GB=$size_gb, Quantization=$quantization, Revision=$revision"
  # Echoing the details for capture by the caller
  echo "$size_gb $quantization $hf_model_id $revision"
}
# Validate a miner ID by calling WalletGenerator.validate_miner_keys from the
# project's auth.generator Python module.
# Globals:   none
# Arguments: $1 - miner ID
#            $2 - path to the config file passed to WalletGenerator
#            $3 - path to the ABI file passed to WalletGenerator
# Outputs:   whatever the Python code prints; an error message on stderr
#            when validation fails
# Returns:   0 on success; exits 1 when the Python process fails
validateMinerId() {
  local miner_id=$1
  local config_file=$2
  local abi_file=$3

  # Call the WalletGenerator class directly. The values are handed over as
  # command-line arguments rather than pasted into the Python source, so
  # quotes or other special characters in them cannot alter the code.
  if ! python -c '
import sys
from auth.generator import WalletGenerator

config_file, abi_file, miner_id = sys.argv[1:4]
wallet_generator = WalletGenerator(config_file, abi_file)
wallet_generator.validate_miner_keys([miner_id])
' "$config_file" "$abi_file" "$miner_id"; then
    log_error "Wallet validation failed for Miner ID: $miner_id"
    exit 1
  fi
}
# Validate GPU VRAM is enough to host expected model, and print the GPU
# memory utilization ratio that main later passes to the Python miner.
# Globals:   gpu_ids (read, from the caller's scope; only the first ID of a
#            comma-separated list is inspected, default 0)
#            heurist_model_id (read, selects the per-model memory budget)
# Arguments: $1 - required VRAM in GB (may be fractional, e.g. "4.5")
# Outputs:   the ratio with two decimals on stdout (e.g. "0.45");
#            log messages on stderr
# Returns:   0 on success; exits 1 when the size is not a number, nvidia-smi
#            is missing, the free VRAM cannot be read, or it is insufficient
# NOTE(review): for GPUs with less than (budget - 1000) MB free the printed
# ratio exceeds 1.00, exactly as before — confirm whether the consumer
# accepts that or whether the value should be capped.
validateVram() {
  local size_gb="$1"
  local number_regex='^([0-9]+(\.[0-9]*)?|\.[0-9]+)$'
  local required_mb available_mb gpu_index budget_mb util_hundredths

  if [[ ! "$size_gb" =~ $number_regex ]]; then
    log_error "Invalid model size '$size_gb'; expected a number of GB."
    exit 1
  fi
  # Assuming the size_gb is the required VRAM in GB, convert it to MB.
  # The result is rounded up to a whole number so that the integer
  # comparison below also works for fractional sizes.
  required_mb=$(awk -v gb="$size_gb" \
    'BEGIN { mb = gb * 1024; whole = int(mb); if (whole < mb) whole++; print whole }')

  log_info "Validating available VRAM against model requirements..."
  # Check if nvidia-smi is available
  if ! command -v nvidia-smi &>/dev/null; then
    log_error "nvidia-smi tool not found. Unable to check available VRAM."
    exit 1
  fi

  # Fetch the available VRAM in MB. nvidia-smi prints one line per GPU, so
  # GPU n is on line n+1. For a list such as "0,1" the first GPU is checked.
  gpu_index="${gpu_ids:-0}"
  gpu_index="${gpu_index%%,*}"
  available_mb=$(nvidia-smi --query-gpu=memory.free --format=csv,noheader,nounits \
    | awk -v gpu_id="$gpu_index" 'NR == gpu_id + 1 { print $1 }')
  if [[ ! "$available_mb" =~ ^[0-9]+$ ]]; then
    log_error "Failed to fetch available VRAM."
    exit 1
  fi
  available_mb=$((10#$available_mb)) # force base 10 even with leading zeros
  log_info "Available VRAM: ${available_mb}MB, Required VRAM: ${required_mb}MB"

  # Compare available and required VRAM (0 MB free can never be enough and
  # would also break the division below).
  if ((available_mb == 0 || available_mb < required_mb)); then
    log_error "Insufficient VRAM. Available: ${available_mb}MB, Required: ${required_mb}MB."
    exit 1
  fi
  log_info "Sufficient VRAM available. Proceeding..."

  # Determine the memory budget (MB) based on model name and available VRAM.
  # A model-specific budget applies only when the GPU has more free memory
  # than that budget; otherwise the default is used.
  budget_mb=12000 # Default value or handle other cases as needed
  if [[ "$heurist_model_id" == *"mixtral-8x7b-gptq"* ]] && ((available_mb > 32000)); then
    budget_mb=32000
  elif [[ "$heurist_model_id" == *"yi-34b-gptq"* ]] && ((available_mb > 40000)); then
    budget_mb=40000
  elif [[ "$heurist_model_id" == *"70b"* ]] && ((available_mb > 44000)); then
    budget_mb=44000
  elif [[ "$heurist_model_id" == *"8b"* ]] && ((available_mb > 18500)); then
    budget_mb=18500
  elif [[ "$heurist_model_id" == *"pro-mistral-7b"* ]] && ((available_mb > 18000)); then
    budget_mb=18000
  fi

  # ratio = (budget - 1000) / available, truncated to two decimals. Integer
  # arithmetic on hundredths keeps the output locale-independent.
  util_hundredths=$(((budget_mb - 1000) * 100 / available_mb))
  # Output the gpu_memory_util value
  printf '%d.%02d' "$((util_hundredths / 100))" "$((util_hundredths % 100))"
}
# Echo the model ID given as $1 back on stdout. When $1 is empty an error
# pointing to the list of supported models is logged and the script exits
# with status 1.
getModelId() {
  local requested_id="$1"
  if [[ -n "$requested_id" ]]; then
    # Return the determined model ID
    echo "$requested_id"
    return
  fi
  # No model ID was provided
  log_error "No model ID provided. Please provide a model ID. See https://docs.heurist.ai/integration/supported-models for supported models."
  exit 1
}
# Entry point: prepare the environment, resolve the model, validate the miner
# and the GPU, then run the llm-miner-*.py script found in the current
# directory.
# Usage:     main <heurist_model_id> [--miner-id-index N] [--port P]
#                 [--gpu-ids IDS] [--skip-signature]
# Globals:   heurist_model_id (written; read by validateVram)
# Outputs:   log messages on stderr, plus whatever the Python script prints
# Returns:   0 on success; exits non-zero when any step fails (the Python
#            script's own exit status is propagated)
main() {
  log_info "Starting script execution..."
  check_prerequisites
  validate_connectivity
  setup_conda_environment
  install_dependencies

  # Default values for the new arguments
  local miner_id_index=0
  local port=8000
  local gpu_ids="0" # User can specify GPUs to use. Example: "0,1" for GPUs 0 and 1.
  local skip_signature=false
  local size_gb quantization hf_model_id revision
  local miner_id gpu_memory_util python_script candidate exit_code

  # Fetch model details including the model ID, required VRAM size, quantization method, and model name
  heurist_model_id=$(getModelId "$1") || exit 1
  # A failed fetch leaves these variables empty; that is checked further down.
  read -r size_gb quantization hf_model_id revision < <(fetchModelDetails "$heurist_model_id")
  shift 1

  # Parse additional arguments
  while (($#)); do
    case "$1" in
      --miner-id-index | --port | --gpu-ids)
        # Without this check a missing value makes "shift 2" fail and the
        # loop would never terminate.
        if (($# < 2)); then
          log_error "Option $1 requires a value."
          exit 1
        fi
        case "$1" in
          --miner-id-index) miner_id_index=$2 ;;
          --port) port=$2 ;;
          --gpu-ids) gpu_ids=$2 ;;
        esac
        shift 2
        ;;
      --skip-signature)
        skip_signature=true
        shift
        ;;
      *) # unrecognized argument
        break
        ;;
    esac
  done

  # Extract the miner ID from the .env file based on the miner_id_index
  miner_id=$(sed -n "s/^MINER_ID_$miner_id_index=//p" .env)

  # Validate the miner ID only if skip_signature is false
  if [[ "$skip_signature" != true ]]; then
    validateMinerId "$miner_id" "config.toml" "auth/abi.json"
  fi

  # Check if the model details were not properly fetched
  if [[ -z "$size_gb" || -z "$quantization" || -z "$hf_model_id" || -z "$revision" ]]; then
    log_error "Failed to fetch model details. Exiting."
    exit 1
  fi

  # Validate if the system has enough VRAM for the model. validateVram runs in
  # a subshell here, so its "exit 1" must be propagated explicitly.
  gpu_memory_util=$(validateVram "$size_gb") || exit 1
  log_info "GPU Memory Utilization ratio for vllm: $gpu_memory_util"

  # Assuming all validations passed, proceed to execute the Python script with the model details
  log_info "Executing Python script with Heurist model ID: $heurist_model_id, Quantization: $quantization, HuggingFace model ID: $hf_model_id, Revision: $revision, Miner ID Index: $miner_id_index, Port: $port, GPU IDs: $gpu_ids"

  # Pick the first file matching llm-miner-*.py (glob order).
  python_script=""
  for candidate in llm-miner-*.py; do
    if [[ -e "$candidate" ]]; then
      python_script=$candidate
      break
    fi
  done
  if [[ -z "$python_script" ]]; then
    log_error "No Python script matching 'llm-miner-*.py' found."
    exit 1
  fi

  python "$python_script" "$hf_model_id" "$quantization" "$heurist_model_id" "$gpu_memory_util" "$revision" "$miner_id_index" "$port" "$gpu_ids" "$skip_signature"
  exit_code=$?
  if ((exit_code != 0)); then
    log_error "Python script exited with status $exit_code."
    exit "$exit_code"
  fi
  log_info "Python script executed successfully."
  log_info "Script execution completed."
}
# Script entry point: run main with all command-line arguments, unchanged.
main "$@"