# Hugging Face for pretrained models
https://huggingface.co

Installation instructions for the Transformers library
https://huggingface.co/docs/transformers/installation

pip install transformers


meta-llama
Llama-3.2-1B 

https://huggingface.co/meta-llama/Llama-3.2-1B?text=My+name+is+Thomas+and+my+main

Model Information

The Llama 3.2 collection of multilingual large language models (LLMs) is a collection of pretrained and instruction-tuned generative models in 1B and 3B sizes (text in/text out). The Llama 3.2 instruction-tuned text only models are optimized for multilingual dialogue use cases, including agentic retrieval and summarization tasks. They outperform many of the available open source and closed chat models on common industry benchmarks.

Model Developer: Meta

In [None]:
"""
Make sure to update your transformers installation via pip install --upgrade transformers.
"""
import torch
from transformers import pipeline

# Hub identifier of the checkpoint to load.
model_id = "meta-llama/Llama-3.2-1B"

# Text-generation pipeline for the checkpoint above: weights are requested in
# bfloat16 and device placement is left to the library (device_map="auto").
pipe = pipeline(
    task="text-generation",
    model=model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Last expression of the cell, so the notebook displays the generated text.
pipe("The key to life is")

In [None]:
# Using pipelines
def data():
    """Lazily yield the 1000 prompts "My example 0" .. "My example 999"."""
    yield from (f"My example {idx}" for idx in range(1000))


# GPT-2 pipeline pinned to device index 0 (presumably the first GPU — this
# fails on a machine without one; confirm against your hardware).
pipe = pipeline(model="openai-community/gpt2", device=0)

# Stream the prompts from data() through the pipeline and total the length of
# the first generated text returned for each prompt.
generated_characters = sum(
    len(out[0]["generated_text"]) for out in pipe(data())
)

# Accessing datasets on Hugging Face
https://huggingface.co/datasets

# Model training and monitoring: Weights & Biases
https://wandb.ai/home

In [None]:
import wandb
import random

# Open a new W&B run; everything logged below is attached to it.
wandb.init(
    project="my-awesome-project",  # project the run is filed under
    config={  # hyperparameters and run metadata stored with the run
        "learning_rate": 0.02,
        "architecture": "CNN",
        "dataset": "CIFAR-100",
        "epochs": 10,
    },
)

# Simulated training: no model is trained, the metrics are synthetic curves
# (power-of-two decay plus random noise) shifted by a per-run random offset.
epochs = 10
offset = random.random() / 5
for epoch in range(2, epochs):  # epoch is used as a divisor, so it starts above 0
    decay = 2 ** -epoch
    acc = 1 - decay - random.random() / epoch - offset
    loss = decay + random.random() / epoch + offset

    # Send this epoch's metrics to the active run.
    wandb.log({"acc": acc, "loss": loss})

# Close the run explicitly (optional in scripts, necessary in notebooks).
wandb.finish()

W&B also lets you run sweeps to search over model hyperparameters

In [None]:
# Import the W&B Python Library and log into W&B
import wandb

# Log into W&B before any run or sweep is created below.
wandb.login()

# 1: Define objective/training function
def objective(config):
    """Return the score ``x**3 + y`` for one hyperparameter configuration.

    ``config`` may be any object exposing numeric ``x`` and ``y`` attributes.
    """
    cubed_x = config.x**3
    return cubed_x + config.y

def main():
    """Run one sweep trial: start a run, score its config, log the score."""
    wandb.init(project="my-first-sweep")
    # wandb.config is read after init and passed straight to objective().
    wandb.log({"score": objective(wandb.config)})

# 2: Define the search space
sweep_configuration = {
    # Search strategy used to pick hyperparameter combinations.
    "method": "random",
    # Metric to optimise; "score" is the key logged by main().
    "metric": {
        "goal": "minimize",
        "name": "score",
    },
    "parameters": {
        # x is drawn from a range bounded by min and max.
        "x": {
            "max": 0.1,
            "min": 0.01,
        },
        # y is picked from an explicit list of candidates.
        "y": {
            "values": [1, 3, 7],
        },
    },
}

# 3: Start the sweep
# Create the sweep from the configuration above; the returned id is what the
# agent below uses to refer to it.
sweep_id = wandb.sweep(sweep=sweep_configuration, project="my-first-sweep")

# Launch an agent in this process that calls main() for the sweep, with the
# trial count capped at 10.
wandb.agent(sweep_id, function=main, count=10)

Sweeps can also be created and controlled from the command-line interface:

https://docs.wandb.ai/guides/sweeps/pause-resume-and-cancel-sweeps

# Config file for a sweep (.yaml)

program: run_model_training.py

method: bayes

metric:

  name: val_loss

  goal: minimize

parameters:

  num_GCN_layers:

    values: [2,3]

  num_GCNneurons:

    values: [512,1024]

  num_fc_layer:

    values: [2,3]

  k_pooling_dim:

    values: [16, 32]

  fc_layer_scaling:

    values: [16,32]

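CLI command to create the sweep from the config file (it prints a sweep ID; start workers with `wandb agent <sweep-id>`):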

>> wandb sweep config.yaml

In [None]:
import sys, argparse
def build_arg_parser():
    """Build the command-line parser for the model-training script.

    Option names use the same spelling as the sweep parameters
    (``num_GCN_layers``, ``num_GCNneurons``, ``num_fc_layer``,
    ``k_pooling_dim``, ``fc_layer_scaling``) so a sweep agent can pass them
    as ``--name=value`` flags.

    Defaults are not written into the help strings by hand:
    ``ArgumentDefaultsHelpFormatter`` appends the real default to every
    option that has help text, so the two can never disagree.

    Returns:
        argparse.ArgumentParser: the fully configured parser.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )

    parser.add_argument('--data', default="./data",
                        help="Directory with training data")
    # type=float so a value given on the command line is a number, like the
    # default, instead of a raw string.
    parser.add_argument('--train_test_split', default=0.2, type=float,
                        help="ratio of train test split")

    # Architecture and training params
    parser.add_argument('--input_feat_dim', default=200, type=int,
                        help="feature dimension size for input")
    parser.add_argument('--num_GCN_layers', default=2, type=int,
                        help='number of GCN layers to use in model')
    parser.add_argument('--num_GCNneurons', default=256, type=int,
                        help='number of neurons to use per GCN Layer')
    parser.add_argument('--k_pooling_dim', default=16, type=int,
                        help="size of k-pooling layer output dimension")
    parser.add_argument('--num_fc_layer', default=4, type=int,
                        help='number of FC to use  to generate predictions')
    parser.add_argument('--fc_layer_scaling', default=200, type=int,
                        help='Scaling of FC layers wrt graph output dimension (last layer has the output class dimension)')

    parser.add_argument('--num_classes', default=10, type=int,
                        help='number of label classes in data')
    parser.add_argument('--dropout_rate', default=0.5, type=float)
    # NOTE: these two flags are spelled with hyphens, unlike the rest; the
    # parsed attributes are still args.num_epochs and args.batch_size.
    parser.add_argument('--num-epochs', default=200, type=int)
    parser.add_argument('--batch-size', default=2056, type=int)
    parser.add_argument('--lr', default=0.001, type=float)
    parser.add_argument('--convergence_lr', default=1e-5, type=float)

    # Run specific params
    parser.add_argument('--model_save_dir', default="./model_wt/")
    # The threshold is a fraction (default 0.7), so it must parse as float;
    # with type=int a value such as "0.8" was rejected outright.
    parser.add_argument('--model_save_th', default=0.7, type=float,
                        help="Accuracy threshold for saving a model")

    return parser


if __name__ == "__main__":

    parser = build_arg_parser()
    args = parser.parse_args(sys.argv[1:])