In [None]:
from huggingface_hub import login

login("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", add_to_git_credential=True)

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from tqdm import tqdm
import matplotlib.pyplot as plt

def analyze_layers_in_range(model, lower_bound, upper_bound):
    layers_in_range = []
    layers_out_of_range = []
    all_layers = []  # To keep track of the order in which layers are encountered

    for name, param in tqdm(model.named_parameters(), desc="Analyzing parameters"):
        if param.requires_grad:
            param_cpu = param.detach().cpu().float()
            
            # Check if all parameters in the layer are within the specified range
            in_range = ((param_cpu >= lower_bound) & (param_cpu <= upper_bound)).all().item()
            
            if in_range:
                layers_in_range.append(name)
            else:
                layers_out_of_range.append(name)
            
            # Record the order of layers
            all_layers.append((name, in_range))

    return layers_in_range, layers_out_of_range, all_layers

def print_layer_inlier_outlier(all_layers):
    print("\nLayer Status Report:")
    print("=" * 40)
    
    # Print information for each layer in the order they were encountered
    for layer_name, in_range in all_layers:
        if in_range:
            print(f"{layer_name}: Inlier (All parameters within range)")
        else:
            print(f"{layer_name}: Outlier (Some parameters out of range)")

def plot_layer_status(all_layers, output_path):
    # Extract statuses
    statuses = [1 if in_range else 0 for _, in_range in all_layers]

    plt.figure(figsize=(20, 10))  # Adjusted figure size for better readability
    bar_width = 0.8  # Set a fixed width for the bars
    x = range(len(all_layers))  # X-axis positions for the bars

    # Plot the bars
    plt.bar(x, [1] * len(all_layers), color=['green' if status == 1 else 'red' for status in statuses], width=bar_width)

    # Set the x-ticks to be numbered (0, 1, 2, ..., len(all_layers)-1)
    plt.xticks(x[::10], [i for i in range(len(all_layers))][::10], rotation=90)  # Show every 10th tick for clarity

    # Label the axes
    plt.xlabel('Layer Index')
    plt.ylabel('Status')
    plt.title('Layer Parameter Status')

    # Remove everything under x-axis
    plt.gca().spines['bottom'].set_visible(False)
    plt.gca().spines['left'].set_visible(False)
    plt.gca().xaxis.set_ticks_position('none')
    plt.gca().yaxis.set_ticks_position('none')

    # Adjust layout to make room for labels
    plt.tight_layout()

    # Save the plot
    plt.savefig(output_path, bbox_inches='tight')  # Save the plot to the specified file path
    plt.close()  # Close the figure to free up memory

# Load model and tokenizer
model_name = "stabilityai/japanese-stablelm-base-beta-7b"  # Update with the correct model name
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, low_cpu_mem_usage=True).to(device)

lower_bound, upper_bound = -1, 1

layers_in_range, layers_out_of_range, all_layers = analyze_layers_in_range(model, lower_bound, upper_bound)

print(f"Layers with all parameters in range: {len(layers_in_range)}")
print(f"Layers with any parameters out of range: {len(layers_out_of_range)}")

# Print the layer status report in the order encountered
print_layer_inlier_outlier(all_layers)

# Define the output path (adjust this path to a location on your local system)
output_path = "layer_status_plot_stablelm.png"  # Path to save the plot

# Plot the layer status bar graph and save it
plot_layer_status(all_layers, output_path)

In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from tqdm import tqdm
import matplotlib.pyplot as plt

def analyze_layers_in_range(model, lower_bound, upper_bound):
    layers_in_range = []
    layers_out_of_range = []
    all_layers = []  # To keep track of the order in which layers are encountered

    for name, param in tqdm(model.named_parameters(), desc="Analyzing parameters"):
        if param.requires_grad:
            param_cpu = param.detach().cpu().float()
            
            # Check if all parameters in the layer are within the specified range
            in_range = ((param_cpu >= lower_bound) & (param_cpu <= upper_bound)).all().item()
            
            if in_range:
                layers_in_range.append(name)
            else:
                layers_out_of_range.append(name)
            
            # Record the order of layers
            all_layers.append((name, in_range))

    return layers_in_range, layers_out_of_range, all_layers

def print_layer_inlier_outlier(all_layers):
    print("\nLayer Status Report:")
    print("=" * 40)
    
    # Print information for each layer in the order they were encountered
    for layer_name, in_range in all_layers:
        if in_range:
            print(f"{layer_name}: Inlier (All parameters within range)")
        else:
            print(f"{layer_name}: Outlier (Some parameters out of range)")

def plot_layer_status(all_layers, output_path):
    # Extract statuses
    statuses = [1 if in_range else 0 for _, in_range in all_layers]

    plt.figure(figsize=(20, 10))  # Adjusted figure size for better readability
    bar_width = 0.8  # Set a fixed width for the bars
    x = range(len(all_layers))  # X-axis positions for the bars

    # Plot the bars
    plt.bar(x, [1] * len(all_layers), color=['green' if status == 1 else 'red' for status in statuses], width=bar_width)

    # Set the x-ticks to be numbered (0, 1, 2, ..., len(all_layers)-1)
    plt.xticks(x[::10], [i for i in range(len(all_layers))][::10], rotation=90)  # Show every 10th tick for clarity

    # Label the axes
    plt.xlabel('Layer Index')
    plt.ylabel('Status')
    plt.title('Layer Parameter Status')

    # Remove everything under x-axis
    plt.gca().spines['bottom'].set_visible(False)
    plt.gca().spines['left'].set_visible(False)
    plt.gca().xaxis.set_ticks_position('none')
    plt.gca().yaxis.set_ticks_position('none')

    # Adjust layout to make room for labels
    plt.tight_layout()

    # Save the plot
    plt.savefig(output_path, bbox_inches='tight')  # Save the plot to the specified file path
    plt.close()  # Close the figure to free up memory

# Load model and tokenizer
model_name = "openbmb/MiniCPM-V-2"  # Update with the correct model name
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float16, low_cpu_mem_usage=True).to(device)

lower_bound, upper_bound = -1, 1

layers_in_range, layers_out_of_range, all_layers = analyze_layers_in_range(model, lower_bound, upper_bound)

print(f"Layers with all parameters in range: {len(layers_in_range)}")
print(f"Layers with any parameters out of range: {len(layers_out_of_range)}")

# Print the layer status report in the order encountered
print_layer_inlier_outlier(all_layers)

# Define the output path (adjust this path to a location on your local system)
output_path = "layer_status_plot_stablelm.png"  # Path to save the plot

# Plot the layer status bar graph and save it
plot_layer_status(all_layers, output_path)


In [None]:
!pip install peft

In [2]:
!git clone https://github.com/WongKinYiu/yolov7.git


Cloning into 'yolov7'...
remote: Enumerating objects: 1197, done.[K
remote: Total 1197 (delta 0), reused 0 (delta 0), pack-reused 1197 (from 1)[K
Receiving objects: 100% (1197/1197), 74.23 MiB | 5.96 MiB/s, done.
Resolving deltas: 100% (519/519), done.


In [3]:
!wget https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7.pt
!wget https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7x.pt
!wget https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7-w6.pt
!wget https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7-e6.pt
!wget https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7-d6.pt
!wget https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7-e6e.pt

--2024-10-12 12:59:05--  https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7.pt
Resolving github.com (github.com)... 20.27.177.113
Connecting to github.com (github.com)|20.27.177.113|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/511187726/b0243edf-9fb0-4337-95e1-42555f1b37cf?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20241012%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20241012T125905Z&X-Amz-Expires=300&X-Amz-Signature=8e4b9026fbb4764ec74a28e09d23d5d5a9caa0e4bbd12d5fc9ade4a882acf668&X-Amz-SignedHeaders=host&response-content-disposition=attachment%3B%20filename%3Dyolov7.pt&response-content-type=application%2Foctet-stream [following]
--2024-10-12 12:59:05--  https://objects.githubusercontent.com/github-production-release-asset-2e65be/511187726/b0243edf-9fb0-4337-95e1-42555f1b37cf?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=relea

In [4]:
!pip install -r yolov7/requirements.txt

Collecting numpy<1.24.0,>=1.18.5 (from -r yolov7/requirements.txt (line 5))
  Downloading numpy-1.23.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.3 kB)
Collecting thop (from -r yolov7/requirements.txt (line 36))
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl.metadata (2.7 kB)
Downloading numpy-1.23.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.1/17.1 MB[0m [31m36.5 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0mm
[?25hDownloading thop-0.1.1.post2209072238-py3-none-any.whl (15 kB)
Installing collected packages: numpy, thop
  Attempting uninstall: numpy
    Found existing installation: numpy 1.26.4
    Uninstalling numpy-1.26.4:
      Successfully uninstalled numpy-1.26.4
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
tensorflow

In [5]:
!wget https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7.pt -P yolov7


--2024-10-12 13:00:16--  https://github.com/WongKinYiu/yolov7/releases/download/v0.1/yolov7.pt
Resolving github.com (github.com)... 20.27.177.113
Connecting to github.com (github.com)|20.27.177.113|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/511187726/b0243edf-9fb0-4337-95e1-42555f1b37cf?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20241012%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20241012T125905Z&X-Amz-Expires=300&X-Amz-Signature=8e4b9026fbb4764ec74a28e09d23d5d5a9caa0e4bbd12d5fc9ade4a882acf668&X-Amz-SignedHeaders=host&response-content-disposition=attachment%3B%20filename%3Dyolov7.pt&response-content-type=application%2Foctet-stream [following]
--2024-10-12 13:00:16--  https://objects.githubusercontent.com/github-production-release-asset-2e65be/511187726/b0243edf-9fb0-4337-95e1-42555f1b37cf?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=relea

## VERIFY IF MODEL IS QUANTIZED FIRST

In [25]:
import torch

SF16_MAX = 0.999969482421875
SF16_MIN = -0.999969482421875

def is_in_sf16_range(tensor):
    return torch.all((tensor >= SF16_MIN) & (tensor <= SF16_MAX))

def quantize_to_sf16(tensor):
    if is_in_sf16_range(tensor):
        # Scale to 16-bit integer range
        scaled = (tensor - SF16_MIN) / (SF16_MAX - SF16_MIN) * 65535
        # Round to nearest integer
        rounded = torch.round(scaled)
        # Convert back to SF16 range
        quantized = rounded / 65535 * (SF16_MAX - SF16_MIN) + SF16_MIN
        return quantized
    else:
        return tensor  # Return the original tensor if not in SF16 range

def apply_selective_sf16_quantization(model):
    total_params = 0
    quantized_params = 0
    memory_usage = 0

    for name, param in model.named_parameters():
        total_params += param.numel()
        if is_in_sf16_range(param.data):
            param.data = quantize_to_sf16(param.data)
            quantized_params += param.numel()
            memory_usage += param.numel() * 16 / 8  # 16 bits per parameter
        else:
            memory_usage += param.numel() * 32 / 8  # 32 bits for non-quantized parameters

    memory_usage_mb = memory_usage / (1024 * 1024)
    return total_params, quantized_params, memory_usage_mb

def verify_selective_sf16_quantization(model):
    for name, param in model.named_parameters():
        if is_in_sf16_range(param.data):
            unique_values = torch.unique(param)
            if len(unique_values) > 65536:
                print(f"Warning: Quantized parameter {name} has more than 65536 unique values")
            else:
                print(f"Layer {name} is correctly quantized to SF16")
        else:
            print(f"Layer {name} is not quantized (outside SF16 range)")

# Usage in your main script
quantized_model = torch.hub.load('yolov7', 'custom', '/kaggle/working/yolov7.pt', source='local', force_reload=True)
quantized_model.eval()
total_params, quantized_params, quantized_memory = apply_selective_sf16_quantization(quantized_model)

print(f"Total parameters: {total_params}")
print(f"Quantized parameters: {quantized_params}")
print(f"Quantization percentage: {quantized_params / total_params * 100:.2f}%")
print(f"Estimated memory usage: {quantized_memory:.2f} MB")

# Verify quantization
print("\nVerifying Selective SF16 Quantization:")
verify_selective_sf16_quantization(quantized_model)

Adding autoShape... 
Total parameters: 37620125
Quantized parameters: 37580384
Quantization percentage: 99.89%
Estimated memory usage: 71.83 MB

Verifying Selective SF16 Quantization:
Layer model.model.0.conv.weight is correctly quantized to SF16
Layer model.model.0.bn.weight is not quantized (outside SF16 range)
Layer model.model.0.bn.bias is not quantized (outside SF16 range)
Layer model.model.1.conv.weight is correctly quantized to SF16
Layer model.model.1.bn.weight is not quantized (outside SF16 range)
Layer model.model.1.bn.bias is not quantized (outside SF16 range)
Layer model.model.2.conv.weight is correctly quantized to SF16
Layer model.model.2.bn.weight is not quantized (outside SF16 range)
Layer model.model.2.bn.bias is not quantized (outside SF16 range)
Layer model.model.3.conv.weight is correctly quantized to SF16
Layer model.model.3.bn.weight is not quantized (outside SF16 range)
Layer model.model.3.bn.bias is not quantized (outside SF16 range)
Layer model.model.4.conv.wei

## RUN BENCHMARK COMPARISONS

In [26]:
import torch
import cv2
import psutil
import time
import numpy as np
from torchvision.ops import box_iou

# [Include the SF16 quantization functions from the previous artifact here]

def calculate_memory_fp32(model):
    total_params = sum(p.numel() for p in model.parameters())
    memory_usage_fp32 = total_params * 4 / (1024 * 1024)
    return memory_usage_fp32

def estimate_complexity(model):
    return sum(p.numel() for p in model.parameters())

def calculate_accuracy(model, img, true_boxes):
    results = model(img)
    pred_boxes = results.pandas().xyxy[0]

    true_boxes_tensor = torch.tensor(true_boxes, dtype=torch.float32)
    pred_boxes_tensor = torch.tensor(pred_boxes[['xmin', 'ymin', 'xmax', 'ymax']].values, dtype=torch.float32)

    ious = box_iou(pred_boxes_tensor, true_boxes_tensor)
    accuracy = ious.mean().item() if len(ious) > 0 else 0
    return accuracy

def detect_image(model, image_path, true_boxes, is_quantized=False):
    start_time = time.time()
    process = psutil.Process()
    initial_memory = process.memory_info().rss

    img = cv2.imread(image_path)
    img = cv2.resize(img, (640, 640))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    complexity = estimate_complexity(model)
    accuracy = calculate_accuracy(model, img, true_boxes)

    end_time = time.time()
    final_memory = process.memory_info().rss
    elapsed_time = end_time - start_time
    memory_used = (final_memory - initial_memory) / (1024 * 1024)

    if is_quantized:
        _, _, memory_model = apply_selective_sf16_quantization(model)
    else:
        memory_model = calculate_memory_fp32(model)

    total_params = sum(p.numel() for p in model.parameters())

    return {
        "time": elapsed_time,
        "memory_rss": memory_used,
        "memory_model": memory_model,
        "total_params": total_params,
        "complexity": complexity,
        "accuracy": accuracy
    }

# Load models
normal_model = torch.hub.load('yolov7', 'custom', '/kaggle/working/yolov7.pt', source='local', force_reload=True)
normal_model.eval()

quantized_model = torch.hub.load('yolov7', 'custom', '/kaggle/working/yolov7.pt', source='local', force_reload=True)
quantized_model.eval()
total_params, quantized_params, quantized_memory = apply_selective_sf16_quantization(quantized_model)

# Path to your image and true boxes
image_path = '/kaggle/input/yolodata/649aabe32de76.jpg'
true_boxes = [[50, 50, 200, 200], [300, 300, 500, 500]]  # Replace with actual true bounding boxes

# Perform detection and measure performance
normal_results = detect_image(normal_model, image_path, true_boxes)
quantized_results = detect_image(quantized_model, image_path, true_boxes, is_quantized=True)

# Print comparison results
print("Normal YOLOv7:")
print(f"Processing time: {normal_results['time']:.2f} seconds")
print(f"Memory used (RSS): {normal_results['memory_rss']:.2f} MB")
print(f"Memory used by model: {normal_results['memory_model']:.2f} MB")
print(f"Estimated complexity: {normal_results['complexity']:,} operations")
print(f"Total parameters: {normal_results['total_params']:,}")
print(f"Accuracy: {normal_results['accuracy']*100:.2f}%")

print("\nSelectively Quantized YOLOv7 (SF16):")
print(f"Processing time: {quantized_results['time']:.2f} seconds")
print(f"Memory used (RSS): {quantized_results['memory_rss']:.2f} MB")
print(f"Memory used by model: {quantized_results['memory_model']:.2f} MB")
print(f"Estimated complexity: {quantized_results['complexity']:,} operations")
print(f"Total parameters: {quantized_results['total_params']:,}")
print(f"Quantized parameters: {quantized_params:,}")
print(f"Percentage of parameters quantized: {(quantized_params / total_params) * 100:.2f}%")
print(f"Accuracy: {quantized_results['accuracy']*100:.2f}%")

# Calculate and print differences
time_diff = (normal_results['time'] - quantized_results['time']) / normal_results['time'] * 100
memory_diff = (normal_results['memory_model'] - quantized_results['memory_model']) / normal_results['memory_model'] * 100
accuracy_diff = normal_results['accuracy'] - quantized_results['accuracy']

print(f"\nTime improvement: {time_diff:.2f}%")
print(f"Memory improvement: {memory_diff:.2f}%")
print(f"Accuracy difference: {accuracy_diff*100:.2f}% (Normal - Quantized)")

Adding autoShape... 
Adding autoShape... 
Normal YOLOv7:
Processing time: 1.53 seconds
Memory used (RSS): 7.09 MB
Memory used by model: 143.51 MB
Estimated complexity: 37,620,125 operations
Total parameters: 37,620,125
Accuracy: 4.56%

Selectively Quantized YOLOv7 (SF16):
Processing time: 1.36 seconds
Memory used (RSS): 0.02 MB
Memory used by model: 71.83 MB
Estimated complexity: 37,620,125 operations
Total parameters: 37,620,125
Quantized parameters: 37,580,384
Percentage of parameters quantized: 99.89%
Accuracy: 4.55%

Time improvement: 11.07%
Memory improvement: 49.95%
Accuracy difference: 0.01% (Normal - Quantized)
