GPTQ feature enhance (#1104)
* Fix bugs when calling the INC fit API.

* Export perm from the GPTQ object.

* Move quantizers and perms to the INC model.

* Support perm in WeightOnlyLinear (#1118).

* Support GPTQ model compression with saved scales (#1119).

* Support GPTQ scales.

* Remove the zero point when the scheme is symmetric (see the sketch after this list).

* Enhance WeightOnlyLinear for parallelism.

* Add logging for compression.

* Use model.to() to set the inference device for WeightOnlyLinear.

* Support RTN quantization of lm_head in GPTQ export.

* Regularize the layer-wise config when gptq_quantize is called directly.

* Align with the ipex-gpu requirement.

* Improve coverage.
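
On "remove the zero point when the scheme is symmetric": with symmetric quantization the zero point is implicitly zero once the scale is centered on the weight's absolute maximum, so it no longer needs to be packed into the compressed model. A minimal, self-contained sketch of the difference (illustrative only, not INC's actual implementation):

```
import torch

def fake_quantize(w: torch.Tensor, bits: int = 4, sym: bool = True):
    """Illustrative per-tensor weight quantization, not INC's implementation."""
    qmax = 2 ** (bits - 1) - 1   # e.g. 7 for 4-bit signed
    qmin = -(2 ** (bits - 1))    # e.g. -8
    if sym:
        # Symmetric: only the scale is kept; the zero point is implicitly 0
        # and can be dropped from the saved model.
        scale = w.abs().max() / qmax
        q = torch.clamp(torch.round(w / scale), qmin, qmax)
        return q, scale, None
    # Asymmetric: both the scale and the zero point must be stored alongside q.
    scale = (w.max() - w.min()) / (qmax - qmin)
    zero = torch.round(-w.min() / scale) + qmin
    q = torch.clamp(torch.round(w / scale) + zero, qmin, qmax)
    return q, scale, zero
```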

---------

Signed-off-by: YIYANGCAI <yiyang.cai@intel.com>
Signed-off-by: Xin He <xin3.he@intel.com>
Signed-off-by: wenhuach21 <wenhua.cheng@intel.com>
Signed-off-by: He, Xin3 <xin3.he@intel.com>
Co-authored-by: xinhe <xin3.he@intel.com>
Co-authored-by: wenhuach21 <wenhua.cheng@intel.com>
3 people committed Aug 1, 2023
1 parent 88adfc9 commit 6ba7837
Showing 19 changed files with 1,965 additions and 234 deletions.
@@ -103,3 +103,10 @@ quantized_model = load(tuned_checkpoint, model)
```
--------
For more details, please refer to the [sample code](./run_clm.py).

# (May Remove Later) Run the GPTQ algorithm
```
sh run-gptq-llm.sh
# You may need to move run-gptq-llm.sh to the root directory of Neural Compressor
# and adjust the Python file paths inside it.
# Please make sure the pile dataset has been downloaded.
```
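
GPTQ can also be driven from Python through INC's fit API (the fit API mentioned in the commit message). The snippet below is a hypothetical sketch assuming a 2.x-style weight-only config and an already-loaded `model` and calibration `dataloader`; the exact config keys may differ between releases, so treat [run_clm.py](./run_clm.py) as the authoritative usage:

```
# Hypothetical sketch -- config keys follow INC 2.x weight-only docs and may
# differ across versions; `model` and `dataloader` are assumed to exist.
from neural_compressor import PostTrainingQuantConfig, quantization

conf = PostTrainingQuantConfig(
    approach="weight_only",
    op_type_dict={
        ".*": {  # apply to every matching layer type
            "weight": {
                "bits": 4,          # 4-bit weight-only quantization
                "group_size": 128,  # per-group scales
                "scheme": "sym",    # symmetric: no zero point is stored
                "algorithm": "GPTQ",
            },
        },
    },
)
q_model = quantization.fit(model, conf, calib_dataloader=dataloader)
```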
@@ -0,0 +1,249 @@
import numpy as np
import torch
import datasets

# cache_dir = "~/.cache/"
cache_dir = None


def set_seed(seed):
    np.random.seed(seed)
    torch.random.manual_seed(seed)


def get_wikitext2(nsamples, seed, seqlen, model):
    from datasets import load_dataset
    traindata = load_dataset('wikitext', 'wikitext-2-raw-v1', split='train', cache_dir=cache_dir)
    testdata = load_dataset('wikitext', 'wikitext-2-raw-v1', split='test', cache_dir=cache_dir)

    from transformers import AutoTokenizer
    tokenizer = AutoTokenizer.from_pretrained(model, use_fast=False, cache_dir=cache_dir)
    trainenc = tokenizer("\n\n".join(traindata['text']), return_tensors='pt')
    testenc = tokenizer("\n\n".join(testdata['text']), return_tensors='pt')

    import random
    random.seed(seed)
    trainloader = []
    for _ in range(nsamples):
        # Take a random window of seqlen tokens; the target masks every
        # position except the last with -100 (torch's cross-entropy ignore_index).
        i = random.randint(0, trainenc.input_ids.shape[1] - seqlen - 1)
        j = i + seqlen
        inp = trainenc.input_ids[:, i:j]
        tar = inp.clone()
        tar[:, :-1] = -100
        trainloader.append((inp, tar))
    return trainloader, testenc


def get_ptb(nsamples, seed, seqlen, model):
    from datasets import load_dataset
    traindata = load_dataset('ptb_text_only', 'penn_treebank', split='train', cache_dir=cache_dir)
    valdata = load_dataset('ptb_text_only', 'penn_treebank', split='validation', cache_dir=cache_dir)

    from transformers import AutoTokenizer
    tokenizer = AutoTokenizer.from_pretrained(model, use_fast=False, cache_dir=cache_dir)
    trainenc = tokenizer("\n\n".join(traindata['sentence']), return_tensors='pt')
    testenc = tokenizer("\n\n".join(valdata['sentence']), return_tensors='pt')

    import random
    random.seed(seed)
    trainloader = []
    for _ in range(nsamples):
        i = random.randint(0, trainenc.input_ids.shape[1] - seqlen - 1)
        j = i + seqlen
        inp = trainenc.input_ids[:, i:j]
        tar = inp.clone()
        tar[:, :-1] = -100
        trainloader.append((inp, tar))
    return trainloader, testenc


def get_c4(nsamples, seed, seqlen, model):
    from datasets import load_dataset

    traindata = load_dataset(
        'allenai/c4', 'allenai--c4', data_files={'train': 'en/c4-train.00000-of-01024.json.gz'}, split='train',
        cache_dir=cache_dir
    )
    valdata = load_dataset(
        'allenai/c4', 'allenai--c4', data_files={'validation': 'en/c4-validation.00000-of-00008.json.gz'},
        split='validation',
        cache_dir=cache_dir
    )

    from transformers import AutoTokenizer
    tokenizer = AutoTokenizer.from_pretrained(model, use_fast=False, cache_dir=cache_dir)

    import random
    random.seed(seed)
    trainloader = []
    for _ in range(nsamples):
        # Resample documents until one is strictly longer than seqlen, so the
        # random window start below always has a non-empty range.
        while True:
            i = random.randint(0, len(traindata) - 1)
            trainenc = tokenizer(traindata[i]['text'], return_tensors='pt')
            if trainenc.input_ids.shape[1] > seqlen:
                break
        i = random.randint(0, trainenc.input_ids.shape[1] - seqlen - 1)
        j = i + seqlen
        inp = trainenc.input_ids[:, i:j]
        tar = inp.clone()
        tar[:, :-1] = -100
        trainloader.append((inp, tar))

    # Re-seed with a fixed value so the validation sample is deterministic.
    random.seed(0)
    valenc = []
    for _ in range(256):
        while True:
            i = random.randint(0, len(valdata) - 1)
            tmp = tokenizer(valdata[i]['text'], return_tensors='pt')
            if tmp.input_ids.shape[1] > seqlen:
                break
        i = random.randint(0, tmp.input_ids.shape[1] - seqlen - 1)
        j = i + seqlen
        valenc.append(tmp.input_ids[:, i:j])
    valenc = torch.hstack(valenc)

    class TokenizerWrapper:
        def __init__(self, input_ids):
            self.input_ids = input_ids

    valenc = TokenizerWrapper(valenc)

    return trainloader, valenc


def get_pile(nsamples, seed, seqlen, model):
    from datasets import load_dataset

    traindata = load_dataset(
        'NeelNanda/pile-10k', split='train',
        cache_dir=cache_dir
    )

    valdata = load_dataset(
        'allenai/c4', 'allenai--c4', data_files={'validation': 'en/c4-validation.00000-of-00008.json.gz'},
        split='validation',
        cache_dir=cache_dir
    )

    from transformers import AutoTokenizer
    tokenizer = AutoTokenizer.from_pretrained(model, use_fast=False, cache_dir=cache_dir)

    import random
    random.seed(seed)
    trainloader = []
    for _ in range(nsamples):
        # Resample documents until one is strictly longer than seqlen, so the
        # random window start below always has a non-empty range.
        while True:
            i = random.randint(0, len(traindata) - 1)
            trainenc = tokenizer(traindata[i]['text'], return_tensors='pt')
            if trainenc.input_ids.shape[1] > seqlen:
                break
        i = random.randint(0, trainenc.input_ids.shape[1] - seqlen - 1)
        j = i + seqlen
        inp = trainenc.input_ids[:, i:j]
        tar = inp.clone()
        tar[:, :-1] = -100
        trainloader.append((inp, tar))

    # Re-seed with a fixed value so the validation sample is deterministic.
    random.seed(0)
    valenc = []
    for _ in range(256):
        while True:
            i = random.randint(0, len(valdata) - 1)
            tmp = tokenizer(valdata[i]['text'], return_tensors='pt')
            if tmp.input_ids.shape[1] > seqlen:
                break
        i = random.randint(0, tmp.input_ids.shape[1] - seqlen - 1)
        j = i + seqlen
        valenc.append(tmp.input_ids[:, i:j])
    valenc = torch.hstack(valenc)

    class TokenizerWrapper:
        def __init__(self, input_ids):
            self.input_ids = input_ids

    valenc = TokenizerWrapper(valenc)

    return trainloader, valenc


def get_ptb_new(nsamples, seed, seqlen, model):
    from datasets import load_dataset
    traindata = load_dataset('ptb_text_only', 'penn_treebank', split='train', cache_dir=cache_dir)
    testdata = load_dataset('ptb_text_only', 'penn_treebank', split='test', cache_dir=cache_dir)

    from transformers import AutoTokenizer
    tokenizer = AutoTokenizer.from_pretrained(model, use_fast=False, cache_dir=cache_dir)
    trainenc = tokenizer(" ".join(traindata['sentence']), return_tensors='pt')
    testenc = tokenizer(" ".join(testdata['sentence']), return_tensors='pt')

    import random
    random.seed(seed)
    trainloader = []
    for _ in range(nsamples):
        i = random.randint(0, trainenc.input_ids.shape[1] - seqlen - 1)
        j = i + seqlen
        inp = trainenc.input_ids[:, i:j]
        tar = inp.clone()
        tar[:, :-1] = -100
        trainloader.append((inp, tar))
    return trainloader, testenc


def get_c4_new(nsamples, seed, seqlen, model):
    from datasets import load_dataset
    traindata = load_dataset(
        'allenai/c4', 'allenai--c4', data_files={'train': 'en/c4-train.00000-of-01024.json.gz'}, split='train',
        cache_dir=cache_dir
    )
    valdata = load_dataset(
        'allenai/c4', 'allenai--c4', data_files={'validation': 'en/c4-validation.00000-of-00008.json.gz'},
        split='validation',
        cache_dir=cache_dir
    )

    from transformers import AutoTokenizer
    tokenizer = AutoTokenizer.from_pretrained(model, use_fast=False, cache_dir=cache_dir)

    import random
    random.seed(seed)
    trainloader = []
    for _ in range(nsamples):
        # Resample documents until one is strictly longer than seqlen, so the
        # random window start below always has a non-empty range.
        while True:
            i = random.randint(0, len(traindata) - 1)
            trainenc = tokenizer(traindata[i]['text'], return_tensors='pt')
            if trainenc.input_ids.shape[1] > seqlen:
                break
        i = random.randint(0, trainenc.input_ids.shape[1] - seqlen - 1)
        j = i + seqlen
        inp = trainenc.input_ids[:, i:j]
        tar = inp.clone()
        tar[:, :-1] = -100
        trainloader.append((inp, tar))

    valenc = tokenizer(' '.join(valdata[:1100]['text']), return_tensors='pt')
    valenc = valenc.input_ids[:, :(256 * seqlen)]

    class TokenizerWrapper:
        def __init__(self, input_ids):
            self.input_ids = input_ids

    valenc = TokenizerWrapper(valenc)

    return trainloader, valenc


def get_loaders(name, nsamples=128, seed=0, seqlen=2048, model=''):
    if 'wikitext2' in name:
        return get_wikitext2(nsamples, seed, seqlen, model)
    if 'ptb' in name:
        if 'new' in name:
            return get_ptb_new(nsamples, seed, seqlen, model)
        return get_ptb(nsamples, seed, seqlen, model)
    if 'c4' in name:
        if 'new' in name:
            return get_c4_new(nsamples, seed, seqlen, model)
        return get_c4(nsamples, seed, seqlen, model)
    if 'pile' in name:
        return get_pile(nsamples, seed, seqlen, model)
    raise ValueError(f"Unknown calibration dataset: {name}")
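
For reference, a typical invocation of the loaders above (the model name here is only an example):

```
# 128 calibration samples of 2048 tokens each from wikitext-2; each entry in
# trainloader is an (input_ids, target) pair whose target masks all but the
# last token with -100.
trainloader, testenc = get_loaders(
    'wikitext2', nsamples=128, seed=0, seqlen=2048, model='facebook/opt-125m'
)
```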
@@ -0,0 +1,17 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2022 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .lm_eval.evaluator import evaluate
@@ -0,0 +1,16 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Copyright (c) 2022 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
