## model and tokenizer

In [1]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

model_name = "filipealmeida/Mistral-7B-Instruct-v0.1-sharded"

def load_quantized_model(model_name: str):
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16
    )

    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map="auto",
        load_in_4bit=True,
        torch_dtype = torch.bfloat16,
    )

    return model

def initialize_tokenizer(model_name: str):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    tokenizer.bos_token_id = 1
    return tokenizer

model = load_quantized_model(model_name)

tokenizer = initialize_tokenizer(model_name)

stop_token_ids = [0]


The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


RuntimeError: No GPU found. A GPU is needed for quantization.

## check if working

In [None]:
##--- no run this ---##

text = "[INST]HOW WILL AI REPLACE HUMANS[/INST]"

encoded = tokenizer(text, return_tensors="pt", add_special_tokens=False)
model_input = encoded
generate_ids = model.generate(**model_input, max_new_tokens=200, do_sample=True)
decoded = tokenizer.batch_decode(generate_ids)
print(decoded[0])

## dataset load

In [3]:
from datasets import load_dataset

# Load the instruct_tune_dataset dataset
dataset = load_dataset("Dobby091/koko")



In [24]:
print(dataset["test"][0])

{'pdf_filename': '228_6.pdf', 'context': '- \n. . . IS : 228 (Part 8)-1987 \nIndian Standard \nMETHODS FOR \nCHEMICAL ANALYSIS OF STEELS \nPART 6 DETERMINATION OF-CHROMIUM BY \nPERSULPHATE OXIDATION METHOD \n(FOR CHROMIUM)04 PERCENT) \n( Third Revision ) \nFirst Reprint JANUARY 1991 \nUDC 669.14+669*15-194*2/*3 : 543[546*76] \n/-• \n’ ! \\ . _’ \nQ Copyright 1987 \nBUREAU OF INDIAN STANDARDS \nMANAK BHAVAN, 9 BAHADUR SHAH ZAFAR MARG \nNEW DBLHI 110002 \nGr3 August 1987 IS I 228 ( Part 6 ) - 1987 \n/ Indian Standard \nMETHODS FOR \nCHEMI-CAL ANALYSIS OF STEELS \nPART 8 DETERMINATION OF CHROMIUM BY \nPERSULPHATE OXIDATION METHOD \n( FOR CHROMIUM > W PERCENT ) \n( Third Revision ) \nMethods of Chemical Analysis of Ferrous Metals \nSectional Committee, SMDC 2 \nChairman \nDR C. S. P. IYER \nMembers \nSERI G. M. APPABAO Rcprcscnting \nBhabha Atomic Research Centre, Bombay \nSteel Authority of India Ltd (Bhilai Steel Plant ), \nBhilai \nSHRI R. D. A~ABWAL ( Altrmata) \nSHRI S. V. BHAQWAT Kha

## select the trainables

### not for now

In [26]:
from datasets import Dataset, DatasetDict

# Select the first 1000 samples in the train set and the first 200 samples in the test set
train_dataset = dataset['train'].select(range(15))
test_dataset = dataset['test'].select(range(2))

# Create a new `DatasetDict` to store the selected samples
selected_dataset_dict = DatasetDict({'train': train_dataset, 'test': test_dataset})

# Print the number of samples in each split
print(f"Number of train samples: {len(train_dataset)}")
print(f"Number of test samples: {len(test_dataset)}")

selected_dataset_dict

Number of train samples: 15
Number of test samples: 2


DatasetDict({
    train: Dataset({
        features: ['pdf_filename', 'context', 'question', 'answer'],
        num_rows: 15
    })
    test: Dataset({
        features: ['pdf_filename', 'context', 'question', 'answer'],
        num_rows: 2
    })
})

## create prompt

In [5]:
def create_prompt(sample):
    bos_token = "<s>"
    base_prompt1 = "below context is from "
    base_prompt2 = ", answer the folwing questions based on the context given \n"
    document = sample['pdf_filename']
    context = sample['context']
    answer = sample['answer']
    question = sample['question']
    eos_token = "</s>"
    full_prompt = ""
    full_prompt += bos_token
    full_prompt += "###Instruction:\n"
    full_prompt += base_prompt1
    full_prompt += document
    full_prompt += base_prompt2
    full_prompt += "\n\n###context:\n" + context
    full_prompt += "\n\n###question:\n" + question
    full_prompt += "\n\n###answer:\n" + answer
    full_prompt += eos_token
    print("------------------")
    print(full_prompt)
    print("------------------")
    return full_prompt

In [6]:
create_prompt(dataset["train"][0])


------------------
<s>###Instruction:
below context is from 228_4.pdf, answer the folwing questions based on the context given 


###context:
IS:228(Part4)-1987 
( ReaIhncd 1997 ) 
Indian Standard 
METHOD FOR 
CHEMICAL ANALYSIS OF STEELS 
PART 4 DETERMINATION OF TOTAL CARBON 
BY GRAVIMETRIC METHOD 
( FOR CARBON r 0.1 PERCENT) 
( Third Revision ) 
I;wflh Reprint NOVEMBER 1998 
UDC 669.14 + 669.15-194.2/.3 : 543.21 [546.26] 
BUREAU OF INDIAN STANDARDS 
MANAK BHAVAN, 9 BAHADUR SHAH ZAFAR MARG 
NEW DELHI 110002 
Gr 2 Aqirst 1987 IS:228(Part4)-1987 
Indian Standard 
METHODS FOR 
CHEMICAL ANALYSIS OF STEELS 
PART 4 DETERMINATION OF TOTAL CARBON 
BY GRAVIMETRIC METHOD 
(FOR CARBON > O-1 PERCENT) 
( Third Revision ) 
Methods of Chemical Analysis of Ferrous Metals 
Sectional Committee, SlMDC 2 
Chuiman Rqmsmting 
DR C. S. P. IYLR Bhabha Atomic Research Centre, Bombay 
Mcmbnr 
SHKI G. M. APPAUAO Steel Authority of India Ltd ( Bhilai Steel Plant ), 
Bhilai 
SIIBX R. D. AOAIIWAL ( AllmW) 
SHEI S. 

'<s>###Instruction:\nbelow context is from 228_4.pdf, answer the folwing questions based on the context given \n\n\n###context:\nIS:228(Part4)-1987 \n( ReaIhncd 1997 ) \nIndian Standard \nMETHOD FOR \nCHEMICAL ANALYSIS OF STEELS \nPART 4 DETERMINATION OF TOTAL CARBON \nBY GRAVIMETRIC METHOD \n( FOR CARBON r 0.1 PERCENT) \n( Third Revision ) \nI;wflh Reprint NOVEMBER 1998 \nUDC 669.14 + 669.15-194.2/.3 : 543.21 [546.26] \nBUREAU OF INDIAN STANDARDS \nMANAK BHAVAN, 9 BAHADUR SHAH ZAFAR MARG \nNEW DELHI 110002 \nGr 2 Aqirst 1987 IS:228(Part4)-1987 \nIndian Standard \nMETHODS FOR \nCHEMICAL ANALYSIS OF STEELS \nPART 4 DETERMINATION OF TOTAL CARBON \nBY GRAVIMETRIC METHOD \n(FOR CARBON > O-1 PERCENT) \n( Third Revision ) \nMethods of Chemical Analysis of Ferrous Metals \nSectional Committee, SlMDC 2 \nChuiman Rqmsmting \nDR C. S. P. IYLR Bhabha Atomic Research Centre, Bombay \nMcmbnr \nSHKI G. M. APPAUAO Steel Authority of India Ltd ( Bhilai Steel Plant ), \nBhilai \nSIIBX R. D. AOAIIWAL ( 

In [None]:
def generate_response(prompt, model):
    encoded_input = tokenizer.encode_plus(
        prompt,
        add_special_tokens=True,
        return_tensors="pt"
    )
    model_inputs = {k: v.to('cuda') for k, v in encoded_input.items() if torch.is_tensor(v)}
    generated_ids = model.generate(
        input_ids=model_inputs["input_ids"],
        attention_mask=model_inputs["attention_mask"],
        max_length=50,
        num_beams=5,
        early_stopping=True,
        max_new_tokens=20,  # corrected argument name
    )
    generated_text = tokenizer.decode(generated_ids[0])
    return generated_text


In [7]:
prompt = "###Instruction:\nbelow context is from 228_4.pdf, answer the folwing questions based on the context given \n\n\n###context:\nIS:228(Part4)-1987 \n( ReaIhncd 1997 ) \nIndian Standard \nMETHOD FOR \nCHEMICAL ANALYSIS OF STEELS \nPART 4 DETERMINATION OF TOTAL CARBON \nBY GRAVIMETRIC METHOD \n( FOR CARBON r 0.1 PERCENT) \n( Third Revision ) \nI;wflh Reprint NOVEMBER 1998 \nUDC 669.14 + 669.15-194.2/.3 : 543.21 [546.26] \nBUREAU OF INDIAN STANDARDS \nMANAK BHAVAN, 9 BAHADUR SHAH ZAFAR MARG \nNEW DELHI 110002 \nGr 2 Aqirst 1987 IS:228(Part4)-1987 \nIndian Standard \nMETHODS FOR \nCHEMICAL ANALYSIS OF STEELS \nPART 4 DETERMINATION OF TOTAL CARBON \nBY GRAVIMETRIC METHOD \n(FOR CARBON > O-1 PERCENT) \n( Third Revision ) \nMethods of Chemical Analysis of Ferrous Metals \nSectional Committee, SlMDC 2 \nChuiman Rqmsmting \nDR C. S. P. IYLR Bhabha Atomic Research Centre, Bombay \nMcmbnr \nSHKI G. M. APPAUAO Steel Authority of India Ltd ( Bhilai Steel Plant ), \nBhilai \nSIIBX R. D. AOAIIWAL ( AllmW) \nSHEI S. V. BHAOW~Y Khandelwnl Ferro Alloy1 Ltd, Nagpur \nSanr D. N. GUPTA ( Aflernnlc ) \nSEttIP. CHAKRA Indian Metals & Ferro Alloys Ltd, Koraput \nCREYWT& METALLVIWUST \nAINXVTANT RESEARCH OIFIC~~R Ministry of Transport ( Department of Railways) \n(MET-2) RDSO, Luclixow \n( Allrmdr ) \nCHIEW CIIEM~ST Tata Iron 8; Steel Co Ltd, Jamshedpur \nAWXSTANT CHIEF CHRMMT ( Allnnat~ ) \nSIiItI M. K. CEAtiRAVARTY Ministry of Defence (DGI) \nSnug P. K. SEN ( Altrrnotr ) \nDR M. M. CEAKRABO~TY Indian Iron & Steel Co Ltd, Burnpur \nSERB M. S. CEATTERJEE ( Altrrnotr ) \nSERI C. K. D~KOH~T Ordnance Factory Board ( Ministry of Defence ), \nCalcutta \nSARI S. N. MOITRA ( Abn~tr ) \nSaRI V.B. KIXANXA Directorate General of Supplies & Disposals, \nNew Delhi \nSHR~ J. N. MUIWERJEE Steel Authority of India Ltd (Durgapur Steel \nPlant ). Durgapur \n( Cen8inurd 8n porr 2 ) \n0 whr 1987 \nBUREAU OF INDIAN STANDARDS \nThis publication is protected -under the Itin clrprighl Act ( XIV of 1957 ) and \nrrproduction in whole or in part by any means except with written permission of the \npublisher shall be deemed to be an infringement of copyright under the said Act. IS:228(Part4)-1987 \nMcmbm Rtpcmting \nSHRI P. NABAIN \nSHRI G. R. Saalar ( Altnncfc) Mabindra Ugine Steel Co Ltd. Bombay \nSHBI N. P. PANDA Steel Autbority of India Ltd (Rourkela Steel \nPlant ), Rourkela \nSIIIU B. MAIIAPATRA ( Aftn~h ) \nDa L. P. PANDEY National Metallurgical Laboratory ( CSIR ), \nDR D. C. PRASHAR - N p;h-J~r a Pbvsical Laboratorv (CSIR ). \nNew Delhi’ , . I. \nSHBI J. RAI ( Al~rraa~ ) \nSaat G. RAJARAO \nSHRI K. RAMAKBIIHIAN \nDa J. RAJARAY ( Allma& ) \nSBRI A. P. !hNEA Ferro Alloys Cornoration Ltd. Shreeramaagar \nEurn & Co, Batgalore . \nSteel Authority of India Ltd ( Bokaro Steel \nPlant ), Bokaro \nSHRI N. V. SUEIBAIIA~APPA ’ Vivervnraya Iron 8t Steel Ltd. Bbadravati \nDB P. S~JBBAHYANIAY Dcfence Metallurgical Rewarcb Laboratory, \nHvderabad \nSHBI T. H. RAO (Al&m& ) , \nDE CR. VENKATLBWARLU Bhabha Atomic Research Centre, Bombay \nSABI K. RAOHAVENDBAN, Director General, BIS ( Ex-oficio Member) \nDirector ( Strut St Met ) \nSlcrrlary \nSFIBI M. L. SHARJ~A \nAssirtant Director ( Metalr ), BIS \nFerrpus Metals Analysis Subcommittee, SMDC 2 : 3 \ncono#mr \nDs C. 9. P. IYBR \nhitathrr \nSHRI S. BASKABAN Bhabha Atomic Research Centre, Bombay \nSHBI MATA SARAN ( A&m& I ) Bharat Heavy Electricalr Ltd, Hjderahad \nSHRI B. RAEA (Alternate II ) \n!&RI U.P.bJB Steel Authority of India Ltd ( Bhilai Steel Plant ), \nBbilai \nSIiRI E. M. VERQHEtlE ( Altmatr ) \nCHlElP CEEMIRT Tata Iron & Steel Co Ltd. Iamrhednur \nAIWSTANT CIIIBP CEBYIST ( A&m& ) . \nDB M. M. CHAKBABORTT Indian Iron & Steel Co Ltd. Burnpur \nSHRI L. N. Dbs ( Altmmtr ) \nSEBI H. K. DAE Steel Authority of India Ltd ( hourkela Steel \nPlant ), Rourkela \nSHRI K. Bmaror ( Aitrrnats ) \nSHBI A. K. GIJPTA National Physical \nNew Delhi Labor&y ( C8IR ), \n( Cetttirurd on pap 8 ) \n2 IS : ‘228 ( Part 4 ) - 1987 \nIndian Standard \nMETHODS FOR \nCHEMICAL ANALYSIS OF STEELS \nPART 4 DETERYlNATlON OF TOTAL CARBON \nBY GARVIMETRIC METHOD \n(FOR CARBON > O-1 PERCENT ) \n( Third Revision ) \n0. FOREWORD \n0.1 This Indian Standard ( Part 4 ) (Third R cvision ) was adopted I)y \nthe Indian Standards Institution on 16 January 1987, afirr the draft \nfinalized by the Methods of Chemical Analysis of Pcrrous Metals \nSectional Committre had been approved by the Structural and Mct;~ls \nDivision Council. \n0.2 IS : 228, which was issued as a tentative standard in 1952 and \nrevised in 1959, covered the chemical analysis of pig iron, cast iron and \nplain carbon and low alloy steels. For the convenience it was decidctl \nto publish a comprehensive series on chemical analysis of steels \nincluding high alloy steels. Accordingly, revision of IS : 228 was \ntaken-up again and new series on method of chemical analysis of \nsteels including high alloy steels was published in various parts as \nIS : 228 ( Parts 1 to 13 ) ( see Appendix A ) covering separate method \nof analysis for each constituent in steels. However, IS : 228-195!)* \nversion has been rctainc tl for the analysis of pig iron and cast iron till \na separate standard for analysis of pig iron and cast iron is phlishctl. \n0.2.1 This revision of IS : 228 (Part 4)-19747 has been undcrtnkcn \non the basis of experience gained during the implementation of \\he \nstandard by the manufacturers and testing laboratories. \n0.3 In this revision, method has been updated. \n*Methods of chemical analysis of pig iron, cast won and plain carb’on and low \nalloy 1teela [ rruiud )* \ntMethod1 for chemical analysis of steels: Part 4 Determination of carbon by \ngravimetric method ( for carbon > 0.1 percent ) (rend rmi~ion ). \n3 IS : 228 ( Part 4 ) - 1987 \n8.4 In rr,porting 111~ reslllt of a test or analysis made in accordance \nr\\.itIl tItis 51 ;~ntlartl, if the final value, ohservcd or calculated, is to be \nI r)~~trcIt~tI 011; it >11;111 Iw clone in accordance with IS : 2-1960*. \n1. SCOPE \n1.1 This standard (Part 4) covers the method for determination of \ntotal carljon content of plain carbon, low alloy and high alloy steels \nnfO.1 pcrccnt and above by the grnvimctric method. \n2. OUTLINE OF THE METHOD \n2.1 The sample is burnt in a stream of purified oxygen and the \ncarhnn dioxide formctl is al~wrlwtl, after purihation, in suitable \nahorhant and tlc~tcrminc~cl. \n3. REAGENTS \n3.1 Oxygen (On) - 90’5 percent minimum. \n3.2 Accarite or Soda Lime -- 0’80 to 2’00 mm. \n3.3 Magnesium Perchlorate - Mg (CIO( )2, 0.80 to 2’00 mm. \n3.4 Boat/Crucible - Boat/crucible of precise dimension for \naccommodating in the r&stance and induction furnace. \n3.4.1 Preignitc the boats/crucibles in air or oxygen in a furnace for \nan hour at 1 100°C and stnrc in a desiccator and check for consistancy \nof the blank values. \n3.5 Flux,‘Accelerator - Low carbon copper, red lead ( preignited \nat 550’(Z), tin and iron of low carbon content. \n4. APPARATUS \n4.1 The apparatus recommended in IS : 6226 (Part l)-1971t may be \n11setl \n4.2 Instead 01‘ the resistance furnace, an induction furnace may also \nbe used. \n--- \n*Rules for rounding ofl’ numerical valuer ( mid). \ntRecommcndation for apparatus for chemical analysis of metals: Put I \nDetermination of carbon by direct combustion. \n4 IS:228(Part4)-1987 \n5. SAMPLING \n5.1 The sample shall be drawn as prescribed in the relevant Indian \nStandards. \n5.2 The sample is to be cleaned with analar grade ether and acetone, \ndried in an air oven at 100 f 5°C. \n6. PROCEDURE \n6.1 Assemble the apparatus. Switch on the furnace, if it is a resistance \nfurnace, and allow it to attain a temperature of 1 05O’C (see Note ), all \nthe while passing oxygen through the apparatus so that it bubbles \nfreely at the exit end of the train. Disconnect the absorption bulb, \nkeep in a desiccator till it attains room temperature and take the \ninitial weight. Repeat the operation till a constant weight is \nobtained. . \nNOTB - For high chromium and high nickel rteelr, the temperature of 1 250°C \nin recommended for complete comburtion. \n6.2 Weigh to the nearest 0’001 g, 2’0 to 3’0 g of the test sample. \nTransfer to the preignited combustion boat covered at the bottom \nwith a thin layer of calcined alumina. Spread the sample evenly over \nthe top of the alumina and cover it with 2’0 to 3’0 g of the flux. \nIntroduce the boat slowly In the hot zone of the combustion tube. \n6.3 In the case of induction heating, weigh to the nearest 0’001 g, 0’9 \nto 1’1 g of the sample and transfer to a preignited crucible. Add an \nequal quantity of the flux. Place the crucible in Position on the \npedestal post of the furnace, raise to the combustion position and \nlock the system. Pass oxygen through the system and ignite the sample. \n6.4 Maintain a rapid flow of oxygen ( 800 to 1 000 ml/min) throughout \nthe combustion, then reduce to 400 to 500 ml per min and maintain \nit for another 6 co 8 min in order to sweep out the carbon dioxide. \n6.5 Remove the absorption bulb and weigh it after keeping it in \ndesiccator till it attains room temperature. The increase in weight of \nthe bulb represents the carbon dioxide. \n6.6 Remove the boat or crucible and examine for any incomplete \ncombustion. If the sample is not thoroughly fused, repeat the \ndetermination with a fresh sample. \n6.7 Blank - Charge a prei nited boat or crucible, as the case may be, \nwith the same amount of B ux used in the determination and follow \nthe procedure as in 6.2 to 6.5. \n5 IS:228(Part4)-1981 \n7. CALCULATION \n7.1 Calculate the total carbon content of the sample as follows: \nCarbon, percent A-B = --x 27’29 c \nwhere \nA = increase in mass in g of the absorption bulb due to \ncarbon dioxide from the sample, \nB = increase in mass in g of the absorption bulb due to \ncarbon dioxide from the blank determination, and \nC= mass in g of the sample taken. \n8. ACCURACY \n8.1 The accuracy of the method is & 0’01 percent for carbon in the \nrange of 0’1 to 0’75 percent and f 0’02 percent for carbon \nabove 0’75 percent. \nAPPENDIX A \n( Clause 0.2 ) \nINDIAN STANDARDS ON METHODS FOR CHEMICAL \nANALYSIS OF STEELS \nIS : 228 Methods for chemical analysis of steels: \n( Part 1 )-I972 Determination of carbon by volumetric method \n( for carbon > 0’1 percent ) ( second revision ) \n( Part 2)-1972 Determination of manganese in plain carbon and \nlow alloy steels by arsenite method ( second revision ) \n( Part 3 )-1972 Determination of phosphorus by alkalimetric method \n( second rcvisiorr ) \n( Part 4 )-1987 Determination of total carbon by gravimetric method \n( for carbon ) 0’1 percent ) ( third revision ) \n( Part 5 )-I974 Determination of nickel by dimethyl lyoxime \nravimetric ) method ( for nickel > 0’5 percent \n?!Xon ) s ( second \nG IS : 228 ( Part 4 ) - 1987 \n(Part S)-1974 Determination of chromium hy persi!ipha?e \noxidation method ( for chromium > 0‘5 percent : I serond \nrcuision ) \n( Part 7 )-1974 Determination of molybdenum by a-bcnzoinoxirne \nmethod ( for molybdenum > 1 pwccnt ) ( ~vtond rFi/iJiott ) \n( Part 8)-1975 Determination of silicon by the_ ~ravimetric method \n( for silicon > 0’ 1 percent ) ( second reutszon ) \n(Part 9)-1975 Determination of sulphur in plain CW~J(JII steels by \nevolution met hod (second revirion ) \n( Parr 10 )-1976 Determination of molybdenum by thiocyannte \n(photometric) method ( for molybdenum up to 1 perwnt ) \nin low and high alloy steels ( second reviJir,tl ) \n( Part 11 )-1976 Determination of silicon by photometric method in \ncarbon steels and low alloy steels ( for silicon 0’01 to 0’05 \npercent ) ( second relision ) \n( Part 12 )-I976 Determination Of manganese hy periodate \n(photometric) method in low and high i\\lloy steels ( for \nmanganese up to 2 percent ) ( second revision ) \n( Part 13 )-1982 Determination of arsenic \n7 IS : 228 ( Part 4 ) - 1987 \nRaprcsraIing \nSteel Authority of India Ltd (Durgapur Steel \nPlant ), Durgapur \nSJIRI P. K. BANEHJEE ( Altrrnafr ) \nSMHI P. NAI~AIN \nSHXI G. 11. SIRMA ( Altrrndr ) \nSJIRI R. S. NATH hahindra Ugine Steel CO Ltd, Bombay \nSteel Authority of India Ltd (Bokaro Steel \nPlant ), Rokaro \n8n1u N. GUNL)APPA ( Altarnatr ) \n1)~ L. P. PANDEY \nSun1 G. Rnnlons \nSIIHI R. D. VANDRIWALLA \nSJIRI J. C. DEY ( Afterme ) National Metallurgical Laboratory ( CSIR ), \nJamrhedpur _ \nVisvervarava Iron & Steel Ltd. Bbadravati \nltalab Pvt ‘Ltd. Bombay \n8 BUREAU OF INDIAN STANDARDS \nHeadquarters: \nManak Bhavan, 9 Bahadur Shah Zafar Marg, NEW DELHI 110002 \nTelephones: 323 0131,323 3375,323 9402 \nFax : 91 11 3234062, 91 11 3239399, 91 11 3239382 \nCentral Laboratory : \nPlot No. 20/9, Site IV, Sahibabad Industrial Area, Sahibabad 201010 \nRegional Offices: Telegrams : Manaksansths \n(Common to all Offices) \nTelephone \n8-77 00 32 \nCentral : Manak Bhavan, 9 Bahadur Shah Zafar Marg, NEW DELHI 110002 32376 17 \n*Eastern : l/l 4 CIT Scheme VII M, V.I.P. Road, Maniktola, CALCUTTA 700054 337 86 62 \nNorthern : SC0 335336, Sector 34-A, CHANDIGARH 160022 60 38 43 \nSouthern : C.I.T. Campus, IV Cross Road, CHENNAI 600113 235 2315 \ntWestem : Manakafaya, E9, Behind Marol Telephone Exchange, Andheri (East), 832 92 95 \nMUMBAI 400093 \nBranch Offices:: \n‘Pushpak’, Nurmohamed Shaikh Marg, Khanpur, AHMEDABAD 380001 \n$Peenya Industrial Area, 1 st Stage, Bangalore-Tumkur Road, \nBANGALORE 560058 5501348 \n839 49 55 \nGangotri Complex, 5th Floor, Bhadbhada Road, T.T. Nagar, BHOPAL 462003 55 40 21 \nPlot No. 6263, Unit VI, Ganga Nagar, BHUBANESHWAR 751001 40 36 27 \nKalaikathir Buildings, 670 Avinashi Road, COIMBATORE 641037 21 01 41 \nPlot No. 43, Sector 16 A, Mathura Road, FARIDABAD 121001 8-28 88 01 \nSavftri Complex, 116 G.T. Road, GHAZIABAD 201001 8-71 1996 \n53/5 Ward No.29, R.G. Barua Road, 5th By-lane, GUWAHATI 781003 541137 \n5-8-56C, L.N. Gupta Marg, Nampafly Station Road, HYDERABAD 500001 201083 \nE-52, Chitaranjan Marg, C- Scheme, JAIPUR 362001 37 29 25 \n117/418 8, Sarvodaya Nagar, KANPUR 208005 21 68 76 \nSeth Bhawan, 2nd Floor, Behind Leela Cinema, Naval Kishore Road, 2389 23 \nLUCKNOW 226001 \nNIT Building, Second Floor, Gokulpat Market, NAGPUR 440010 52 51 71 \nPatfiputra Industrial Estate, PATNA 800013 26 23 05 \nInstitution of Engineers (India) Building 1332 Shivaji Nagar, PUNE 411005 32 36 35 \nT.C. No. 14/l 421, Unhsity P. 0. Palayam, THlRUVANANTH4PURAM 695034 621 17 \n*Sales office is at 5 Chowringhee Approach, P.O. Princep Street, \nCALCUTTA 700072 271085 \ntSales Dfffce is at Novelty Chambers, Grant Road, MUMBAI 400067 \n$Sales Dffice is at ‘F’ Block, Unity Building, Narashimaraja Square, \nBANGALORE 560002 3096528 \n222 39 71 \nReprography Unit, BIS, New Delhi, India \n\n###question:\n1. What standard does the text discuss?\n\n###answer:\nIS : 228 (Part 4) - 1987"

In [None]:
## this actually gives bluff that is not needed
generate_response(prompt, model)

In [8]:
from peft import AutoPeftModelForCausalLM, LoraConfig, get_peft_model, prepare_model_for_kbit_training

peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0.1,
    r=64,
    bias="none",
    task_type="CASUAL_LM"
)

  warn("The installed version of bitsandbytes was compiled without GPU support. "


/home/snsrlvm6/.local/lib/python3.8/site-packages/bitsandbytes/libbitsandbytes_cpu.so: undefined symbol: cadam32bit_grad_fp32


In [None]:
model = prepare_model_for_kbit_training(model)
model = get_peft_model(model, peft_config)

In [None]:
from transformers import TrainingArguments

args = TrainingArguments(
    output_dir = "KOKO",
    max_steps = 100,
    per_device_train_batch_size = 4,
    warmup_steps = 0.03,
    logging_steps = 10,
    save_strategy = "epoch",
    evaluation_strategy="steps",
    eval_steps=20,
    learning_rate=2e-4,
    lr_scheduler_type='constant',
)

In [None]:
from trl import SFTTrainer

max_seq_length = 2048

trainer = SFTTrainer(
    model=model,
    peft_config = peft_config,
    max_seq_length=max_seq_length,
    tokenizer = tokenizer,
    formatting_func=create_prompt,
    packing = True,
    args = args,
    train_dataset = selected_dataset_dict["train"],
    eval_dataset= selected_dataset_dict["test"]
)

In [None]:
trainer.train()

In [None]:
trainer.save_model("KOKO")

In [None]:
merged_model = model.merge_and_unload()

In [None]:
def generate_response(prompt,model):
  encoded_input = tokenizer(prompt, return_tensors="pt", add_special_tokens=True)
  model_inputs = encoded_input.to('cuda')
  generated_ids = model.generate(**model_inputs, max_new_tokens=1000, do_sample=True, pad_token_id = tokenizer.eos_token_id)
  decoded_output = tokenizer.batch_decode(generated_ids)
  return decoded_output[0]
prompt = ""
generate_response(prompt,model)