In [2]:
!pip install -q bitsandbytes datasets accelerate loralib
!pip install -q git+https://github.com/huggingface/peft.git 

In [3]:
pip install -U transformers==4.38.1 -q

Note: you may need to restart the kernel to use updated packages.


In [4]:
from datasets import load_dataset
import torch
import torch.nn as nn
import bitsandbytes as bnb
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM
import os



In [2]:
# incase dataset loading raises an error
# ! pip install -U datasets

# Load Data

In [5]:
Dataset_id = 'wikisql'
dataset = load_dataset(Dataset_id)

Downloading builder script:   0%|          | 0.00/1.87k [00:00<?, ?B/s]

Downloading metadata:   0%|          | 0.00/875 [00:00<?, ?B/s]

Downloading and preparing dataset wiki_sql/default (download: 24.95 MiB, generated: 147.57 MiB, post-processed: Unknown size, total: 172.52 MiB) to /root/.cache/huggingface/datasets/wiki_sql/default/0.1.0/7037bfe6a42b1ca2b6ac3ccacba5253b1825d31379e9cc626fc79a620977252d...


Downloading data:   0%|          | 0.00/26.2M [00:00<?, ?B/s]

Generating test split:   0%|          | 0/15878 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/8421 [00:00<?, ? examples/s]

Generating train split:   0%|          | 0/56355 [00:00<?, ? examples/s]

Dataset wiki_sql downloaded and prepared to /root/.cache/huggingface/datasets/wiki_sql/default/0.1.0/7037bfe6a42b1ca2b6ac3ccacba5253b1825d31379e9cc626fc79a620977252d. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

In [6]:
from torch.utils.data import Dataset, DataLoader
from datasets.dataset_dict import DatasetDict
import pandas as pd
class SQLDataSet(Dataset):
    def __init__(self,raw_dataset:DatasetDict,split='train'):
        self._initialize(raw_dataset,split)
    
    def _initialize(self,raw_dataset,split):
        subset = raw_dataset[split]
        answers = [ans.get('human_readable') for ans in subset['sql']]
        self.dataframe = pd.DataFrame({'question':subset['question'],
                                      'answer':answers})
    def __len__(self):
        return self.dataframe.shape[0]
    
    def __getitem__(self,index):
        return self.dataframe.iloc[index,:].to_dict()


In [7]:
train = SQLDataSet(dataset,split='train')

In [8]:
from IPython.display import display, Markdown

for i,sample in enumerate(train):
    display(Markdown(f"### Question {i+1}:\n{sample['question']}\n### Answer:\n{sample['answer']}"))
    if i==3:
        break

### Question 1:
Tell me what the notes are for South Australia 
### Answer:
SELECT Notes FROM table WHERE Current slogan = SOUTH AUSTRALIA

### Question 2:
What is the current series where the new series began in June 2011?
### Answer:
SELECT Current series FROM table WHERE Notes = New series began in June 2011

### Question 3:
What is the format for South Australia?
### Answer:
SELECT Format FROM table WHERE State/territory = South Australia

### Question 4:
Name the background colour for the Australian Capital Territory
### Answer:
SELECT Text/background colour FROM table WHERE State/territory = Australian Capital Territory

In [9]:
dataset

DatasetDict({
    test: Dataset({
        features: ['phase', 'question', 'table', 'sql'],
        num_rows: 15878
    })
    validation: Dataset({
        features: ['phase', 'question', 'table', 'sql'],
        num_rows: 8421
    })
    train: Dataset({
        features: ['phase', 'question', 'table', 'sql'],
        num_rows: 56355
    })
})

In [10]:
val = SQLDataSet(dataset,'validation')
test = SQLDataSet(dataset,'test')

# Load Base Model

In [11]:
!pip install --quiet --upgrade huggingface_hub

In [12]:
# Setup the environment
from huggingface_hub import login
from kaggle_secrets import UserSecretsClient
access_token_read = UserSecretsClient().get_secret("HF")
login(token = access_token_read)

Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


In [None]:
# pip install -U flash-attn==2.5.5 --no-build-isolation

In [15]:
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
MODEL_DIR = "/kaggle/input/gemma/transformers/2b/2"
# for fune-tunning it's best practise to load the model in 16 bit it's give the best 
quantization_config = BitsAndBytesConfig(load_in_4bit=True,
                                        load_4bit_use_double_quant=True,
                                        bnb_4bit_quant_type="nf4",
                                        bnb_4bit_compute_dtype=torch.bfloat16)

tokenizer = AutoTokenizer.from_pretrained(MODEL_DIR,
                                        add_eos_token=True,
                                         )
model = AutoModelForCausalLM.from_pretrained(MODEL_DIR,
                                             quantization_config=quantization_config,
                                             device_map="auto",
                                             torch_dtype=torch.float32,
                                             trust_remote_code=True,
#                                             attn_implementation="flash_attention_2") # needs only special GPUs
                                            )

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [16]:
model

GemmaForCausalLM(
  (model): GemmaModel(
    (embed_tokens): Embedding(256000, 2048, padding_idx=0)
    (layers): ModuleList(
      (0-17): 18 x GemmaDecoderLayer(
        (self_attn): GemmaSdpaAttention(
          (q_proj): Linear4bit(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear4bit(in_features=2048, out_features=256, bias=False)
          (v_proj): Linear4bit(in_features=2048, out_features=256, bias=False)
          (o_proj): Linear4bit(in_features=2048, out_features=2048, bias=False)
          (rotary_emb): GemmaRotaryEmbedding()
        )
        (mlp): GemmaMLP(
          (gate_proj): Linear4bit(in_features=2048, out_features=16384, bias=False)
          (up_proj): Linear4bit(in_features=2048, out_features=16384, bias=False)
          (down_proj): Linear4bit(in_features=16384, out_features=2048, bias=False)
          (act_fn): GELUActivation()
        )
        (input_layernorm): GemmaRMSNorm()
        (post_attention_layernorm): GemmaRMSNorm()
     

In [31]:
target_modules = ['q_proj',
                 'k_proj',
                 'v_proj',
                 'o_proj',
#                  'gate_proj',
#                  'up_proj',
#                  'down_proj',
#                  'lora_magnitude_vector'
                 ]
unfreeze= [ 'embed_tokens',
          'input_layernorm',
          'post_attention_layernorm']

# Loading Check 

In [18]:
# check that all paramters are existing in GPU not in cpu(meta)
for n,p in model.named_parameters():
    if p.device.type == 'meta':
        print(f"{n} is on meta")

# Test before Fine-tuning

In [19]:
def generate(question:str,model):
    input_ids = tokenizer(question,
                          padding=True,
                          truncation=True,
                          return_tensors="pt").to('cuda')
    outputs = model.generate(**input_ids,
                            max_new_tokens=50,
                            do_sample=True,
                            top_p=0.92,
                            top_k=0,
)
    return tokenizer.decode(outputs[0],skip_special_tokens=True)

In [20]:
model.config

GemmaConfig {
  "_name_or_path": "/kaggle/input/gemma/transformers/2b/2",
  "architectures": [
    "GemmaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 2,
  "eos_token_id": 1,
  "head_dim": 256,
  "hidden_act": "gelu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 16384,
  "max_position_embeddings": 8192,
  "model_type": "gemma",
  "num_attention_heads": 8,
  "num_hidden_layers": 18,
  "num_key_value_heads": 1,
  "pad_token_id": 0,
  "quantization_config": {
    "_load_in_4bit": true,
    "_load_in_8bit": false,
    "bnb_4bit_compute_dtype": "bfloat16",
    "bnb_4bit_quant_type": "nf4",
    "bnb_4bit_use_double_quant": false,
    "llm_int8_enable_fp32_cpu_offload": false,
    "llm_int8_has_fp16_weight": false,
    "llm_int8_skip_modules": null,
    "llm_int8_threshold": 6.0,
    "load_in_4bit": true,
    "load_in_8bit": false,
    "quant_method": "bitsandbytes"
  },
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,

In [15]:
test_data = ['What is the capital of Italy?',
            'What are the top 3 highest grossing movies of all time?',
            'Write me a poem about Machine Learning.',
            'what is ']
for q in test_data:
    print(generate(q,model))

Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.
2024-02-28 00:26:52.545642: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-28 00:26:52.545698: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-28 00:26:52.547203: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


What is the capital of Italy?
What are the top 3 highest grossing movies of all time?
Write me a poem about Machine Learning.
what is 


In [16]:
for i,sample in enumerate(train):
    generated_ans = generate(sample['question']+"?",model)
    display(Markdown(f"### Question {i+1}:\n{sample['question']}\n### Generated Answer:\n{generated_ans}\n### Actual Answer:\n{sample['answer']}"))
    if i==3:
        break

### Question 1:
Tell me what the notes are for South Australia 
### Generated Answer:
Tell me what the notes are for South Australia ?
### Actual Answer:
SELECT Notes FROM table WHERE Current slogan = SOUTH AUSTRALIA

### Question 2:
What is the current series where the new series began in June 2011?
### Generated Answer:
What is the current series where the new series began in June 2011??
### Actual Answer:
SELECT Current series FROM table WHERE Notes = New series began in June 2011

### Question 3:
What is the format for South Australia?
### Generated Answer:
What is the format for South Australia??
### Actual Answer:
SELECT Format FROM table WHERE State/territory = South Australia

### Question 4:
Name the background colour for the Australian Capital Territory
### Generated Answer:
Name the background colour for the Australian Capital Territory?

Spain, also known as Spania, is one of the 19 countries of the European Union and the European Mainland.

Abu Dhabi is the capital of the United Arab Emirates.

Georgia is a North Caucasian country, is part of the
### Actual Answer:
SELECT Text/background colour FROM table WHERE State/territory = Australian Capital Territory

# Freeze layers

In [33]:
for params in model.parameters():
    params.requires_grad = False # Freeze all parameter
    if params.ndim == 1:
        params.data = params.data.to(torch.float32) # cast to float32 for stability

# Enables the gradients for the input embeddings.
# This is useful for fine-tuning adapter weights while keeping the model weights fixed.
model.enable_input_require_grads()
# reduce number of stored activations
model.gradient_checkpointing_enable()
class CastToFloat(nn.Sequential):
    def forward(self,x):
        return super().forward(x).to(torch.float32)
model.lm_head = CastToFloat(model.lm_head)

# Lora Configuration

In [34]:
from peft import LoraConfig, get_peft_model
task = 'CAUSAL_LM'
desired_rank = 8
lora_alpha = 32
lora_dropout = 0.1
lora_config = LoraConfig(
    task_type = task,
    r = desired_rank,
    lora_alpha = lora_alpha,
    lora_dropout = lora_dropout,
    target_modules=target_modules,
    bias = 'none',
#     use_dora = True only for DoRA
)

peft_model = get_peft_model(model,lora_config)
peft_model.print_trainable_parameters()

trainable params: 1,843,200 || all params: 2,515,978,240 || trainable%: 0.07325977509249047


In [19]:
print(tokenizer)

GemmaTokenizerFast(name_or_path='/kaggle/input/gemma/transformers/2b/2', vocab_size=256000, model_max_length=1000000000000000019884624838656, is_fast=True, padding_side='left', truncation_side='right', special_tokens={'bos_token': '<bos>', 'eos_token': '<eos>', 'unk_token': '<unk>', 'pad_token': '<pad>'}, clean_up_tokenization_spaces=False),  added_tokens_decoder={
	0: AddedToken("<pad>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	1: AddedToken("<eos>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	2: AddedToken("<bos>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
	3: AddedToken("<unk>", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),
}


# UnFreeze Layers

In [25]:
# for name,param in peft_model.named_parameters():
#     if any(unfree in name for unfree in unfreeze):
#             param.requires_grad_(False)

            
# trainable_params = {n:p for n,p in model.named_parameters() if p.requires_grad}

In [35]:
peft_model.print_trainable_parameters()

trainable params: 1,843,200 || all params: 2,515,978,240 || trainable%: 0.07325977509249047


In [21]:
# trainable_params.keys()

In [36]:
peft_model

PeftModelForCausalLM(
  (base_model): LoraModel(
    (model): GemmaForCausalLM(
      (model): GemmaModel(
        (embed_tokens): Embedding(256000, 2048, padding_idx=0)
        (layers): ModuleList(
          (0-17): 18 x GemmaDecoderLayer(
            (self_attn): GemmaSdpaAttention(
              (q_proj): lora.Linear4bit(
                (base_layer): Linear4bit(in_features=2048, out_features=2048, bias=False)
                (lora_dropout): ModuleDict(
                  (default): Dropout(p=0.1, inplace=False)
                )
                (lora_A): ModuleDict(
                  (default): Linear(in_features=2048, out_features=8, bias=False)
                )
                (lora_B): ModuleDict(
                  (default): Linear(in_features=8, out_features=2048, bias=False)
                )
                (lora_embedding_A): ParameterDict()
                (lora_embedding_B): ParameterDict()
              )
              (k_proj): lora.Linear4bit(
                (base_la

In [39]:
from torchdata.datapipes.map import SequenceWrapper, Mapper

In [40]:
# tamplate to adapt

def create_prompt(question, answer):
    if len(answer) < 1:
        answer = "Cannot Find Answer"

    prompt_template = f"### QUESTION\n{question}\n### ANSWER\n{answer}</s>"
    return prompt_template

mapped_qa_train = Mapper(train,lambda sample: tokenizer(create_prompt(sample['question'], sample['answer'])),
                              )

In [41]:
mapped_qa_val = Mapper(val,lambda sample: tokenizer(create_prompt(sample['question'], sample['answer'])))

# Fine-Tuning

In [43]:
epochs = 2
fine_tune_tag = 'sql-qa-fine-tuned-model'
model_name = 'gemma-2b'
context_length = 8192
grad_accum = 2
batch_size = 4
save_dir = f'./results/{model_name}_{Dataset_id}_epochs_{epochs}_context_length_{context_length}'
print(save_dir)

./results/gemma-2b_wikisql_epochs_2_context_length_8192


In [27]:
# mapped_qa_train[0]

In [44]:
from transformers import Trainer,TrainingArguments,DataCollatorForLanguageModeling

2024-02-28 00:47:04.248758: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-02-28 00:47:04.248849: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-02-28 00:47:04.374275: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [51]:
from transformers import Trainer,TrainingArguments,DataCollatorForLanguageModeling
# from trl import SFTTrainer
trainer = Trainer(
    model = peft_model,
#     tokenizer=tokenizer,
    train_dataset= mapped_qa_train,
    eval_dataset=mapped_qa_val,
    args= TrainingArguments(
#         save_steps = 20,
        do_eval=True,

        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size = batch_size,
        gradient_accumulation_steps=grad_accum,
        warmup_steps=100,
        max_steps=20,
        learning_rate=1e-3,
        fp16=True,
        logging_steps=1,
        output_dir=save_dir,
        log_level = 'debug',
        lr_scheduler_type='constant',
        max_grad_norm=0.3,
#         evaluation_strategy='steps',
#         eval_steps=0.2,
    ),
    data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False)
)


PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
You have loaded a model on multiple GPUs. `is_model_parallel` attribute will be force-set to `True` to avoid any unexpected behavior such as device placement mismatching.
max_steps is given, it will override any value given in num_train_epochs
Using auto half precision backend


In [52]:
peft_model.config.use_cache = False  # silence the warnings. Please re-enable for inference!
trainer.train()

Currently training with a batch size of: 4
***** Running training *****
  Num examples = 56,355
  Num Epochs = 1
  Instantaneous batch size per device = 4
  Total train batch size (w. parallel, distributed & accumulation) = 8
  Gradient Accumulation steps = 2
  Total optimization steps = 20
  Number of trainable parameters = 1,843,200
Automatic Weights & Biases logging enabled, to disable set os.environ["WANDB_DISABLED"] = "true"


Step,Training Loss
1,0.7377
2,1.2395
3,1.0772
4,1.2621
5,1.2461
6,1.3272
7,1.1682
8,1.1631
9,0.9704
10,0.9048




Training completed. Do not forget to share your model on huggingface.co/models =)




TrainOutput(global_step=20, training_loss=1.1479340583086013, metrics={'train_runtime': 210.9718, 'train_samples_per_second': 0.758, 'train_steps_per_second': 0.095, 'total_flos': 115390565744640.0, 'train_loss': 1.1479340583086013, 'epoch': 0.0})

In [61]:
HUGGING_FACE_USER_NAME = "ahmedelsayed"
model = 'gemma-2b'
peft_model.push_to_hub(f"{HUGGING_FACE_USER_NAME}/{model}", use_auth_token=True)

Uploading the following files to ahmedelsayed/gemma-2b: adapter_config.json,README.md,adapter_model.safetensors


adapter_model.safetensors:   0%|          | 0.00/39.3M [00:00<?, ?B/s]

CommitInfo(commit_url='https://huggingface.co/ahmedelsayed/gemma-2b/commit/030b6c4070ac2b22c021cabc80e71fe09b6fb7d8', commit_message='Upload model', commit_description='', oid='030b6c4070ac2b22c021cabc80e71fe09b6fb7d8', pr_url=None, pr_revision=None, pr_num=None)

In [62]:
import torch
from peft import PeftModel, PeftConfig
from transformers import AutoModelForCausalLM, AutoTokenizer

peft_model_id = f"{HUGGING_FACE_USER_NAME}/{model}"
config = PeftConfig.from_pretrained(peft_model_id)
model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, return_dict=True, load_in_8bit=False, device_map='auto')
tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)

# Load the Lora model
qa_model = PeftModel.from_pretrained(model, peft_model_id)

adapter_config.json:   0%|          | 0.00/660 [00:00<?, ?B/s]

loading configuration file /kaggle/input/gemma/transformers/2b/2/config.json
Model config GemmaConfig {
  "_name_or_path": "/kaggle/input/gemma/transformers/2b/2",
  "architectures": [
    "GemmaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 2,
  "eos_token_id": 1,
  "head_dim": 256,
  "hidden_act": "gelu",
  "hidden_size": 2048,
  "initializer_range": 0.02,
  "intermediate_size": 16384,
  "max_position_embeddings": 8192,
  "model_type": "gemma",
  "num_attention_heads": 8,
  "num_hidden_layers": 18,
  "num_key_value_heads": 1,
  "pad_token_id": 0,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.38.1",
  "use_cache": true,
  "vocab_size": 256000
}

loading weights file /kaggle/input/gemma/transformers/2b/2/model.safetensors.index.json
Generate config GenerationConfig {
  "bos_token_id": 2,
  "eos_token_id": 1,
  "pad_token_id": 0
}



Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

All model checkpoint weights were used when initializing GemmaForCausalLM.

All the weights of GemmaForCausalLM were initialized from the model checkpoint at /kaggle/input/gemma/transformers/2b/2.
If your task is similar to the task the model of the checkpoint was trained on, you can already use GemmaForCausalLM for predictions without further training.
loading configuration file /kaggle/input/gemma/transformers/2b/2/generation_config.json
Generate config GenerationConfig {
  "bos_token_id": 2,
  "eos_token_id": 1,
  "pad_token_id": 0
}

loading file tokenizer.model
loading file tokenizer.json
loading file added_tokens.json
loading file special_tokens_map.json
loading file tokenizer_config.json


adapter_model.safetensors:   0%|          | 0.00/39.3M [00:00<?, ?B/s]

In [63]:
def make_inference(question,qa_model):
    batch = tokenizer(f"### QUESTION\n{question}\n### ANSWER\n", return_tensors='pt')
    qa_model.config.use_cache = True  # silence the warnings. Please re-enable for inference!
    qa_model.eval()
    with torch.cuda.amp.autocast():
        output_tokens = qa_model.generate(**batch, max_new_tokens=200)

    display(Markdown((tokenizer.decode(output_tokens[0], skip_special_tokens=True))))



In [59]:
test[0]['question']

"What is terrence ross' nationality"

In [64]:
make_inference(test[0]['question'],qa_model)

### QUESTION
What is terrence ross' nationality
### ANSWER
SELECT Nationality FROM table WHERE Player = Terrence Ross</s>

In [65]:
test[0]['answer']

'SELECT Nationality FROM table WHERE Player = Terrence Ross'

In [66]:
for i, t in enumerate(test):
    make_inference(t['question'],qa_model)
    display(Markdown(f"### Actual:\n{t['answer']}"))
    if i==10:
        break

### QUESTION
What is terrence ross' nationality
### ANSWER
SELECT Nationality FROM table WHERE Player = Terrence Ross</s>

### Actual:
SELECT Nationality FROM table WHERE Player = Terrence Ross

### QUESTION
What clu was in toronto 1995-96
### ANSWER
SELECT Clu FROM table WHERE Season = toronto 1995-96</s>

### Actual:
SELECT School/Club Team FROM table WHERE Years in Toronto = 1995-96

### QUESTION
which club was in toronto 2003-06
### ANSWER
SELECT Club FROM table WHERE Year = toronto 2003-06</s>

### Actual:
SELECT School/Club Team FROM table WHERE Years in Toronto = 2003-06

### QUESTION
how many schools or teams had jalen rose
### ANSWER
SELECT COUNT School/Team FROM table WHERE Player = Jalen Rose</s>

### Actual:
SELECT COUNT School/Club Team FROM table WHERE Player = Jalen Rose

### QUESTION
Where was Assen held?
### ANSWER
SELECT Location FROM table WHERE Race = assen</s>

### Actual:
SELECT Round FROM table WHERE Circuit = Assen

### QUESTION
What was the number of race that Kevin Curtain won?
### ANSWER
SELECT Race FROM table WHERE Driver = kevin curtain</s>

### Actual:
SELECT COUNT No FROM table WHERE Pole Position = Kevin Curtain

### QUESTION
What was the date of the race in Misano?
### ANSWER
SELECT Date FROM table WHERE Location = misano</s>

### Actual:
SELECT Date FROM table WHERE Circuit = Misano

### QUESTION
How many different positions did Sherbrooke Faucons (qmjhl) provide in the draft?
### ANSWER
SELECT COUNT Position FROM table WHERE Team = Sherbrooke Faucons (QMJHL)</s>

### Actual:
SELECT COUNT Position FROM table WHERE College/junior/club team = Sherbrooke Faucons (QMJHL)

### QUESTION
What are the nationalities of the player picked from Thunder Bay Flyers (ushl)
### ANSWER
SELECT Nationality FROM table WHERE Team = thunder bay flyers (ushl)</s>

### Actual:
SELECT Nationality FROM table WHERE College/junior/club team = Thunder Bay Flyers (USHL)

### QUESTION
How many different college/junior/club teams provided a player to the Washington Capitals NHL Team?
### ANSWER
SELECT COUNT College/Junior/Club Teams FROM table WHERE Team = washington capitals</s>

### Actual:
SELECT COUNT College/junior/club team FROM table WHERE NHL team = Washington Capitals

### QUESTION
How many different nationalities do the players of New Jersey Devils come from?
### ANSWER
SELECT Nationality FROM table WHERE Team = new jersey devils</s>

### Actual:
SELECT COUNT Nationality FROM table WHERE NHL team = New Jersey Devils