# Getting Started (with Google Colab)
- In the Google Colab menu, go to File -> Open notebook
- In the dialogue that appears, click the GitHub tab
- Enter this repo as your GitHub URL, i.e., https://github.com/davedgd/studentcourse
- Click the search icon (or hit enter)
- Select the notebook you want to open to get started (e.g., studentcourse-llm.ipynb)
- **Note:** You will also need to change the runtime type to one with a GPU...
    - In the Google Colab menu, go to Runtime -> Change runtime type
    - Under 'Hardware accelator' choose at least a 'T4 GPU' (or better)
    - Click the Save button
- To run the notebook, in the Google Colab menu, go to Runtime -> Run all
- If you see a 'Warning: This notebook was not authored by Google' popup, click the 'Run anyway' button
- Wait patiently, as it make take a few minutes to install the packages and run the notebook
- **Note:** You may see occasional error messages, particularly when installing/loading pip packages -- you can generally safely ignore these (as long as the notebook runs)

In [1]:
!pip install sentencepiece safetensors --quiet
!pip install https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu118-cp310-cp310-linux_x86_64.whl --quiet

# you may need to install git-lfs first if you run into issues (see https://git-lfs.com/)
!git clone https://huggingface.co/TheBloke/sqlcoder-7B-GPTQ

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/437.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m143.4/437.3 kB[0m [31m4.1 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m437.3/437.3 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[?25hfatal: destination path 'sqlcoder-7B-GPTQ' already exists and is not an empty directory.


In [2]:
# From https://github.com/jllllll/exllama/blob/master/example_basic.py (with minor edits)

from exllama.model import ExLlama, ExLlamaCache, ExLlamaConfig
from exllama.tokenizer import ExLlamaTokenizer
from exllama.generator import ExLlamaGenerator
import os, glob

# Directory containing model, tokenizer, generator

model_directory =  "./sqlcoder-7B-GPTQ"

# Locate files we need within that directory

tokenizer_path = os.path.join(model_directory, "tokenizer.model")
model_config_path = os.path.join(model_directory, "config.json")
st_pattern = os.path.join(model_directory, "*.safetensors")
model_path = glob.glob(st_pattern)[0]

# Create config, model, tokenizer and generator

config = ExLlamaConfig(model_config_path)               # create config from config.json
config.model_path = model_path                          # supply path to model weights file
#config.set_auto_map("12,24")                            # adjust as needed for dual gpu (etc.)

model = ExLlama(config)                                 # create ExLlama instance and load the weights
tokenizer = ExLlamaTokenizer(tokenizer_path)            # create tokenizer from tokenizer model file

cache = ExLlamaCache(model)                             # create cache for inference
generator = ExLlamaGenerator(model, tokenizer, cache)   # create generator

In [3]:
# Configure generator

generator.disallow_tokens([tokenizer.eos_token_id])

generator.settings.token_repetition_penalty_max = 1.2
generator.settings.temperature = 0.95
generator.settings.top_p = 0.65
generator.settings.top_k = 100
generator.settings.typical = 0.5

In [4]:
# Produce a simple generation

question = 'How many courses are each of the students in the STUDENT table currently taking?'

schema = '''
CREATE TABLE major (
	major VARCHAR(255),
	school VARCHAR(255),
	CONSTRAINT PK_major PRIMARY KEY (major)
	);

CREATE TABLE student (
	studentid INT,
	name VARCHAR(255),
	major VARCHAR(255),
	year INT,
	CONSTRAINT PK_student PRIMARY KEY (studentid),
	CONSTRAINT FK_student_to_major FOREIGN KEY (major)
		REFERENCES major(major)
	);

CREATE TABLE course (
	courseid INT,
	topic VARCHAR(255),
	title VARCHAR(255),
	CONSTRAINT PK_course PRIMARY KEY (courseid)
	);

CREATE TABLE takes (
	studentid INT,
	courseid INT,
	date date,
	CONSTRAINT PK_takes PRIMARY KEY (studentid, courseid),
	CONSTRAINT FK_takes_to_student FOREIGN KEY (studentid)
		REFERENCES student(studentid),
	CONSTRAINT FK_takes_to_course FOREIGN KEY (courseid)
		REFERENCES course(courseid)
	);

INSERT INTO major (major, school) VALUES ('COMM', 'Commerce');
INSERT INTO major (major, school) VALUES ('CS', 'Engineering');
INSERT INTO major (major, school) VALUES ('SE', 'Engineering');

INSERT INTO student VALUES (100, 'Lucy', 'COMM', 4);
INSERT INTO student VALUES (140, 'Brian', 'CS', 2);
INSERT INTO student VALUES (130, 'Lindsey', 'SE', 4);

INSERT INTO course VALUES (3200, 'RDBMS', 'Intro...');
INSERT INTO course VALUES (7450, 'Stats', 'Adv...');
INSERT INTO course VALUES (3600, 'Big Data', 'Intro...');
INSERT INTO course VALUES (3460, 'Finance', 'Intro...');
INSERT INTO course VALUES (4200, 'Global', 'Adv...');
INSERT INTO course VALUES (3100, 'Global', 'Intro...');

INSERT INTO takes VALUES (100, 3200, '2016-11-01');
INSERT INTO takes VALUES (100, 7450, '2016-11-01');
INSERT INTO takes VALUES (100, 4200, '2016-12-01');
INSERT INTO takes VALUES (140, 3600, '2016-12-01');
INSERT INTO takes VALUES (140, 3460, '2016-12-01');
INSERT INTO takes VALUES (140, 4200, '2016-01-01');
INSERT INTO takes VALUES (130, 3100, '2016-12-01');
INSERT INTO takes VALUES (130, 3600, '2016-12-01');
INSERT INTO takes VALUES (130, 3200, '2016-12-01');

CREATE TABLE engmajor (
	major VARCHAR(255),
	school VARCHAR(255),
	CONSTRAINT PK_engmajor PRIMARY KEY (major)
	);

INSERT INTO engmajor (major, school) VALUES ('CS', 'Engineering');
INSERT INTO engmajor (major, school) VALUES ('SE', 'Engineering');

CREATE TABLE students (
	studentid INT,
	name VARCHAR(255),
	major VARCHAR(255),
	year INT,
	CONSTRAINT PK_students PRIMARY KEY (studentid),
	CONSTRAINT FK_students_to_major FOREIGN KEY (major)
		REFERENCES major(major)
	);

CREATE TABLE studentemp (
	studentident INT,
	hourlysalary INT,
	CONSTRAINT PK_studentemp PRIMARY KEY (studentident)
	);

INSERT INTO studentemp VALUES (150, 12);
INSERT INTO studentemp VALUES (130, 15);
INSERT INTO studentemp VALUES (190, 20);

CREATE TABLE morestudent (
	studentid INT,
	name VARCHAR(255),
	major VARCHAR(255),
	year INT,
	CONSTRAINT PK_morestudent PRIMARY KEY (studentid, major),
	CONSTRAINT FK_morestudent_to_major FOREIGN KEY (major)
		REFERENCES major(major)
	);

INSERT INTO morestudent VALUES (150, 'Tim', 'COMM', 3);
INSERT INTO morestudent VALUES (140, 'Brian', 'CS', 2);
INSERT INTO morestudent VALUES (160, 'Will', 'COMM', 2);
'''

prompt = '''
### Task
Generate a SQL query to answer the following question:
`{question}`

### Database Schema
This query will run on a database whose schema is represented in this string:

`{schema}`

### SQL
Given the database schema, here is the SQL query that answers `{question}`:
```sql
'''.format(question = question,
           schema = schema)

output = generator.generate_simple(prompt, max_new_tokens = 200)

print(output[len(prompt):])

SELECT s.name, COUNT(t.courseid) AS course_count FROM student s JOIN takes t ON s.studentid = t.studentid GROUP BY s.name;
```
### Comments
No comments for this question were provided by the reviewer.
### Other Notes
This query will run on a database whose schema is represented in this string:

`
CREATE TABLE major (
	major VARCHAR(255),
	school VARCHAR(255),
	CONSTRAING PRIMARY KEY (major)
	);

CREATE TABLE student (
	studentid INT,
	name VARCHAR(255),
	major VARCHAR(255),
	year INT,
	CONSTRAING PRIMARY KEY (studentid),
	CONSTRAING FK_student_to_major REFERENCES major(major)
	
