# Installing and Deploying the Model
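### Creating a Virtual Environment
```bat
python -m venv llm-env
llm-env\Scripts\activate
```
The activation command above is for Windows; on Linux or macOS use `source llm-env/bin/activate` instead.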

### Installing Libraries
```shell
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
pip install transformers accelerate
```

#### Optional
```shell
pip install sentencepiece
```

In [12]:
from transformers import AutoModelForCausalLM, AutoTokenizer
import time


# Hugging Face Hub id of the checkpoint; a single constant keeps the model
# weights and the tokenizer pointing at the same checkpoint.
MODEL_NAME = "microsoft/phi-2"

model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

# Smoke test: run one short generation to confirm the model loads and responds.
prompt = "Suggest improvements for this function:\ndef login(user, passw):"
inputs = tokenizer(prompt, return_tensors="pt")
# Pass pad_token_id explicitly; without it generate() emits a warning that it
# is falling back to eos_token_id for open-end generation.
outputs = model.generate(
    **inputs,
    max_new_tokens=100,
    pad_token_id=tokenizer.eos_token_id,
)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))


Loading checkpoint shards: 100%|█████████████████████████████████████████████████████████| 2/2 [00:11<00:00,  5.55s/it]
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Suggest improvements for this function:
def login(user, passw):
    if user == "admin" and passw == "password":
        return True
    else:
        return False

# Solution:
def login(user, passw):
    if user == "admin" and passw == "password":
        return True
    else:
        return False

# This function can be improved by using a dictionary to store the user credentials.
# This will make the code more efficient and easier to read.


In [10]:
import time

def get_model_response(prompt: str, max_tokens: int = 100) -> str:
    """Generate a completion for ``prompt``, print it with its latency, and return it.

    Relies on the notebook-level ``tokenizer`` and ``model`` globals, so the
    cell that loads them must be run before calling this function.

    Args:
        prompt: Text that is tokenized and passed to ``model.generate``.
        max_tokens: Upper bound on the number of newly generated tokens;
            forwarded to ``model.generate`` as ``max_new_tokens``.

    Returns:
        The first generated sequence decoded with special tokens skipped.
        NOTE(review): judging by the recorded outputs, the decoded text
        starts with the prompt itself — confirm before parsing the result.
    """
    inputs = tokenizer(prompt, return_tensors="pt")

    # Time only the generate() call. perf_counter() is the clock intended for
    # measuring short intervals.
    start_time = time.perf_counter()
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_tokens,  # honor the caller's budget instead of a fixed 512
        pad_token_id=tokenizer.eos_token_id,
    )
    elapsed = time.perf_counter() - start_time

    # Decode the first (and only requested) output sequence.
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Print response and timing
    print("Model response:")
    print(response)
    print(f"\n⏱ Response time: {elapsed:.2f} seconds")

    # Return the text as well, so the declared ``-> str`` contract holds and
    # callers can post-process the completion.
    return response

In [13]:
# Prompt containing a C++ class with two planted defects: a missing semicolon
# in the constructor and an area() that divides instead of multiplying.
cpp_review_prompt = """
Please look for errors in the following code and suggest fixes:

#include <iostream>
using namespace std;

class Rectangle {
private:
    int width;
    int height;

public:
    Rectangle(int w, int h) {
        width = w;
        height = h
    }

    int area() {
        return width / height;
    }
};
"""

# get_model_response() prints the completion and its timing itself, so the
# result is not printed again here. The long token budget is requested
# explicitly rather than left to the helper's default.
response = get_model_response(cpp_review_prompt, max_tokens=512)

Model response:

Please look for errors in the following code and suggest fixes:

#include <iostream>
using namespace std;

class Rectangle {
private:
    int width;
    int height;

public:
    Rectangle(int w, int h) {
        width = w;
        height = h
    }

    int area() {
        return width / height;
    }
};

int main() {
    Rectangle r(10, 5);
    cout << r.area() << endl;
    return 0;
}

A:

You are dividing width by height, which is wrong. You should multiply them instead.
int area() {
    return width * height;
}

A:

You are dividing width by height, which is wrong. You should multiply them instead.
int area() {
    return width * height;
}

A:

You are dividing width by height, which is wrong. You should multiply them instead.
int area() {
    return width * height;
}

A:

You are dividing width by height, which is wrong. You should multiply them instead.
int area() {
    return width * height;
}

A:

You are dividing width by height, which is wrong. You should mul