Because I have no dedicated GPU and can only rely on Google Colab's constrained GPU runtime, I opted to deploy a Streamlit app for my fine-tuned Llama 2 model directly from Colab. To view the app from within a Google Colab environment, follow these steps:


* Execute the command `!streamlit run /content/app.py &>/content/logs.txt & npx localtunnel --port 8501 & curl ipv4.icanhazip.com`

* The output will display an IP address, such as `35.203.134.132`, along with a generated URL like `https://calm-singers-travel.loca.lt` .

* Visit the generated URL, which first opens a localtunnel landing page. On that page, enter the displayed IP address, e.g., `35.203.134.132`, in the input box.

* The Streamlit app will then initiate and run within the Google Colab environment, accessible through the provided URL.

In [1]:
!pip install -q streamlit langchain huggingface_hub transformers sentence_transformers accelerate bitsandbytes

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.4/8.4 MB[0m [31m18.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m794.4/794.4 kB[0m [31m28.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m265.7/265.7 kB[0m [31m22.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.6/92.6 MB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m190.6/190.6 kB[0m [31m8.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.8/4.8 MB[0m [31m44.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.1/82.1 kB[0m [31m4.3 MB/s

## Create a Streamlit app example


In [2]:
%%writefile app.py
import streamlit as st
from langchain.llms import HuggingFacePipeline
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoModelForSeq2SeqLM, BitsAndBytesConfig
from langchain import PromptTemplate, HuggingFaceHub, LLMChain

# Hugging Face Hub repository of the fine-tuned model
model_id = "Guna0pro/llama-2-7b-html"

# Create a prompt template
template = """Generate HTML code for the following instructions:

Instructions: {instructions}

HTML Code:
"""

prompt = PromptTemplate(template=template, input_variables=["instructions"])


@st.cache_resource
def _load_llm_chain() -> LLMChain:
    """Build the LLMChain backed by the 4-bit quantized model.

    Streamlit re-executes this script from the top on every widget
    interaction. Caching the result keeps the tokenizer, model and pipeline
    from being reloaded on each button click.

    Returns:
        An LLMChain that fills ``prompt`` and runs it through the local
        text-generation pipeline.
    """
    # Load the tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    # Quantization configuration
    bnb_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.float16,
        bnb_4bit_use_double_quant=True,
    )

    # Load base model; device_map={"": 0} places the whole model on device 0
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        quantization_config=bnb_config,
        device_map={"": 0}
    )

    # Create a text-generation pipeline
    # NOTE(review): max_length appears to count prompt tokens as well as
    # generated ones, so long instructions leave little room for output --
    # confirm whether max_new_tokens is what is wanted here.
    generation_pipeline = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_length=100
    )

    local_llm = HuggingFacePipeline(pipeline=generation_pipeline)

    # Create an LLMChain
    return LLMChain(prompt=prompt, llm=local_llm)


# Shared chain used by main(); built once and then served from the cache
llm_chain = _load_llm_chain()

# Streamlit app
def main():
    """Render the HTML code generator page.

    Reads instructions from a text area, runs them through ``llm_chain`` when
    the "Generate HTML Code" button is pressed, shows the generated code, and
    lists all previous (instructions, code) pairs for the current session.
    """
    st.title("HTML Code Generator with Streamlit")

    # Streamlit re-runs this script on every interaction, so a plain local
    # list would be recreated empty each time; keep the history in
    # session_state so it survives reruns.
    if "conversation_history" not in st.session_state:
        st.session_state["conversation_history"] = []
    conversation_history = st.session_state["conversation_history"]

    # Get user instructions
    user_instructions = st.text_area("Enter instructions for HTML code:")

    if st.button("Generate HTML Code"):
        if not user_instructions.strip():
            # Avoid running the model on an empty prompt
            st.warning("Please enter instructions before generating HTML code.")
        else:
            # Run the HTML code generation logic
            html_code = llm_chain.run(user_instructions)

            # Display the generated HTML code
            st.subheader("Generated HTML Code:")
            st.code(html_code, language="html")

            # Add the current instructions and generated HTML code to the conversation history
            conversation_history.append((user_instructions, html_code))

    # Reserve the history area now and fill it after the clear button has
    # been handled, so a clear is reflected in the same run while the button
    # stays below the history.
    st.subheader("Conversation History")
    history_area = st.container()

    # Add a button to clear the conversation history
    if st.button("Clear History"):
        conversation_history.clear()
        st.success("Conversation history cleared.")

    # Display the conversation history
    with history_area:
        for instructions, code in conversation_history:
            st.text(f"Instructions: {instructions}")
            st.text(f"Generated HTML Code: {code}")
            st.text("-" * 30)


if __name__ == "__main__":
    main()


Writing app.py


## Install localtunnel

In [3]:
!npm install localtunnel

[K[?25h[37;40mnpm[0m [0m[30;43mWARN[0m [0m[35msaveError[0m ENOENT: no such file or directory, open '/content/package.json'
[K[?25h[37;40mnpm[0m [0m[34;40mnotice[0m[35m[0m created a lockfile as package-lock.json. You should commit this file.
[0m[37;40mnpm[0m [0m[30;43mWARN[0m [0m[35menoent[0m ENOENT: no such file or directory, open '/content/package.json'
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No description
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No repository field.
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No README data
[0m[37;40mnpm[0m [0m[30;43mWARN[0m[35m[0m content No license field.
[0m
+ localtunnel@2.0.2
added 22 packages from 22 contributors and audited 22 packages in 1.668s

3 packages are looking for funding
  run `npm fund` for details

found 1 [93mmoderate[0m severity vulnerability
  run `npm audit fix` to fix them, or `npm audit` for details
[K[?25h

## Run streamlit in background

In [None]:
!streamlit run /content/app.py &>/content/logs.txt & npx localtunnel --port 8501 & curl ipv4.icanhazip.com

35.203.134.132
[K[?25hnpx: installed 22 in 2.218s
your url is: https://calm-singers-travel.loca.lt
