# Preparation

## Environment info

### OS

In [1]:
import platform
# Show the platform identifier string reported for this runtime.
os_description = platform.platform()
print(os_description)

Linux-6.1.123+-x86_64-with-glibc2.35


### GPU

In [2]:
import torch

# Check if CUDA is available
print("CUDA available:", torch.cuda.is_available())
print("CUDA device count:", torch.cuda.device_count())
if torch.cuda.is_available():
    print("Current device:", torch.cuda.current_device())
    print("Device name:", torch.cuda.get_device_name(0))
    gpu_info = !nvidia-smi
    cuda_info = !nvcc --version
    print('**GPU INFO:**')
    print('\n'.join(gpu_info))
    print('**CUDA INFO:**')
    print('\n'.join(cuda_info))
else:
    print("CUDA is not available on this system.")



CUDA available: True
CUDA device count: 1
Current device: 0
Device name: Tesla T4
**GPU INFO:**
Thu Jul  3 07:56:26 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   57C    P8             11W /   70W |       2MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+-

### Memory

In [3]:
import psutil
# Total memory reported by psutil, converted from bytes by dividing by 1e9.
total_bytes = psutil.virtual_memory().total
ram_gb = total_bytes / 1e9
ram_message = 'Your runtime has {:.1f} gigabytes of available RAM\n'
print(ram_message.format(ram_gb))

Your runtime has 13.6 gigabytes of available RAM



### CPU

In [4]:
# Logical processor count as reported by psutil.
logical_cpu_count = psutil.cpu_count(logical=True)
print(f"Your runtime has {logical_cpu_count} processor(s)")

Your runtime has 2 processor(s)


### Python

In [5]:
# Print the version of the `python` executable that the shell resolves.
!python --version

Python 3.11.13


## Setup

### Configuration

In [6]:
import os

# Workspace root of the runtime and the directory the project is cloned into.
ROOT_PATH = "/content"
# Derived from ROOT_PATH so the two locations cannot drift apart.
CODE_PATH = f"{ROOT_PATH}/src"

# Exported so that shell cells can reference "$CODE_PATH".
os.environ['CODE_PATH'] = CODE_PATH

print("Root path:", ROOT_PATH)
# List the root only when it exists; give a readable hint instead of a traceback otherwise.
if os.path.isdir(ROOT_PATH):
    print(os.listdir(ROOT_PATH))
else:
    print(f"Warning: root path {ROOT_PATH!r} does not exist; adjust ROOT_PATH for this environment.")


Root path: /content
['.config', 'sample_data']


### Download Source Code

In [7]:
# If directory "src" not exist then clone a new one
!pwd
![ -d "$CODE_PATH" ] || git clone --depth 1  --branch "main" https://github.com/aio25-mix002/m01-p0102 "$CODE_PATH"


/content
Cloning into '/content/src'...
remote: Enumerating objects: 16, done.[K
remote: Counting objects: 100% (16/16), done.[K
remote: Compressing objects: 100% (14/14), done.[K
remote: Total 16 (delta 0), reused 10 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (16/16), 29.96 KiB | 29.96 MiB/s, done.


In [8]:
%cd "$CODE_PATH"
!git clean -fdx
!git status
!git pull
!pwd

/content/src
On branch main
Your branch is up to date with 'origin/main'.

nothing to commit, working tree clean
Already up to date.
/content/src


# Main

In [9]:
# Switch the working directory to the cloned sources and print it for confirmation.
%cd "$CODE_PATH"
!pwd

/content/src
/content/src


## Install packages

In [None]:
# Optional install of the torch-specific requirements; disabled in this notebook.
# NOTE(review): presumably skipped because torch is already importable in this runtime — confirm.
#!pip install -r requirements-torch.txt

In [10]:
!pip install -r requirements.txt

Collecting transformers==4.52.4 (from -r requirements.txt (line 2))
  Downloading transformers-4.52.4-py3-none-any.whl.metadata (38 kB)
Collecting bitsandbytes==0.46.0 (from -r requirements.txt (line 3))
  Downloading bitsandbytes-0.46.0-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting accelerate==1.7.0 (from -r requirements.txt (line 5))
  Downloading accelerate-1.7.0-py3-none-any.whl.metadata (19 kB)
Collecting langchain==0.3.25 (from -r requirements.txt (line 6))
  Downloading langchain-0.3.25-py3-none-any.whl.metadata (7.8 kB)
Collecting langchainhub==0.1.21 (from -r requirements.txt (line 7))
  Downloading langchainhub-0.1.21-py3-none-any.whl.metadata (659 bytes)
Collecting langchain-chroma==0.2.4 (from -r requirements.txt (line 8))
  Downloading langchain_chroma-0.2.4-py3-none-any.whl.metadata (1.1 kB)
Collecting langchain_experimental==0.3.4 (from -r requirements.txt (line 9))
  Downloading langchain_experimental-0.3.4-py3-none-any.whl.metadata (1.7 kB)
Collecting 

In [11]:
!pip list -v

Package                                  Version             Location                                Installer
---------------------------------------- ------------------- --------------------------------------- ---------
absl-py                                  1.4.0               /usr/local/lib/python3.11/dist-packages uv
accelerate                               1.7.0               /usr/local/lib/python3.11/dist-packages pip
aiofiles                                 24.1.0              /usr/local/lib/python3.11/dist-packages uv
aiohappyeyeballs                         2.6.1               /usr/local/lib/python3.11/dist-packages uv
aiohttp                                  3.11.15             /usr/local/lib/python3.11/dist-packages uv
aiosignal                                1.3.2               /usr/local/lib/python3.11/dist-packages uv
alabaster                                1.0.0               /usr/local/lib/python3.11/dist-packages uv
albucore                                 0.0.24  

## Environment variables

In [12]:
import os

from dotenv import load_dotenv

# File with the LangSmith / ngrok settings, resolved relative to the working directory.
ENV_FILE = 'local.env'

# load_dotenv() reports False when nothing was loaded (for example when the file
# is absent); say so explicitly instead of leaving only a bare `False` behind.
env_loaded = load_dotenv(ENV_FILE)
if not env_loaded:
    print(f"Warning: nothing loaded from {ENV_FILE!r} in {os.getcwd()}; "
          "LANGSMITH_* and NGROK_AUTHTOKEN must be provided another way.")
env_loaded

False

Example of `local.env` (looked up relative to the current working directory, i.e. the cloned source directory):
```
LANGSMITH_TRACING=true
LANGSMITH_ENDPOINT="https://api.smith.langchain.com"
LANGSMITH_API_KEY="..."
LANGSMITH_PROJECT="colab-aio25-mix002-m01p0102"

NGROK_AUTHTOKEN="..."

```

## Setup network tunneling

In [13]:
!pip install pyngrok

Collecting pyngrok
  Downloading pyngrok-7.2.11-py3-none-any.whl.metadata (9.4 kB)
Downloading pyngrok-7.2.11-py3-none-any.whl (25 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.11


In [14]:

import getpass
import os
from pyngrok import ngrok, conf

def run_local_tunnel(port):
  """Open an ngrok tunnel to a local HTTP port and report its public URL.

  Args:
    port: Local port the tunnel should forward to.

  Returns:
    The public URL of the newly opened tunnel.
  """
  # Open a ngrok tunnel to the HTTP server
  public_url = ngrok.connect(port).public_url
  print(f" * ngrok tunnel \"{public_url}\" -> \"http://127.0.0.1:{port}\"")
  # Hand the URL back so callers can reuse it instead of parsing the printed line.
  return public_url

# Configure the ngrok authtoken: prefer a non-empty NGROK_AUTHTOKEN from the
# environment and fall back to an interactive, hidden prompt otherwise.
if os.environ.get('NGROK_AUTHTOKEN'):
  print("Using NGROK_AUTHTOKEN from environment variable")
  conf.get_default().auth_token = os.environ['NGROK_AUTHTOKEN']
else:
  # Only show the instructions when the user actually has to type the token.
  print("Enter your authtoken, which can be copied from https://dashboard.ngrok.com/get-started/your-authtoken")
  print("Using getpass to enter NGROK_AUTHTOKEN")
  conf.get_default().auth_token = getpass.getpass(prompt='Ngrok authtoken: ')

Enter your authtoken, which can be copied from https://dashboard.ngrok.com/get-started/your-authtoken
Using getpass to enter NGROK_AUTHTOKEN
Ngrok authtoken: ··········


## Run

In [None]:
run_local_tunnel(8501)
!streamlit run ./rag_chatbot.py --server.port 8501

 * ngrok tunnel "https://5450-34-53-116-139.ngrok-free.app" -> "http://127.0.0.1:8501"

Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.53.116.139:8501[0m
[0m
2025-07-03 08:00:17.449456: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1751529617.471859    1803 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1751529617.478878    1803 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-07-03 