# Test Connection to local Llama3 model running in a container

In [1]:
import requests
import json

# Define the URL and the payload
url = "http://172.18.0.2:11434/api/generate"
payload = {
    "model": "llama3",
    "prompt": "Why is the sky blue?"
}
headers = {
    "Content-Type": "application/json"
}

# Send the POST request
response = requests.post(url, data=json.dumps(payload), headers=headers)

# Print the raw response text
print("Raw response text:", response.text)

# Check the response
if response.status_code == 200:
    try:
        print("Response JSON:", response.json())
    except json.JSONDecodeError as e:
        print("Failed to decode JSON:", e)
else:
    print("Failed to get a response:", response.status_code, response.text)



Raw response text: {"model":"llama3","created_at":"2024-07-29T03:41:07.580713743Z","response":"The","done":false}
{"model":"llama3","created_at":"2024-07-29T03:41:07.602549018Z","response":" sky","done":false}
{"model":"llama3","created_at":"2024-07-29T03:41:07.623744769Z","response":" appears","done":false}
{"model":"llama3","created_at":"2024-07-29T03:41:07.645529023Z","response":" blue","done":false}
{"model":"llama3","created_at":"2024-07-29T03:41:07.666944307Z","response":" because","done":false}
{"model":"llama3","created_at":"2024-07-29T03:41:07.688095456Z","response":" of","done":false}
{"model":"llama3","created_at":"2024-07-29T03:41:07.709388727Z","response":" a","done":false}
{"model":"llama3","created_at":"2024-07-29T03:41:07.730645266Z","response":" phenomenon","done":false}
{"model":"llama3","created_at":"2024-07-29T03:41:07.752357046Z","response":" called","done":false}
{"model":"llama3","created_at":"2024-07-29T03:41:07.772525559Z","response":" Ray","done":false}
{"mode

# Parse the API response from llama model

In [2]:
# Function to generate embeddings using the LLaMA model API
def generate_embeddings(file_content):
    url = "http://172.18.0.2:11434/api/generate" 
    headers = {'Content-Type': 'application/json'}
    
    data = {'prompt': f"Analyze the following code and create a detailed embedding that captures its functionality and structure:\n\n{file_content}",
            'model': 'llama3'
           }
    
    response = requests.post(url, headers=headers, json=data, stream=True)
    
    if response.status_code == 200:
        full_response = ""
        embeddings = []
        for line in response.iter_lines():
            if line:
                json_response = json.loads(line.decode('utf-8'))
                full_response += json_response.get('response', '')
                if 'context' in json_response:
                    embeddings = json_response['context']
                if json_response.get('done', False):
                    break
        return full_response, embeddings
    else:
        raise Exception(f"Failed to generate embeddings: {response.status_code} - {response.text}")

# Test the function
file_content = "print('Hello, World!')"
explanation, embeddings = generate_embeddings(file_content)
print("Explanation:", explanation)
print("Embeddings:", embeddings)

Explanation: What a classic!

Here's a detailed analysis of the code:

**Functionality:**

The provided code is a simple print statement in Python. When executed, it will output the string "Hello, World!" to the console.

**Structure:**

The code consists of a single line that contains:

1. The `print()` function: This is a built-in Python function used to output its argument(s) to the console.
2. A quoted string: The string "Hello, World!" is enclosed in quotes (either single quotes '...' or double quotes "..."). This indicates that the contents within are a literal string.

**Breakdown of the Code:**

Here's a step-by-step breakdown of what happens when this code is executed:

1. The `print()` function is called with the argument `"Hello, World!"`.
2. The Python interpreter encounters the quoted string and treats it as a literal value.
3. The `print()` function takes the quoted string as input and outputs it to the console using the platform's default output mechanism (e.g., stdout).

 # Test Database connection to local Postgres DB running in a container 

In [3]:
import psycopg2
from psycopg2 import sql



In [4]:
db_params = {
    'dbname': 'postgres',
    'user': 'postgres',
    'password': 'mysecretpassword',
    'host': '172.18.0.3',
    'port': '5432'
}

try:
    # Establish the connection
    conn = psycopg2.connect(**db_params)
    cur = conn.cursor()

    # Run a query to fetch column names
    cur.execute("SELECT * FROM code_files LIMIT 0")
    column_names = [desc[0] for desc in cur.description]

    print("Column names:", column_names)

    # Close the cursor and connection
    cur.close()
    conn.close()

except Exception as e:
    print(f"An error occurred: {e}")



Column names: ['id', 'file_explanation', 'file_path', 'updated_at_time', 'file_name', 'file_explanation_embedding']


# Create embeddings for each code file in a repository

In [5]:
import os
import glob
import requests
import psycopg2
from psycopg2.extras import execute_values
from datetime import datetime

In [6]:
# Function to fetch all files in the project directory
def fetch_files(project_directory):
    file_paths = glob.glob(os.path.join(project_directory, '**', '*'), recursive=True)
    files = [path for path in file_paths if os.path.isfile(path)]
    return files

In [7]:
# Function to generate embeddings using the LLaMA model API
def generate_embeddings(file_content):
    url = "http://172.18.0.2:11434/api/generate" 
    headers = {'Content-Type': 'application/json'}
    
    data = {'prompt': f"Analyze the following code and create a detailed embedding that captures its functionality and structure:\n\n{file_content}",
            'model': 'llama3'
           }
    
    response = requests.post(url, headers=headers, json=data, stream=True)
    
    if response.status_code == 200:
        full_response = ""
        embeddings = []
        for line in response.iter_lines():
            if line:
                json_response = json.loads(line.decode('utf-8'))
                full_response += json_response.get('response', '')
                if 'context' in json_response:
                    embeddings = json_response['context']
                if json_response.get('done', False):
                    break
        return full_response, embeddings
    else:
        raise Exception(f"Failed to generate embeddings: {response.status_code} - {response.text}")

In [8]:
def store_in_db(conn, file_path, file_name, explanation, embeddings):
    cur = conn.cursor()
    
    cur.execute("ALTER TABLE code_files DROP COLUMN IF EXISTS file_explanation_embedding")
    cur.execute("ALTER TABLE code_files ADD COLUMN file_explanation_embedding jsonb")
    query = sql.SQL("INSERT INTO vector_database (file_path, file_name, file_explanation, file_explanation_embedding, updated_at_time) VALUES (%s, %s, %s, %s, %s)")
    cur.execute(query, (file_path, file_name, explanation, json.dumps(embeddings), datetime.now()))
    conn.commit()
    cur.close()

In [9]:
db_params = {
    'dbname': 'postgres',
    'user': 'postgres',
    'password': 'mysecretpassword',
    'host': '172.18.0.3', 
    'port': '5432'
}

conn = psycopg2.connect(**db_params)

project_repo_path = '../repos/ToolTime/'

# Iterate over files in the directory
for root, dirs, files in os.walk(project_repo_path):
    for file in files:
        file_path = os.path.join(root, file)
        
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                file_content = f.read()
        except UnicodeDecodeError:
            with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                file_content = f.read()
        
        # Remove null characters from file content
        file_content = file_content.replace('\x00', '')
        
        # Generate explanation and embeddings
        explanation, embeddings = generate_embeddings(file_content)
        
        # Store in database
        store_in_db(conn, file_path, file, explanation, embeddings)

conn.close()

In [13]:
import psycopg2
import pandas as pd

db_params = {
    'dbname': 'postgres',
    'user': 'postgres',
    'password': 'mysecretpassword',
    'host': '172.18.0.3',
    'port': '5432'
}

try:
    conn = psycopg2.connect(**db_params)
    cur = conn.cursor()

    cur.execute("SELECT * FROM vector_database")
    
    rows = cur.fetchall()

    column_names = [desc[0] for desc in cur.description]

    df = pd.DataFrame(rows, columns=column_names)
    
    # Close the cursor and connection
    cur.close()
    conn.close()

except Exception as e:
    print(f"An error occurred: {e}")



In [15]:
df.head(25)

Unnamed: 0,id,file_explanation,file_explanation_embedding,file_path,updated_at_time,file_name
0,1,This is a JSON file that represents a package....,"[128006, 882, 128007, 271, 2127, 56956, 279, 2...",../repos/ToolTime/package-lock.json,2024-07-29 03:32:11.588029,package-lock.json
1,2,**Overview**\n\nThe provided code is a JSON fi...,"[128006, 882, 128007, 271, 2127, 56956, 279, 2...",../repos/ToolTime/yarn.lock,2024-07-29 03:32:19.917888,yarn.lock
2,3,What an intriguing piece of code!\n\nAfter ana...,"[128006, 882, 128007, 271, 2127, 56956, 279, 2...",../repos/ToolTime/._node_modules,2024-07-29 03:32:27.439816,._node_modules
3,4,What a fascinating code snippet!\n\nThe provid...,"[128006, 882, 128007, 271, 2127, 56956, 279, 2...",../repos/ToolTime/._.expo,2024-07-29 03:32:35.546389,._.expo
4,5,Here is a detailed embedding of the code and i...,"[128006, 882, 128007, 271, 2127, 56956, 279, 2...",../repos/ToolTime/README.md,2024-07-29 03:32:43.668070,README.md
5,115,What a delightfully concise piece of code!\n\n...,"[128006, 882, 128007, 271, 2127, 56956, 279, 2...",../repos/ToolTime/.git/refs/remotes/origin/HEAD,2024-07-29 03:52:30.558617,HEAD
6,6,The provided code snippet appears to be a dire...,"[128006, 882, 128007, 271, 2127, 56956, 279, 2...",../repos/ToolTime/.gitignore,2024-07-29 03:32:52.089596,.gitignore
7,7,**Code Analysis**\n\nThe provided code is a JS...,"[128006, 882, 128007, 271, 2127, 56956, 279, 2...",../repos/ToolTime/app.json,2024-07-29 03:32:59.807773,app.json
8,8,What an interesting piece of code!\n\n**Overvi...,"[128006, 882, 128007, 271, 2127, 56956, 279, 2...",../repos/ToolTime/._.vscode,2024-07-29 03:33:07.905993,._.vscode
9,9,Here's a detailed analysis of the given code:\...,"[128006, 882, 128007, 271, 2127, 56956, 279, 2...",../repos/ToolTime/babel.config.js,2024-07-29 03:33:17.034805,babel.config.js
