In [14]:
import ollama
from nomic import embed
import sentence_transformers as SBERT
import numpy as np
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
from typing import Optional, List

OLLAMA_NOMIC_MODEL = 'nomic-embed-text'

def cosine_similarity(vec1: list[float], vec2: list[float]) -> float:
    """Compute cosine similarity between two vectors.

    Args:
        vec1: First vector (any sequence NumPy can convert to an array).
        vec2: Second vector, same length as ``vec1``.

    Returns:
        The dot product of the two vectors divided by the product of their
        Euclidean norms. If either vector has zero norm the similarity is
        undefined; ``0.0`` is returned instead of ``nan``.
    """
    a = np.asarray(vec1)
    b = np.asarray(vec2)
    norm_product = np.linalg.norm(a) * np.linalg.norm(b)
    # Guard the division: a zero-norm vector would otherwise produce nan
    # together with a RuntimeWarning.
    if norm_product == 0:
        return 0.0
    return np.dot(a, b) / norm_product

def ollama_nomic_embed(prompt: str) -> list[float]:
    """Embed ``prompt`` with the Ollama model named by ``OLLAMA_NOMIC_MODEL``.

    Args:
        prompt: Text to embed.

    Returns:
        The value stored under the ``'embedding'`` key of the response from
        ``ollama.embeddings``.
    """
    return ollama.embeddings(model=OLLAMA_NOMIC_MODEL, prompt=prompt)['embedding']



def reduce_and_plot(vectors: np.ndarray, labels: Optional[List[str]] = None, title: str = '2D Embedding Projection') -> None:
    """Project ``vectors`` onto two PCA components and show a scatter plot.

    Args:
        vectors: Data passed to ``PCA(n_components=2).fit_transform``.
        labels: Optional text labels; label ``i`` is drawn at the projected
            position of row ``i``.
        title: Title of the figure.
    """
    projected = PCA(n_components=2).fit_transform(vectors)
    xs, ys = projected[:, 0], projected[:, 1]

    plt.figure(figsize=(8, 6))
    plt.scatter(xs, ys, c='skyblue', s=60)

    # Annotate each point only when labels were supplied.
    if labels:
        for idx, text in enumerate(labels):
            plt.text(xs[idx], ys[idx], text, fontsize=9)

    plt.title(title)
    plt.xlabel('PCA1')
    plt.ylabel('PCA2')
    plt.grid(True)
    plt.show()



In [None]:
### Creating semantic embeddings using nomic via the ollama model
### Test cases ###

# Embed two repository names with the model configured in OLLAMA_NOMIC_MODEL
# (defined in the setup cell) instead of repeating the model name literal.
response = ollama.embeddings(
    model=OLLAMA_NOMIC_MODEL,
    prompt='facebook/react'
)

# Show the vector length and a short preview rather than dumping every value.
print(len(response['embedding']), response['embedding'][:5])

response2 = ollama.embeddings(
    model=OLLAMA_NOMIC_MODEL,
    prompt='vuejs/vue'
)

# np.ndarray is the array type; np.array is only the constructor function.
vec1: np.ndarray = np.array(response['embedding'])
vec2: np.ndarray = np.array(response2['embedding'])
vectors: np.ndarray = np.array([vec1, vec2])

# Last expression of the cell: similarity between the two embeddings.
cosine_similarity(vec1, vec2)





In [None]:
from pathlib import Path
import csv

# CSV file passed to vectorize_foss_names below. The path is relative, so it
# resolves against the kernel's current working directory.
foss_proj_space_csv: Path = Path("../csv_github_data/FOSS_projects_space.csv")



def vectorize_foss_names(csv_data_file: Path, embed_model: str = 'ollama') -> list[list[float]]:
    """Embed the first column of every row of a CSV file.

    Args:
        csv_data_file: CSV file to read; the first cell of each row is the
            text that gets embedded.
        embed_model: Name of the embedding backend. Currently not consulted:
            every name is embedded with ``ollama_nomic_embed``.

    Returns:
        One embedding vector per non-blank CSV row, in file order.
    """
    foss_name_vectors: list[list[float]] = []
    # newline='' is what the csv module expects for files it reads; the
    # explicit encoding keeps the result independent of the platform default.
    with open(csv_data_file, mode="r", newline='', encoding="utf-8") as file:
        for row in csv.reader(file):
            if not row:
                # csv.reader yields [] for a blank line; row[0] would raise
                # IndexError, so skip it.
                continue
            print(row[0])
            # row[0] is just the first entry, without the list brackets.
            foss_name_vectors.append(ollama_nomic_embed(row[0]))
    return foss_name_vectors

# Embedding vectors (lists of floats) for the names read from foss_proj_space_csv.
foss_name_vectors_space: list[list[float]] = vectorize_foss_names(foss_proj_space_csv)

freeCodeCamp freeCodeCamp
codecrafters-io build-your-own-x
sindresorhus awesome
EbookFoundation free-programming-books
public-apis public-apis
jwasham coding-interview-university
kamranahmedse developer-roadmap
donnemartin system-design-primer
996icu 996.ICU
vinta awesome-python
facebook react
awesome-selfhosted awesome-selfhosted
practical-tutorials project-based-learning
vuejs vue
TheAlgorithms Python
torvalds linux
trekhleb javascript-algorithms
tensorflow tensorflow
getify You-Dont-Know-JS
CyC2018 CS-Notes
ossu computer-science
ohmyzsh ohmyzsh
Significant-Gravitas AutoGPT
twbs bootstrap
microsoft vscode
flutter flutter
github gitignore
jackfrued Python-100-Days
trimstray the-book-of-secret-knowledge
jlevy the-art-of-command-line
AUTOMATIC1111 stable-diffusion-webui
Snailclimb JavaGuide
airbnb javascript
huggingface transformers
avelino awesome-go
ollama ollama
ytdl-org youtube-dl
massgravel Microsoft-Activation-Scripts
vercel next.js
labuladong fucking-algorithm
golang go
yangshun 

UnboundLocalError: cannot access local variable 'foss_name_vectors' where it is not associated with a value