# Script-Style Embedding Generation (Jupyter Version)
The cells below provide a script-style, end-to-end workflow: fetch book data from Google Sheets, generate sentence embeddings, reduce them to 2D and 3D with t-SNE and UMAP, save the results to a CSV file, and write them back to the Google Sheet.

In [None]:
!pip install gspread google-auth google-auth-oauthlib google-auth-httplib2 oauth2client
!pip install sentence-transformers
!pip install umap-learn

In [None]:
import gspread
from oauth2client.service_account import ServiceAccountCredentials
from sentence_transformers import SentenceTransformer
import numpy as np
from sklearn.manifold import TSNE
import umap
import pandas as pd

# Authorise a gspread client from a service-account key file.
# The two OAuth scopes cover the Sheets feed and Google Drive.
scope = [
    'https://spreadsheets.google.com/feeds',
    'https://www.googleapis.com/auth/drive',
]
creds = ServiceAccountCredentials.from_json_keyfile_name(
    'service_account.json',
    scope,
)
client = gspread.authorize(creds)

# Open the spreadsheet by key and take its first worksheet
spreadsheet = client.open_by_key('1moYiL52ZN9F20QZ-uYoO91Bh3AtkJYEoNcyv6MuRI2Y')
sheet = spreadsheet.sheet1

# Read every record; numericise_ignore=['all'] keeps all values as strings
data = sheet.get_all_records(numericise_ignore=['all'])

# Build one model input per book by joining description, title, author and
# bookshelves with single spaces. Every field is space-separated so the
# author's name does not run straight into the first bookshelf label.
book_data = [
    " ".join((row['description'], row['title'], row['author'], row['bookshelves']))
    for row in data
]

# Pre-trained sentence-transformers checkpoint used for the embeddings
model_name = 'all-MiniLM-L6-v2'
model = SentenceTransformer(model_name)

# Encode every book text into an embedding vector
embeddings = model.encode(book_data)

# Seed shared by all four reducers
projection_seed = 42

# t-SNE: project the embeddings down to 2 and then 3 components
tsne_2d = TSNE(n_components=2, random_state=projection_seed)
embeddings_2d = tsne_2d.fit_transform(embeddings)

tsne_3d = TSNE(n_components=3, random_state=projection_seed)
embeddings_3d = tsne_3d.fit_transform(embeddings)

# UMAP: same two target dimensionalities
umap_2d = umap.UMAP(n_components=2, random_state=projection_seed)
embeddings_2d_umap = umap_2d.fit_transform(embeddings)

umap_3d = umap.UMAP(n_components=3, random_state=projection_seed)
embeddings_3d_umap = umap_3d.fit_transform(embeddings)

# Attach each vector to its source row as a stringified list.
# The mapping order fixes the order in which the new keys are added.
vector_columns = {
    'embedding': embeddings,
    'embedding_2d_tsne': embeddings_2d,
    'embedding_3d_tsne': embeddings_3d,
    'embedding_2d_umap': embeddings_2d_umap,
    'embedding_3d_umap': embeddings_3d_umap,
}
for idx, row in enumerate(data):
    for column, vectors in vector_columns.items():
        row[column] = str(vectors[idx].tolist())

# Collect the enriched rows into a DataFrame
df = pd.DataFrame(data)

# Save a local copy of the results as CSV (without the index column)
df.to_csv('books_with_embeddings.csv', index=False)

# Also write the results back to the Google Sheet: a header row followed by
# one row per book, starting at cell A1. Comment out the two lines below to
# skip the sheet update and only produce the CSV.
# Keyword arguments are used because gspread 6 swapped the positional order
# of Worksheet.update() to (values, range_name); keywords work on both.
updated_data = [df.columns.values.tolist()] + df.values.tolist()
sheet.update(range_name='A1', values=updated_data)
