In [4]:
!pip install langchain faiss-cpu pypdf google-generativeai
!pip install -U langchain-community
!pip install langchain faiss-cpu pypdf google-generativeai pytesseract pillow
!pip install langchain-google-genai


Collecting google-ai-generativelanguage==0.6.15 (from google-generativeai)
  Downloading google_ai_generativelanguage-0.6.15-py3-none-any.whl.metadata (5.7 kB)
Downloading google_ai_generativelanguage-0.6.15-py3-none-any.whl (1.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m14.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: google-ai-generativelanguage
  Attempting uninstall: google-ai-generativelanguage
    Found existing installation: google-ai-generativelanguage 0.6.18
    Uninstalling google-ai-generativelanguage-0.6.18:
      Successfully uninstalled google-ai-generativelanguage-0.6.18
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langchain-google-genai 2.1.9 requires google-ai-generativelanguage<0.7.0,>=0.6.18, but you have google-ai-generativelanguage 0.6.15 which is incompatible.[0m[

Collecting langchain-community
  Downloading langchain_community-0.3.27-py3-none-any.whl.metadata (2.9 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading mypy_extensions-1.1.0-py3-none-any.whl.metadata (1.1 kB)
Downloading langchain_community-0.3.27-py3-none-any.whl (2.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m29.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dataclasses_json-0.6.7-py3-none-any.whl (

KeyboardInterrupt: 

In [None]:
# =========================
# 0) Install dependencies
# =========================
!pip -q install google-generativeai langchain langchain-google-genai faiss-cpu pypdf pytesseract pillow

# =========================
# 1) Imports & Config
# =========================
import os, textwrap
from IPython.display import Markdown, display
from google.colab import userdata
import google.generativeai as genai

from google.colab import files
from PIL import Image
import pytesseract

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document
from langchain.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings

# --- API key from Colab Secrets ---
# Read the secret once and validate it BEFORE handing it to any client, so a
# missing/empty value fails here with a clear message.
GEMINI_API_KEY = userdata.get('GEMINI_API_KEY')
assert GEMINI_API_KEY, "Set Colab Secret GEMINI_API_KEY dulu ya."

# Configures the google-generativeai SDK only.
genai.configure(api_key=GEMINI_API_KEY)

# The langchain_google_genai classes used later in this notebook
# (ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings) are constructed without
# an explicit key, so they fall back to the GOOGLE_API_KEY environment variable.
os.environ["GOOGLE_API_KEY"] = GEMINI_API_KEY

# Helper: render a Markdown string in the cell output
def show_md(md_text: str):
    """Display ``md_text`` as rendered Markdown in the notebook output area."""
    rendered = Markdown(md_text)
    display(rendered)

show_md("## ✅ Inisialisasi selesai")

# =========================
# 2) Upload reference documents (PDF) for RAG
#    (e.g. CV-writing guides, best practices, examples)
# =========================
show_md("### 📥 Upload **PDF referensi** (panduan CV)")
ref_files = files.upload()  # upload one or more reference PDFs
ref_docs = []

from pypdf import PdfReader

for fname in ref_files:
    # Only PDFs are indexed; anything else is reported and skipped.
    if not fname.lower().endswith(".pdf"):
        print(f"Di-skip (bukan PDF): {fname}")
        continue

    reader = PdfReader(fname)
    # One Document per page that yields text, tagged with its 1-based page number.
    for page_no, page in enumerate(reader.pages, start=1):
        text = page.extract_text() or ""
        if text.strip():
            ref_docs.append(Document(page_content=text, metadata={"source": fname, "page": page_no}))

assert ref_docs, "Tidak ada teks dari PDF referensi. Pastikan PDF berisi teks (bukan scan gambar), atau tambahkan PDF lain."

# Split the references into overlapping chunks
splitter = RecursiveCharacterTextSplitter(chunk_size=800, chunk_overlap=120)
ref_chunks = splitter.split_documents(ref_docs)

# Embeddings + FAISS index
# The embedding model is addressed by its full resource name ("models/<id>"),
# which is the form shown in the langchain-google-genai documentation; a bare id
# can be rejected by the API as an unexpected model name format.
emb = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
vectordb = FAISS.from_documents(ref_chunks, emb)
retriever = vectordb.as_retriever(search_kwargs={"k": 4})  # top-4 chunks per query

show_md("### ✅ Referensi dimuat & diindeks (FAISS)")

# =========================
# 3) Upload the CV (PDF / JPG / PNG)
# =========================
show_md("### 📄 Upload **CV** (PDF/JPG/PNG)")
cv_files = files.upload()
assert len(cv_files) == 1, "Upload tepat 1 file CV."
cv_name = list(cv_files.keys())[0]
show_md(f"- File CV: **{cv_name}**")

# Extract the CV text: PDFs via pypdf, images via Tesseract OCR
cv_text = ""
if cv_name.lower().endswith(".pdf"):
    reader = PdfReader(cv_name)
    # Each page's text followed by a newline; pages with no text contribute "".
    cv_text = "".join((page.extract_text() or "") + "\n" for page in reader.pages)
elif cv_name.lower().endswith((".jpg", ".jpeg", ".png")):
    # Context manager closes the image file once OCR is done.
    with Image.open(cv_name) as img:
        cv_text = pytesseract.image_to_string(img)
else:
    raise ValueError("Format CV tidak didukung. Gunakan PDF/JPG/PNG.")

assert cv_text.strip(), "Teks CV kosong—jika PDF berupa scan gambar, unggah versi teks atau gunakan OCR (JPG/PNG)."

# =========================
# 4) Set up the LLM (Gemini) & prompt
# =========================
# Chat model instance used for every generation request in this notebook.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0.2,
)

# Instruction block placed at the top of every prompt (role + required output format).
SYSTEM_INSTRUCTION = """Anda adalah konsultan karir yang teliti dan to-the-point.
Nilailah CV pengguna dan berikan saran perbaikan berbasis referensi yang diberikan (RAG).
Gunakan bahasa Indonesia yang profesional, ringkas, dan actionable.
Format keluaran **harus** Markdown dengan heading berikut:

## Ringkasan CV
- 3–5 poin ringkasan profil kandidat.

## Penilaian Umum
- Kekuatan
- Area perbaikan

## Saran Per Bagian
### Profil/Headline
### Pengalaman Kerja
### Pendidikan
### Keterampilan/Tools
### Proyek/Portofolio (opsional)

## Contoh Bullet dengan STAR
- Tulis 2–4 bullet terukur (angka/dampak).

## Cek ATS-Friendly
- Saran kata kunci dan format.

Selalu rujuk ke referensi jika relevan, namun **jangan** menulis sitasi formal—cukup sebut “berdasarkan referensi”.
"""

# Default question used by the one-shot demo.
USER_QUERY_DEFAULT = """Tolong nilai CV saya dan berikan saran perbaikan. Fokuskan pada pengalaman kerja dan bullet yang terukur.
"""

def build_prompt(
    user_query: str,
    retrieved_snippets: list[str],
    cv_text: str,
    max_cv_chars: int = 12000,
) -> str:
    """Assemble the full prompt text sent to the model.

    The prompt consists of, in order: SYSTEM_INSTRUCTION, the retrieved
    reference snippets (one "- " bullet each, separated by blank lines), the
    CV text, and the user's question.

    Args:
        user_query: The question to place in the final section.
        retrieved_snippets: Reference excerpts; may be empty, in which case the
            reference section is left blank.
        cv_text: Full extracted CV text.
        max_cv_chars: Maximum number of leading characters of ``cv_text`` to
            include. Defaults to 12000, the limit previously hard-coded here.

    Returns:
        The assembled prompt string.

    Raises:
        ValueError: If ``max_cv_chars`` is negative (a negative slice bound
            would silently drop the END of the CV instead of capping it).
    """
    if max_cv_chars < 0:
        raise ValueError("max_cv_chars must be >= 0")

    ref_block = "\n\n".join(f"- {s}" for s in retrieved_snippets)
    cv_excerpt = cv_text[:max_cv_chars]
    prompt = f"""
{SYSTEM_INSTRUCTION}

# Referensi Terkait (cuplikan)
{ref_block}

# Teks CV Pengguna
{cv_excerpt}

# Pertanyaan Pengguna
{user_query}
"""
    return prompt

# =========================
# 5) RAG: retrieve → generate
# =========================
def ask_cv_assistant(user_query: str):
    """Answer ``user_query`` about the uploaded CV using retrieved references.

    Reads the notebook-level ``retriever``, ``cv_text`` and ``llm`` objects.

    Args:
        user_query: The question to ask about the CV.

    Returns:
        The ``content`` attribute of the model response.
    """
    # Fetch the most relevant reference chunks. `invoke` is the Runnable entry
    # point of retrievers; `get_relevant_documents` is deprecated in langchain-core.
    rel_docs = retriever.invoke(user_query)
    snippets = [d.page_content[:800] for d in rel_docs]

    # Combine instruction, references, CV text and question into one prompt
    prompt = build_prompt(user_query, snippets, cv_text)

    # Call Gemini
    resp = llm.invoke(prompt)
    return resp.content

# =========================
# 6) Interactive demo (single run)
# =========================
# Runs the assistant once with the default question and renders the answer;
# the answer is also kept in the global `result`.
show_md("### ▶️ Demo sekali jalan")
result = ask_cv_assistant(USER_QUERY_DEFAULT)
show_md(result)

# =========================
# 7) (Optional) question-and-answer loop
# =========================
# Uncomment to keep asking questions until 'exit' or 'quit' is typed.
# while True:
#     q = input("\nPertanyaan (ketik 'exit' untuk selesai): ").strip()
#     if q.lower() in ["exit", "quit"]:
#         break
#     show_md(ask_cv_assistant(q))


In [None]:
# # === Install Library (WAJIB di Colab, jalankan sekali saja) ===
# !pip install -q langchain-google-genai langchain faiss-cpu transformers pytesseract pillow pypdf

# # === Import Library ===
# import google.generativeai as genai
# from langchain.vectorstores import FAISS
# from langchain.embeddings import HuggingFaceEmbeddings
# from langchain.text_splitter import RecursiveCharacterTextSplitter
# from langchain.document_loaders import PyPDFLoader
# from langchain.chains import RetrievalQA
# from langchain_google_genai import ChatGoogleGenerativeAI

# import pytesseract
# from PIL import Image
# from google.colab import files
# import os

# # === Konfigurasi API Gemini ===
# genai.configure(api_key=os.environ["GOOGLE_API_KEY"])  # load the key from the environment; never hardcode it (revoke any key that was committed here)

# # === Upload file (PDF atau JPG/PNG) ===
# uploaded = files.upload()
# filename = list(uploaded.keys())[0]
# print("File yang diupload:", filename)

# docs = []

# if filename.lower().endswith(".pdf"):
#     # Load PDF
#     loader = PyPDFLoader(filename)
#     docs = loader.load()

# elif filename.lower().endswith((".jpg", ".jpeg", ".png")):
#     # OCR untuk gambar
#     img = Image.open(filename)
#     text = pytesseract.image_to_string(img)
#     from langchain.schema import Document
#     docs = [Document(page_content=text)]

# else:
#     raise ValueError("Format file tidak didukung. Gunakan PDF atau JPG/PNG.")

# # === Split text jadi chunks ===
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
# split_docs = text_splitter.split_documents(docs)

# # === Buat embeddings + simpan ke FAISS ===
# embeddings = HuggingFaceEmbeddings()
# db = FAISS.from_documents(split_docs, embeddings)

# # === Buat retriever ===
# retriever = db.as_retriever()

# # === Buat LLM dengan Gemini API ===
# llm = ChatGoogleGenerativeAI(
#     model="gemini-1.5-flash",   # bisa diganti "gemini-1.5-pro"
#     google_api_key=os.environ["GOOGLE_API_KEY"]  # never hardcode credentials
# )

# qa = RetrievalQA.from_chain_type(
#     llm=llm,
#     retriever=retriever,
#     chain_type="stuff"
# )

# # === Demo chatbot ===
# query = input("Masukkan pertanyaan tentang CV Anda: ")
# result = qa.run(query)

# print("\n=== HASIL ANALISIS ===")
# print(result)


[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-generativeai 0.8.5 requires google-ai-generativelanguage==0.6.15, but you have google-ai-generativelanguage 0.6.18 which is incompatible.[0m[31m
[0m

In [3]:


# import google.generativeai as genai
# from langchain.vectorstores import FAISS
# from langchain.embeddings import HuggingFaceEmbeddings
# from langchain.text_splitter import RecursiveCharacterTextSplitter
# from langchain.document_loaders import PyPDFLoader
# from langchain.chains import RetrievalQA
# # from langchain.llms import GooglePalm

# #Buat LLM dengan Gemini API
# from langchain_google_genai import ChatGoogleGenerativeAI

# import pytesseract
# from PIL import Image
# from google.colab import files
# import os

# # 🔑 Konfigurasi API Gemini
# genai.configure(api_key=os.environ["GOOGLE_API_KEY"])  # load the key from the environment; never hardcode it (revoke any key that was committed here)


# # === Upload file (PDF atau JPG) ===
# uploaded = files.upload()

# filename = list(uploaded.keys())[0]
# print("File yang diupload:", filename)

# docs = []

# if filename.lower().endswith(".pdf"):
#     # Load PDF referensi (misal: tips menulis CV)
#     loader = PyPDFLoader(filename)
#     docs = loader.load()

# elif filename.lower().endswith((".jpg", ".jpeg", ".png")):
#     # OCR untuk gambar CV
#     img = Image.open(filename)
#     text = pytesseract.image_to_string(img)
#     from langchain.schema import Document
#     docs = [Document(page_content=text)]

# else:
#     raise ValueError("Format file tidak didukung. Gunakan PDF atau JPG/PNG.")

# # 2. Split text jadi chunks
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
# split_docs = text_splitter.split_documents(docs)

# # 3. Buat embeddings + simpan ke FAISS
# embeddings = HuggingFaceEmbeddings()
# db = FAISS.from_documents(split_docs, embeddings)

# # 4. Buat retriever
# retriever = db.as_retriever()

# # 5. Buat LLM dengan Gemini API
# # llm = GooglePalm(model="models/text-bison-001")  # bisa ganti dengan gemini-pro

# # qa = RetrievalQA.from_chain_type(
# #     llm=llm,
# #     retriever=retriever,
# #     chain_type="stuff"
# # )

# llm = ChatGoogleGenerativeAI(
#     model="gemini-1.5-flash",  # atau "gemini-1.5-pro"
#     google_api_key=os.environ["GOOGLE_API_KEY"]  # never hardcode credentials
# )

# qa = RetrievalQA.from_chain_type(
#     llm=llm,
#     retriever=retriever,
#     chain_type="stuff"
# )


# # === Demo chatbot ===
# query = input("Masukkan pertanyaan tentang CV Anda: ")
# result = qa.run(query)
# print("\n=== HASIL ANALISIS ===")
# print(result)


ModuleNotFoundError: Module langchain_community.vectorstores not found. Please install langchain-community to access this module. You can install it using `pip install -U langchain-community`