In [None]:
# STEP 1: Install required packages
!pip install --quiet openai google-auth google-auth-oauthlib google-auth-httplib2 google-api-python-client PyMuPDF python-docx

# STEP 2: Authenticate Google Drive
# Run the Colab sign-in flow first; the Drive client below is built without
# explicit credentials, so it presumably relies on this step — confirm.
from google.colab import auth
auth.authenticate_user()

from googleapiclient.discovery import build
from googleapiclient.http import MediaIoBaseDownload
import io

# Module-level Drive v3 client shared by list_files() and download_file().
drive_service = build('drive', 'v3')

# STEP 3: List files in a folder
def list_files(folder_id):
    """Return metadata for every non-trashed file directly inside a Drive folder.

    The Drive API returns results one page at a time, so this keeps
    requesting pages until no ``nextPageToken`` is returned; otherwise larger
    folders would be silently truncated to the first page.

    Args:
        folder_id: ID of the Google Drive folder to list.

    Returns:
        A list of dicts, each with the keys ``id``, ``name`` and ``mimeType``.
        The list is empty when the folder contains no matching files.
    """
    query = f"'{folder_id}' in parents and trashed = false"
    files = []
    page_token = None
    while True:
        response = drive_service.files().list(
            q=query,
            # nextPageToken must be requested explicitly once `fields` is set.
            fields="nextPageToken, files(id, name, mimeType)",
            pageToken=page_token,
        ).execute()
        files.extend(response.get('files', []))
        page_token = response.get('nextPageToken')
        if not page_token:
            break
    return files

# STEP 4: Download and read file content
def download_file(file_id, mime_type):
    """Download a Drive file and return its contents as plain text.

    Supported types are PDF, DOCX and anything whose MIME type starts with
    ``text/``. The type is checked *before* any network request so that
    unsupported entries (images, sub-folders, Google-native documents, ...)
    are skipped cheaply instead of being downloaded -- or failing in the
    download request -- only to be discarded.

    Args:
        file_id: Drive ID of the file to fetch.
        mime_type: MIME type reported by Drive for that file.

    Returns:
        The extracted text, or ``None`` when the MIME type is not supported.
    """
    docx_mime = 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
    is_pdf = mime_type == 'application/pdf'
    is_docx = mime_type == docx_mime
    is_text = isinstance(mime_type, str) and mime_type.startswith('text/')
    if not (is_pdf or is_docx or is_text):
        return None

    # Stream the file into memory chunk by chunk.
    request = drive_service.files().get_media(fileId=file_id)
    file_data = io.BytesIO()
    downloader = MediaIoBaseDownload(file_data, request)
    done = False
    while not done:
        _, done = downloader.next_chunk()
    file_data.seek(0)

    if is_pdf:
        import fitz  # PyMuPDF
        # Context manager closes the document once the text is extracted.
        with fitz.open(stream=file_data.read(), filetype='pdf') as doc:
            return "\n".join(page.get_text() for page in doc)
    if is_docx:
        from docx import Document
        doc = Document(file_data)
        return "\n".join(p.text for p in doc.paragraphs)
    # Text files are not guaranteed to be UTF-8; replace undecodable bytes
    # rather than aborting the whole run on a single odd file.
    return file_data.read().decode('utf-8', errors='replace')

# STEP 5: Summarize using OpenAI GPT
import openai
from google.colab import userdata

# Read the key from Colab's userdata store under the name 'OPENAI_API_KEY'
# instead of hard-coding it in the notebook.
openai.api_key = userdata.get('OPENAI_API_KEY')

def summarize_text(text, max_chars=3000):
    """Ask the OpenAI chat model for a bullet-point summary of ``text``.

    Uses the ``openai.chat.completions`` interface of openai>=1.0; the older
    ``openai.ChatCompletion`` entry point was removed in that release and
    raises when called.

    Args:
        text: The content to summarize.
        max_chars: Maximum number of leading characters of ``text`` that are
            sent to the model, to keep the prompt within size limits.

    Returns:
        The model's reply with surrounding whitespace stripped, or an empty
        string if the reply carries no text content.
    """
    prompt = f"Summarize the following content into bullet points:\n\n{text[:max_chars]}"
    response = openai.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": prompt}],
        temperature=0.5,
    )
    # message.content may be None (e.g. a refusal), so guard before stripping.
    reply = response.choices[0].message.content
    return (reply or "").strip()

# Demo run: set folder_id to a real Google Drive folder ID before executing.
folder_id = 'YOUR_FOLDER_ID_HERE'  # Replace with your actual folder ID from Google Drive
files = list_files(folder_id)
for f in files:
    # Always show which file is being processed, even if it ends up skipped.
    print(f"\n📄 {f['name']} ({f['mimeType']})")
    content = download_file(f['id'], f['mimeType'])
    if not content:
        # Unsupported type or empty document: nothing to summarize.
        continue
    summary = summarize_text(content)
    print("🔍 Summary:\n", summary)