In [None]:
# Use the %pip magic rather than !pip so the packages are installed into the
# environment of the running kernel, not whichever `pip` is first on the shell PATH.
# NOTE(review): the server cell imports pyngrok (installed separately there), not
# flask-ngrok / ngrok — confirm these two packages are still needed.
%pip install flask flask-ngrok ngrok flask_cors

Collecting flask-ngrok
  Downloading flask_ngrok-0.0.25-py3-none-any.whl.metadata (1.8 kB)
Collecting ngrok
  Downloading ngrok-1.4.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (19 kB)
Collecting flask_cors
  Downloading flask_cors-5.0.1-py3-none-any.whl.metadata (961 bytes)
Downloading flask_ngrok-0.0.25-py3-none-any.whl (3.1 kB)
Downloading ngrok-1.4.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m30.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading flask_cors-5.0.1-py3-none-any.whl (11 kB)
Installing collected packages: ngrok, flask-ngrok, flask_cors
Successfully installed flask-ngrok-0.0.25 flask_cors-5.0.1 ngrok-1.4.0


In [None]:
# flask_app.py
import os
import subprocess
import io
import re
import json
import base64

# === Install runtime deps ===
def install_dependencies():
    """Install the third-party packages this server needs into the running interpreter.

    pip is invoked as ``<current python> -m pip`` rather than as a bare ``pip``
    command, so the packages land in the environment of the interpreter that
    will import them, even when a different ``pip`` is first on PATH.

    Raises:
        subprocess.CalledProcessError: if pip exits with a non-zero status
            (``check=True``).
    """
    # Function-scope import: sys is only needed to locate the running interpreter.
    import sys

    subprocess.run([
        sys.executable, '-m', 'pip', 'install',
        'flask', 'flask-cors', 'google-generativeai', 'pyngrok'
    ], check=True)

# Run the install when this cell/module executes, before the third-party imports
# that follow (flask, flask_cors, pyngrok, google.generativeai).
install_dependencies()

from flask import Flask, request, jsonify
from flask_cors import CORS
from pyngrok import ngrok
import google.generativeai as genai

# Flask application object; the /extract route is registered on it.
app = Flask(__name__)
# Enable CORS on the whole app; no options are passed, so flask-cors defaults apply.
# NOTE(review): the defaults presumably allow any origin — confirm, and restrict
# origins if this is ever exposed beyond a demo.
CORS(app)

# === API keys (read from the environment, not hard-coded) ===
# Export GEMINI_API_KEY and NGROK_TOKEN before running this cell so that real
# secrets never end up in the notebook source or its history. An unset variable
# falls back to "" (the previous placeholder value).
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")
NGROK_TOKEN     = os.environ.get("NGROK_TOKEN", "")

# === Authenticate with ngrok, then open a tunnel to local port 5000 ===
print("[DEBUG] Setting ngrok auth token…")
ngrok.set_auth_token(NGROK_TOKEN)
_tunnel = ngrok.connect(5000)
# Public address of the tunnel, as reported by the tunnel object.
public_url = _tunnel.public_url
print(f"[DEBUG] ngrok tunnel → {public_url}")

# === Only step-1 keys ===
# The fields returned to the client, in response order.
STEP1_KEYS = ["businessName", "industry", "size", "website", "description"]
def filter_step1(data: dict) -> dict:
    """Project ``data`` onto STEP1_KEYS, using "" for anything absent.

    A key that is missing from ``data`` or whose value is ``None`` (e.g. a JSON
    ``null``) is returned as the empty string, so every field in the result
    has a value. All other values are passed through unchanged, and keys
    outside STEP1_KEYS are dropped.

    Args:
        data: mapping of field names to values.

    Returns:
        A new dict with exactly the keys in STEP1_KEYS.
    """
    step1 = {}
    for key in STEP1_KEYS:
        value = data.get(key)
        # Treat an explicit null the same as a missing key.
        step1[key] = "" if value is None else value
    return step1

def _strip_code_fences(raw: str) -> str:
    """Return the contents of the first Markdown code fence in ``raw``, or ``raw`` itself (stripped) if unfenced."""
    text = raw.strip()
    # A complete ```lang ... ``` fence anywhere in the reply (tolerates text before/after it).
    fenced = re.search(r'```[A-Za-z]*\s*(.*?)\s*```', text, flags=re.DOTALL)
    if fenced:
        return fenced.group(1)
    # No closing fence: drop a dangling opening fence if there is one.
    return re.sub(r'^```[A-Za-z]*\s*', '', text)


@app.route('/extract', methods=['POST'])
def extract():
    """Handle POST /extract: send an uploaded PDF to Gemini and return business fields.

    Expects a multipart upload under the form field ``file``. The file bytes are
    base64-encoded and passed to the ``gemini-1.5-flash`` model together with an
    extraction prompt; the model's text reply is stripped of Markdown code
    fences, parsed as JSON and reduced with ``filter_step1``.

    Returns:
        A ``(json_response, status)`` tuple:
        * 200 — the result of ``filter_step1`` on the parsed reply.
        * 400 — no ``file`` part in the request, or the uploaded file is empty.
        * 502 — the model reply could not be parsed as a JSON object.
        * 500 — any other exception; its message is returned under ``error``.
    """
    try:
        print("[DEBUG] /extract called")

        # 1) Validate upload
        if 'file' not in request.files:
            print("[ERROR] No 'file' in request.files")
            return jsonify({'error': 'No file provided'}), 400

        upload = request.files['file']
        pdf_bytes = upload.read()
        print(f"[DEBUG] Received file: {upload.filename}, size={len(pdf_bytes)} bytes")

        # Reject an empty upload here instead of forwarding nothing to the model.
        if not pdf_bytes:
            print("[ERROR] Uploaded file is empty")
            return jsonify({'error': 'Uploaded file is empty'}), 400

        # 2) Configure Gemini and call model.generate_content
        print("[DEBUG] Configuring Gemini API…")
        genai.configure(api_key=GEMINI_API_KEY)

        # Encode PDF as base64 for Gemini
        encoded = base64.b64encode(pdf_bytes).decode('utf-8')
        print("[DEBUG] PDF base64 size:", len(encoded))

        model = genai.GenerativeModel("gemini-1.5-flash")
        response = model.generate_content([
            "Extract business info as JSON with keys businessName, industry, size, website, description",
            {"mime_type": "application/pdf", "data": encoded}
        ])
        raw = response.text
        print(f"[DEBUG] Gemini response text: {raw}")

        # 3) Strip code fences and load JSON
        clean = _strip_code_fences(raw)
        try:
            data = json.loads(clean)
            # Valid JSON that is not an object (list, string, number, ...) cannot
            # be filtered by key, so report it as a parse failure explicitly.
            if not isinstance(data, dict):
                raise ValueError(f"expected a JSON object, got {type(data).__name__}")
            print(f"[DEBUG] Parsed JSON keys: {list(data.keys())}")
        except ValueError as parse_err:  # json.JSONDecodeError is a ValueError
            print(f"[ERROR] JSON parse failed: {parse_err}")
            return jsonify({
                'error': 'Failed to parse Gemini JSON',
                'details': str(parse_err),
                'cleaned_text_snippet': clean[:200]
            }), 502

        # 4) Filter to step1 fields
        step1 = filter_step1(data)
        print(f"[DEBUG] Returning filtered data: {step1}")

        return jsonify(step1), 200

    except Exception as e:
        # Last-resort handler so the client always receives a JSON error body.
        print(f"[EXCEPTION] {type(e).__name__}: {e}")
        return jsonify({'error': str(e)}), 500

# Start the Flask development server only when this code runs as the main module.
if __name__ == '__main__':
    print("[DEBUG] Starting Flask server…")
    # Listen on all interfaces; port 5000 is the same port passed to ngrok.connect.
    app.run(host='0.0.0.0', port=5000)


[DEBUG] Setting ngrok auth token…
[DEBUG] ngrok tunnel → https://a6c8-35-227-39-104.ngrok-free.app
[DEBUG] Starting Flask server…
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on all addresses (0.0.0.0)
 * Running on http://127.0.0.1:5000
 * Running on http://172.28.0.12:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m


[DEBUG] /extract called
[DEBUG] Received file: version1.pdf, size=75646 bytes
[DEBUG] Configuring Gemini API…
[DEBUG] PDF base64 size: 100864


INFO:werkzeug:127.0.0.1 - - [17/May/2025 09:21:32] "POST /extract HTTP/1.1" 200 -


[DEBUG] Gemini response text: ```json
{
  "businessName": "Data Diggers",
  "industry": "Data Science, Text Generation, Social Media",
  "size": "Unknown",
  "website": null,
  "description": "A data science team that utilizes the LLaMA 3 language model, fine-tuned with Unsloth, for the generation of Instagram captions specifically designed for a Generation Z audience."
}
```
[DEBUG] Parsed JSON keys: ['businessName', 'industry', 'size', 'website', 'description']
[DEBUG] Returning filtered data: {'businessName': 'Data Diggers', 'industry': 'Data Science, Text Generation, Social Media', 'size': 'Unknown', 'website': None, 'description': 'A data science team that utilizes the LLaMA 3 language model, fine-tuned with Unsloth, for the generation of Instagram captions specifically designed for a Generation Z audience.'}
