# AI Immigration & Citizenship Guidance Assistant


## Overview

This project scrapes official government immigration or citizenship
guidance pages, extracts visible readable text, and uses an OpenAI model
to generate a concise, structured, and personalized summary based on a
defined user profile.

In [1]:
import os
import requests
from dotenv import load_dotenv
from bs4 import BeautifulSoup
from IPython.display import Markdown, display
from openai import OpenAI

# ENVIRONMENT SETUP & API KEYS

In [None]:
# Load variables from a local .env file. override=True asks python-dotenv to
# let values from .env replace variables already set in the environment.
load_dotenv(override=True)
api_key = os.getenv("OPENAI_API_KEY")

# Sanity-check the key and print a hint. These checks only report problems;
# nothing is raised, so execution continues either way.
#
# The whitespace check must come before the prefix check: a key with leading
# whitespace also fails startswith("sk-proj-"), and would otherwise be
# reported as having the wrong prefix instead of the real problem.
if not api_key:
    print("No API key was found - please set OPENAI_API_KEY in your .env file.")
elif api_key.strip() != api_key:
    print("An API key was found, but it may have leading/trailing whitespace - please remove it.")
elif not api_key.startswith("sk-proj-"):
    print("An API key was found, but it doesn't start with sk-proj-; please check you're using the right key.")
else:
    print("API key found and looks good so far!")

# Shared client used by the rest of the file. It is built without an explicit
# key; presumably the client reads OPENAI_API_KEY from the environment itself
# (confirm against the OpenAI client documentation).
openai = OpenAI()

# WEB SCRAPING SETUP

In [3]:
# HTTP headers passed to requests.get when scraping: a desktop-Chrome
# User-Agent string, assembled from its three space-separated components.
headers = {
    "User-Agent": " ".join(
        (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
            "AppleWebKit/537.36 (KHTML, like Gecko)",
            "Chrome/121.0.0.0 Safari/537.36",
        )
    )
}

class Website:
    """
    A single web page, fetched and reduced to its title and readable text.

    Attributes set by the constructor:
        url:   the URL that was requested.
        title: the stripped <title> text, "No title found" when the page has
               no usable title, or "Error loading page" when the request failed.
        text:  newline-separated text of <body> with non-content tags removed,
               "No body content found" when there is no <body>, or
               "Could not load page content" when the request failed.
    """

    def __init__(self, url):
        # Placeholders first, so the attributes always exist.
        self.url = url
        self.title = "Unknown title"
        self.text = "No content"
        try:
            page = requests.get(url, headers=headers, timeout=15)
            # Turn HTTP error statuses into exceptions handled below.
            page.raise_for_status()

            parsed = BeautifulSoup(page.content, "html.parser")

            if parsed.title and parsed.title.string:
                self.title = parsed.title.string.strip()
            else:
                self.title = "No title found"

            body = parsed.body
            if not body:
                self.text = "No body content found"
            else:
                # Remove tags that carry no readable text before extracting.
                for tag in body.find_all(["script", "style", "img", "input", "noscript", "svg"]):
                    tag.decompose()

                self.text = body.get_text(separator="\n", strip=True)

        except requests.RequestException as err:
            # Network/HTTP failures are reported and replaced by placeholder
            # values rather than propagated to the caller.
            print(f"Error fetching website: {err}")
            self.title = "Error loading page"
            self.text = "Could not load page content"

# AI ASSISTANT FUNCTIONS

In [4]:
def policy_assistant(system_prompt, user_prompt, model="gpt-4o-mini"):
    """
    Send a system/user prompt pair to an OpenAI chat model and return the reply.

    Args:
        system_prompt: Text sent as the "system" message.
        user_prompt: Text sent as the "user" message.
        model: Name of the chat model to call. Defaults to "gpt-4o-mini",
            the value that was previously hard-coded, so existing
            two-argument calls behave as before.

    Returns:
        The content of the first choice's message. The system prompt used in
        this file asks for Markdown, so the reply is expected to be Markdown.
    """
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": user_prompt},
    ]
    # `openai` is the module-level client created during environment setup.
    response = openai.chat.completions.create(model=model, messages=messages)
    return response.choices[0].message.content


# AI SYSTEM CONFIGURATION

In [5]:
# System message for the chat model: defines the assistant's role and the
# output rules (no invented facts, Markdown output, fixed closing disclaimer).
# The closing disclaimer uses a real em dash; it was previously stored as a
# mis-encoded byte sequence that would have been sent to the model verbatim.
system_prompt = """
You are an assistant that analyzes official countries government guidance website pages.
Your job is to extract what matters most for immigrants, residency permits, and citizenship/naturalisation.

Rules:
1) Do not invent facts. Only use the information present in the provided page text.
2) Summarize in clear, practical language.
3) Highlight: eligibility, required documents, steps, timelines, costs, and common pitfalls (if present).
4) If the page mentions legal references (laws/decrees/articles), list them.
5) Output in clean Markdown.
6) Add a short section: "Who this affects most" (students, workers, spouses, refugees, etc.) based on the page.
7) End with: "Not legal advice — verify on the official page."
"""

def user_prompt_for_policy(website, user_profile, max_chars=5000):
    """
    Build the user message from the user profile and a scraped guidance page.

    Args:
        website: Object exposing `title`, `url` and `text` attributes
            (for example a `Website` instance).
        user_profile: Free-text description of the user's situation.
        max_chars: Maximum number of characters of `website.text` to include,
            to keep the prompt from growing too large. Defaults to 5000, the
            limit that was previously hard-coded. Pass None to include the
            whole text.

    Returns:
        The prompt string: the profile, the page title and URL, the task
        instructions, and the first `max_chars` characters of the page text.
    """
    # Slicing with None keeps the whole string, so None means "no limit".
    page_text = website.text[:max_chars]
    return f"""
User profile / situation:
{user_profile}

You are analyzing this OFFICIAL page:
Title: {website.title}
URL: {website.url}

Task:
- Summarize the key rules and steps.
- Tell me what parts are most relevant to the user profile.
- If something is missing from the page, say what info would be needed.

PAGE TEXT (truncated):
{page_text}
"""

# MAIN EXECUTION

In [None]:
if __name__ == "__main__":
    # Local import: only this entry point needs it.
    from urllib.parse import urlparse

    # Pick ONE official guidance page to analyze (edit this).
    # Example: the GOV.UK visas and immigration landing page.
    website_url = "https://www.gov.uk/browse/visas-immigration"

    # Take the site name from the URL so the banner always matches the page
    # being scraped. It previously said "Service-Public.fr" while the URL
    # pointed at gov.uk.
    source_site = urlparse(website_url).netloc or website_url

    print(f"Starting AI Immigration Policy Assistant ({source_site})...")
    print("=" * 60)

    print(f" Scraping official guidance from: {website_url}")
    website = Website(website_url)

    print(f"Website Title: {website.title}")
    print(f"Content Length: {len(website.text)} characters\n")

    # Example user profile (edit this)
    user_profile = (
        "I live in UK as an immigrant and I'm planning to apply for permanent residency renewal "
        "in the next 3 months. I want to know the steps, requirements, and pitfalls."
    )

    # Combine the profile and the scraped page into the user message.
    prompt = user_prompt_for_policy(website, user_profile)

    print("Generating AI summary and guidance...\n")
    output = policy_assistant(system_prompt, prompt)

    # Render the model's reply as Markdown via IPython's display machinery.
    display(Markdown(output))