In [5]:
from google import genai
import json
import os
from dotenv import load_dotenv

In [12]:
# --- Environment & client setup ---
# load_dotenv() reads the .env file and adds its variables to os.environ,
# so the API key never has to be written into the notebook itself.
load_dotenv()

# Build the Gemini client from the key found in the environment.
client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY"))

In [13]:
# Smoke test: send one plain-text prompt and print the text of the reply.
response = client.models.generate_content(
    contents="How does AI work?",
    model="gemini-2.5-flash",
)
print(response.text)

AI, at its core, works by enabling machines to **learn from data** and **make decisions or predictions** without being explicitly programmed for every single scenario. Instead of following a rigid set of rules given by a human, AI systems are designed to identify patterns, draw inferences, and adapt their behavior.

Let's break down the fundamental process, primarily focusing on **Machine Learning (ML)**, which is the most prevalent form of AI today:

### The Core Pillars of How AI Works

1.  **Data: The Fuel**
    *   **Collection:** AI systems need vast amounts of data relevant to the task they're designed for. This could be images, text, audio, numbers, sensor readings, etc.
    *   **Preparation:** Raw data is messy. It needs to be cleaned, organized, and often "labeled." Labeling means adding tags or categories to the data (e.g., marking photos as "cat" or "dog," identifying spam emails). This is crucial for *supervised learning*.
    *   **Quality & Quantity:** The performance of

In [14]:
# Request settings asking the model to reply with raw JSON.
# GenerateContentConfig is the type passed as `config=` to
# client.models.generate_content() elsewhere in this notebook (classify_text),
# so the module-level object uses the same type for consistency.
generation_config = genai.types.GenerateContentConfig(
    response_mime_type="application/json"
)

In [29]:
# --- The Classifier Function ---

def classify_text(text_to_classify: str, client, categories: list) -> list | None:
    """
    Classifies a text into multiple categories using the Gemini API.

    Args:
        text_to_classify: The input string (sentence, paragraph, etc.).
        categories: A list of category strings.

    Returns:
        A list of 0s and 1s corresponding to the categories,
        or None if an error occurs.
    """
    
    # 1. Initialize the model
    # We use gemini-1.5-pro for strong instruction-following
       
    # 2. Configure the model for JSON output
    generation_config = genai.types.GenerateContentConfig(
        response_mime_type="application/json"
    )

    # 3. Create the prompt
    # This prompt instructs the model on its task and the exact output format.
    prompt = f"""
    You are a highly accurate multi-label text classifier.
    Your task is to analyze the user's text and determine which of the provided
    categories it belongs to. A text can belong to zero, one, or multiple categories.

    These are the {len(categories)} categories:
    {categories}

    Analyze the following text:
    "{text_to_classify}"

    Respond with a JSON object containing a single key "classification_array".
    The value of this key must be a Python list (array) of {len(categories)} integers (0 or 1).
    - Use 1 if the text belongs to the category.
    - Use 0 if the text does not belong to the category.
    
    The order of the 0s and 1s must match the order of the categories provided.
    """

    print(f"--- Sending request for: '{text_to_classify}' ---")
    
    try:
        # 4. Send the request
        response = client.models.generate_content(
            model = "gemini-2.5-flash",
            contents = prompt,
            config=generation_config
        )
        
        # 5. Parse the JSON response
        # The model's response.text will be a raw JSON string
        response_data = json.loads(response.text)
        
        classification_array = response_data.get('classification_array')
        
        # 6. Validate the output
        if (isinstance(classification_array, list) and
            len(classification_array) == len(categories) and
            all(isinstance(x, int) and x in [0, 1] for x in classification_array)):
            
            return classification_array
        else:
            print(f"Error: Model output was not in the expected format.")
            print(f"Raw response: {response.text}")
            return None

    except Exception as e:
        print(f"An error occurred during API call or parsing: {e}")
        print(f"Raw response parts (if available): {response.parts if 'response' in locals() else 'N/A'}")
        return None

In [30]:
# --- Example Usage ---

# The ten labels; their order fixes the order of the returned 0/1 flags.
my_categories = [
    "Technology", "Sports", "Politics", "Finance", "Health",
    "Education", "Entertainment", "Travel", "Food", "Science",
]

# --- Test Case 1: Politics & Finance ---
sentence1 = "The prime minister announced a new tax policy that will heavily impact the stock market."
result1 = classify_text(sentence1, client, my_categories)

# classify_text returns None on failure (it prints its own error message),
# in which case nothing further is shown here.
if result1:
    print(f"\nCategories: {my_categories}")
    print(f"Result:     {result1}\n")
    # Expected output (approx): [0, 0, 1, 1, 0, 0, 0, 0, 0, 0]

--- Sending request for: 'The prime minister announced a new tax policy that will heavily impact the stock market.' ---

Categories: ['Technology', 'Sports', 'Politics', 'Finance', 'Health', 'Education', 'Entertainment', 'Travel', 'Food', 'Science']
Result:     [0, 0, 1, 1, 0, 0, 0, 0, 0, 0]

