<a href="https://colab.research.google.com/github/chsrinidh-rao/Ai-Assistant-coding/blob/main/AI_LAB_5_4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Data Collection Script: Safeguarding User Information

This script demonstrates how to collect basic user data safely, display it, and crucially, outlines essential considerations for data anonymization, protection, and ethical handling. Protecting personal data is paramount in any application.

In [1]:
# Python script to collect basic user data

import re # Required for email validation
import hashlib # Required for hashing identifiers

def collect_user_data():
    """Collects user's name, age, and email address safely."""
    print("--- User Data Collection ---")

    # 1. Safely collect user input
    # Name: Basic string input
    name = input("Please enter your name: ").strip()
    while not name: # Basic validation: ensure name is not empty
        print("Name cannot be empty. Please try again.")
        name = input("Please enter your name: ").strip()

    # Age: Integer input with validation
    age = None
    while age is None:
        try:
            age_str = input("Please enter your age: ").strip()
            if not age_str.isdigit(): # Check if input is purely digits
                raise ValueError("Age must be a number.")
            age = int(age_str)
            if age < 0 or age > 120: # Reasonable age range validation
                raise ValueError("Age must be between 0 and 120.")
        except ValueError as e:
            print(f"Invalid input for age: {e}. Please enter a valid number.")
            age = None

    # Email: String input with basic regex validation
    email = None
    email_regex = r"^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,4}$"
    while email is None:
        email_input = input("Please enter your email address: ").strip().lower()
        if re.match(email_regex, email_input):
            email = email_input
        else:
            print("Invalid email format. Please enter a valid email address.")

    user_data = {
        "name": name,
        "age": age,
        "email": email
    }
    return user_data

def display_user_data(data):
    """Displays the collected user data clearly."""
    print("\n--- Collected User Data ---")
    print(f"Name: {data['name']}")
    print(f"Age: {data['age']}")
    print(f"Email: {data['email']}")

    # Example of anonymization for display or storage (masking)
    masked_email = data['email'][0] + "*" * (data['email'].find('@') - 2) + data['email'][data['email'].find('@')-1:] if '@' in data['email'] else "*****"
    print(f"Masked Email (example anonymization): {masked_email}")

    # Example of hashing for display or storage
    # Hashing converts data into a fixed-size string of characters.
    # It's a one-way process, making it suitable for checking data integrity or
    # storing identifiers that don't need to be decrypted.
    # SHA256 is a common cryptographic hash function.
    name_hash = hashlib.sha256(data['name'].encode()).hexdigest()
    email_hash = hashlib.sha256(data['email'].encode()).hexdigest()
    print(f"Name Hash (SHA256): {name_hash}")
    print(f"Email Hash (SHA256): {email_hash}")


# Main execution
if __name__ == "__main__":
    user_info = collect_user_data()
    display_user_data(user_info)


--- User Data Collection ---
Please enter your name:  srinidh
Please enter your age: 19
Please enter your email address: srinidh@gmail.comm

--- Collected User Data ---
Name: srinidh
Age: 19
Email: srinidh@gmail.comm
Masked Email (example anonymization): s*****h@gmail.comm
Name Hash (SHA256): 6b964a14bc503e876dcee2fd2d964b4a38f21b99cf301dd782993a2ae555445c
Email Hash (SHA256): 2084d279cc29c04d558b30e5ee8c01c56191e6636a361b60c2eecb6bbc57306a


In [2]:
# Python script for training and evaluating a simple machine learning model with Ethical AI considerations

import pandas as pd # Suggestion: For data manipulation and structuring
import numpy as np # Suggestion: For numerical operations, especially in data generation
from sklearn.model_selection import train_test_split # Suggestion: For splitting data into training and testing sets
from sklearn.tree import DecisionTreeClassifier # Suggestion: A simple, interpretable model for demonstration
from sklearn.metrics import classification_report, accuracy_score # Suggestion: For model evaluation metrics

# --- 1. Data Generation with Intentional Bias ---
# Suggestion: For academic purposes, generating synthetic data allows for controlled exploration of bias.
# In real-world scenarios, data collection itself needs rigorous ethical oversight to prevent inherited biases.

n_samples = 1000 # Number of synthetic data samples
np.random.seed(42) # For reproducibility of random data generation

# Features: age, income, and a 'proxy_feature' that might represent a protected group.
age = np.random.randint(18, 70, n_samples)
income = np.random.randint(20000, 150000, n_samples)

# Ethical Consideration: Introducing a 'proxy_feature'. In real-world data, even if direct sensitive attributes
# (like gender or race) are excluded, other features (e.g., zip code, certain behavioral patterns) can act as proxies
# and implicitly carry the same biases. Here, we simulate this by creating 'group_A' (0) and 'group_B' (1).
proxy_feature = np.random.choice([0, 1], n_samples, p=[0.5, 0.5]) # 0 for 'Group A', 1 for 'Group B'

# Target variable: 'approved' (1) or 'denied' (0). Simulating a binary decision, e.g., loan approval.
# We create a base approval logic but then *intentionally* introduce bias against 'Group A'.
approved = np.zeros(n_samples, dtype=int)

for i in range(n_samples):
    # Base approval logic: Generally higher approval for higher income and middle age.
    if income[i] > 60000 and 30 < age[i] < 60:
        approved[i] = 1
    elif income[i] > 40000 and 25 < age[i] < 65 and np.random.rand() < 0.7: # Slight chance for others
        approved[i] = 1

    # Intentional Bias: A higher denial chance for 'Group A' (proxy_feature=0), overriding other factors.
    # This mimics real-world scenarios where historical biases or discriminatory practices are embedded in data.
    if proxy_feature[i] == 0: # If in 'Group A'
        if np.random.rand() < 0.6: # 60% chance to be denied, even if other factors suggest approval
            approved[i] = 0

df = pd.DataFrame({
    'age': age,
    'income': income,
    'proxy_feature': proxy_feature, # This feature will demonstrate bias
    'approved': approved
})

# Display first few rows of the generated data to understand its structure.
print("--- Sample of Generated Data ---")
print(df.head())

# --- 2. Data Splitting ---
# Separate features (X) and target (y).
X = df[['age', 'income', 'proxy_feature']]
y = df['approved']

# Split data into training and testing sets.
# Ethical Consideration: Data splitting should be done carefully. If bias exists in the overall dataset,
# it will be propagated to both training and test sets. Ensure test sets are representative of all subgroups.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

print(f"\nTraining set size: {len(X_train)} samples")
print(f"Testing set size: {len(X_test)} samples")

# --- 3. Model Training ---
# Suggestion: Decision Trees are chosen for their relative interpretability, making it easier to explain predictions.
# How the model works at a high level (Explainability and Transparency):
# A Decision Tree learns by recursively splitting the data into subsets based on feature values.
# It seeks to find the feature and split-point (e.g., 'income > $60,000' or 'age < 30') that best separates the classes (e.g., 'approved' vs. 'denied').
# This process continues until a stopping criterion is met (e.g., `max_depth`), forming a tree-like structure of decisions.
# Each leaf node in the tree represents a final classification decision (e.g., 'approve' or 'deny').
# This "if-then-else" logic makes Decision Trees quite transparent; you can literally trace the path from input features to output prediction.

model = DecisionTreeClassifier(max_depth=5, random_state=42) # Limit depth for a simpler, more explainable tree.
model.fit(X_train, y_train)

print("\n--- Model Training Complete ---")

# --- 4. Prediction ---
y_pred = model.predict(X_test)

# --- 5. Evaluation ---
print("\n--- Model Evaluation ---")
overall_accuracy = accuracy_score(y_test, y_pred)
print(f"Overall Accuracy: {overall_accuracy:.4f}")

print("\nClassification Report:")
print(classification_report(y_test, y_pred))

# Accuracy Limitations and Why Results May Not Generalize:
# This is a simple Decision Tree model trained on a small, synthetic dataset.
# 1. Simplicity of features: Real-world problems involve far more complex and nuanced features.
# 2. Overfitting: Even with `max_depth` limited, simple models can sometimes overfit to training data, performing poorly on unseen data.
# 3. Limited data: Small datasets might not capture the full variance and complexity of real-world scenarios, leading to poor generalization.
# Developer Responsibility: Always validate model performance on diverse, real-world data from multiple sources and monitor it continuously post-deployment.

# --- 6. Analysis of Bias and Fairness Considerations ---

print("\n--- Bias and Fairness Analysis ---")

# Potential Sources of Bias in Training Data or Features:
# 1. Training Data Bias (Demonstrated here): Our synthetic data was *intentionally* biased where 'proxy_feature'=0 (Group A)
#    had a higher denial rate. In real-world data, such biases can arise from:
#    - Historical discrimination reflected in past human decisions that become labels in the data.
#    - Unrepresentative sampling during data collection, leading to under-representation of certain groups.
#    - Measurement errors that disproportionately affect specific subgroups.
# 2. Feature Bias: Even if direct sensitive attributes (e.g., race, gender) are excluded, other features used in the model
#    might serve as proxies. For example, a credit score model might use zip code, which could correlate with race or income.
# 3. Selection Bias: If the data used to train the model is not randomly sampled from the population it's meant to serve.

# Fairness Considerations and Risks of Misuse:
# - Disparate Impact: Does the model perform differently or lead to different outcomes for different subgroups?
#   Let's check the accuracy for our intentionally biased 'proxy_feature' subgroups.
X_test_group0 = X_test[X_test['proxy_feature'] == 0] # Filter for Group A
y_test_group0 = y_test[X_test['proxy_feature'] == 0]
y_pred_group0 = model.predict(X_test_group0)

X_test_group1 = X_test[X_test['proxy_feature'] == 1] # Filter for Group B
y_test_group1 = y_test[X_test['proxy_feature'] == 1]
y_pred_group1 = model.predict(X_test_group1)

accuracy_group0 = accuracy_score(y_test_group0, y_pred_group0)
accuracy_group1 = accuracy_score(y_test_group1, y_pred_group1)

print(f"Accuracy for Group A (proxy_feature=0): {accuracy_group0:.4f}")
print(f"Accuracy for Group B (proxy_feature=1): {accuracy_group1:.4f}")

# As shown above, due to the intentional bias in data generation, the model's accuracy (and other fairness metrics)
# can be significantly different between groups. This demonstrates a clear disparate impact, where one group is
# systematically disadvantaged by the model's decisions.
#
# Risks of Misuse:
# - Discrimination: Models trained on biased data can perpetuate and even amplify historical societal discrimination
#   in critical areas like loan applications, hiring decisions, healthcare, or criminal justice.
# - Unfair Resource Allocation: If the model is used to allocate limited resources, biased predictions can lead to
#   certain groups being unfairly disadvantaged or underserved.
# - Erosion of Trust: Users and the public will lose trust in AI systems that are perceived as unfair, biased, or discriminatory.

# Developer Responsibility and Human Oversight When Deploying AI-Generated Models:
# - Continuous Monitoring: AI models are not static. Their performance and fairness must be continuously monitored
#   post-deployment. Data distributions can shift, and subtle biases can emerge over time (concept drift, data drift).
# - Auditing for Bias: Regularly audit models for different types of bias and fairness metrics across various demographic
#   or protected groups. Utilize specialized fairness toolkits (e.g., IBM's AI Fairness 360, Google's What-If Tool) to
#   identify, quantify, and mitigate bias.
# - Human-in-the-Loop: For high-stakes decisions (e.g., medical diagnoses, critical financial approvals, legal outcomes),
#   human oversight and intervention are absolutely crucial. The AI should serve as an aid, providing insights and recommendations,
#   but not acting as the sole or final decision-maker.
# - Transparency with Users: Clearly communicate the model's purpose, capabilities, limitations, and potential biases to end-users.
#   Provide mechanisms for appeal or correction if users feel they have been unfairly treated by an AI-driven decision.
# - Ethical Deployment: Thoroughly consider the broader societal impact and potential harms before deploying any AI model.
#   Ensure the model aligns with ethical principles, legal requirements (e.g., GDPR, anti-discrimination laws), and company values.
# - Responsible AI Principle: Embed fairness, accountability, and transparency throughout the entire AI lifecycle,
#   from initial problem definition and data collection to model deployment, monitoring, and eventual decommissioning.


--- Sample of Generated Data ---
   age  income  proxy_feature  approved
0   56  125186              1         1
1   69   54674              1         0
2   46   55854              1         1
3   32   66271              1         1
4   60   93688              1         0

Training set size: 700 samples
Testing set size: 300 samples

--- Model Training Complete ---

--- Model Evaluation ---
Overall Accuracy: 0.8467

Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.93      0.89       203
           1       0.81      0.68      0.74        97

    accuracy                           0.85       300
   macro avg       0.84      0.80      0.82       300
weighted avg       0.84      0.85      0.84       300


--- Bias and Fairness Analysis ---
Accuracy for Group A (proxy_feature=0): 0.8289
Accuracy for Group B (proxy_feature=1): 0.8649


In [3]:
import logging
import os
import re # Suggestion: For potential redaction/masking of sensitive patterns if they bypass initial filters

# --- 1. Configure the Logger ---

def setup_logging(log_file='app.log', level=logging.INFO):
    """Configures the application logger with a file handler and console handler."""

    # Developer Responsibility: Logging configuration should be part of a secure application setup.
    # Ensure logs are stored in a secure location with appropriate access controls.

    logger = logging.getLogger(__name__)
    logger.setLevel(level)

    # Avoid duplicate handlers if setup is called multiple times
    if not logger.handlers:
        # File Handler: For persistent logs
        file_handler = logging.FileHandler(log_file)
        file_formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        file_handler.setFormatter(file_formatter)
        logger.addHandler(file_handler)

        # Console Handler: For real-time monitoring during development/operations
        console_handler = logging.StreamHandler()
        console_formatter = logging.Formatter('%(name)s - %(levelname)s - %(message)s')
        console_handler.setFormatter(console_formatter)
        logger.addHandler(console_handler)

    logger.info("Logging configured.")
    return logger

# Initialize the logger
logger = setup_logging()


# --- 2. Log Common Application Events ---

def handle_request(request_data: dict):
    """Simulates handling an incoming web request and logs relevant information.

    Ethical Logging Practice: Log events at appropriate levels (INFO for normal ops, WARNING for unusual, ERROR for failures).
    """

    # Developer Responsibility: When logging request data, *always* sanitize it first.
    # Sensitive data MUST NEVER be written to logs in plain text.
    # Why sensitive data must not be logged: Logs are often less secure than databases,
    # can be accessed by more personnel, and accumulate over time, increasing the risk of exposure
    # during breaches, audits, or even accidental disclosures.

    # Examples of data that should be masked, redacted, or excluded from logs:
    # - Passwords, API Keys, Authentication Tokens (e.g., JWTs, session IDs)
    # - Personally Identifiable Information (PII) like full names, email addresses, phone numbers, SSNs, credit card numbers
    # - Health Information (PHI), Financial Data
    # - Geolocation data with high precision

    logged_data = request_data.copy() # Suggestion: Work on a copy to avoid altering original request_data

    # Data Exclusion/Redaction Example:
    if 'password' in logged_data: # Suggestion: If 'password' exists in request, remove it.
        del logged_data['password']
    if 'auth_token' in logged_data: # Suggestion: Remove sensitive tokens.
        del logged_data['auth_token']

    # Data Masking Example:
    if 'email' in logged_data: # Suggestion: Mask email addresses before logging.
        email = logged_data['email']
        # Basic masking: show first char, then ***, then domain
        masked_email = email[0] + '***' + email[email.find('@'):] if '@' in email else '***masked***'
        logged_data['email'] = masked_email

    logger.info(f"Request received: {logged_data}")
    # Ethical Logging Practice: Log just enough information to debug and audit, but no more.

def process_data(user_id: str, data: str):
    """Simulates a successful data processing action."""
    # Developer Responsibility: Ensure that any identifiers (like user_id) logged are either essential for auditing
    # and stored securely, or are pseudonymized/hashed if possible, especially in high-volume logs.
    logger.info(f"Data successfully processed for user_id: {user_id}. Data summary: {data[:20]}...")

def simulate_error(error_message: str, user_context: dict = None):
    """Simulates an error condition and logs it.

    Ethical Logging Practice: Error logs often contain stack traces which might unintentionally expose sensitive data.
    Configure loggers to redact potentially sensitive paths or variables from stack traces if possible, or review logs regularly.
    """
    context = user_context.copy() if user_context else {}
    if 'user_email' in context: # Suggestion: Redact sensitive info even from error contexts.
        context['user_email'] = '***redacted***'
    logger.error(f"Application error: {error_message}. Context: {context}")


# --- Example Usage ---

if __name__ == "__main__":
    print("--- Demonstrating Logging for a Web Application ---\n")

    # Simulate an incoming request with sensitive data
    logger.info("Simulating an incoming request...")
    request_1 = {
        'method': 'POST',
        'path': '/login',
        'ip_address': '192.168.1.100',
        'username': 'testuser',
        'password': 'supersecretpassword',
        'email': 'user@example.com',
        'auth_token': 'jwt.eyJhbGciOiJIUzI1NiJ9.eyJ1c2VySWQiOiIxMjMifQ.signature'
    }
    handle_request(request_1)

    # Simulate a successful action
    logger.info("Simulating a successful action...")
    process_data(user_id="user_abc_123", data="User settings updated for theme_dark")

    # Simulate an error with some context
    logger.info("Simulating an application error...")
    simulate_error("Database connection failed", user_context={'user_id': 'user_abc_123', 'session_id': 'xyz123', 'user_email': 'user@example.com'})

    print("\n--- Check 'app.log' file for log output. --- ")
    # Clean up the log file if it exists for a fresh run next time
    if os.path.exists('app.log'):
        # Developer Responsibility: Log files can grow large and contain historical data.
        # Implement log rotation and secure archiving/deletion policies compliant with data retention laws.
        # This simple example just removes it for repeated runs.
        # os.remove('app.log') # Uncomment to remove log file after each run
        pass


__main__ - INFO - Logging configured.
INFO:__main__:Logging configured.
__main__ - INFO - Simulating an incoming request...
INFO:__main__:Simulating an incoming request...
__main__ - INFO - Request received: {'method': 'POST', 'path': '/login', 'ip_address': '192.168.1.100', 'username': 'testuser', 'email': 'u***@example.com'}
INFO:__main__:Request received: {'method': 'POST', 'path': '/login', 'ip_address': '192.168.1.100', 'username': 'testuser', 'email': 'u***@example.com'}
__main__ - INFO - Simulating a successful action...
INFO:__main__:Simulating a successful action...
__main__ - INFO - Data successfully processed for user_id: user_abc_123. Data summary: User settings update...
INFO:__main__:Data successfully processed for user_id: user_abc_123. Data summary: User settings update...
__main__ - INFO - Simulating an application error...
INFO:__main__:Simulating an application error...
__main__ - ERROR - Application error: Database connection failed. Context: {'user_id': 'user_abc_1

--- Demonstrating Logging for a Web Application ---


--- Check 'app.log' file for log output. --- 


In [4]:
# Python program for simple product recommendation with ethical and fairness considerations

from collections import Counter # Suggestion: Use Counter to easily count frequency of items

def recommend_products(user_history: list[str]) -> list[str]:
    """Recommends products based on a simple user history (e.g., categories).

    This function uses a frequency-based approach and includes inline comments to highlight
    ethical considerations, potential biases, and developer responsibilities in AI-assisted
    recommendation systems.

    Args:
        user_history (list[str]): A list of product categories previously viewed or purchased by the user.

    Returns:
        list[str]: A list of recommended product categories.
    """

    # Predefined product catalog (simplified for demonstration)
    # Developer Responsibility: Real-world catalogs are much larger and more complex. Ensure diversity and avoid biases in catalog creation.
    product_catalog = {
        "Electronics": ["Smartphone", "Laptop", "Headphones", "Smartwatch"],
        "Books": ["Fiction Novel", "Self-Help Book", "Biography", "Science Fiction"],
        "Clothing": ["T-Shirt", "Jeans", "Jacket", "Sneakers"],
        "Home & Kitchen": ["Coffee Maker", "Blender", "Cookware Set", "Smart Speaker"],
        "Outdoor": ["Tent", "Hiking Boots", "Backpack", "Camping Chair"],
        "Gaming": ["Gaming Console", "Video Game", "Gaming Headset", "Gaming Mouse"]
    }

    print(f"\n--- User History: {user_history} ---")

    # Recommendation Logic: Frequency-based
    # Suggestion: Count the frequency of each category in the user's history.
    # Transparency/Explainability: This logic is simple: users like what they've interacted with most.
    category_counts = Counter(user_history)
    print(f"Category frequency in history: {category_counts}")

    if not category_counts:
        # Ethical Consideration: For new users or users with sparse history, provide diverse general recommendations.
        # Avoid immediately pushing popular items which can create filter bubbles for new users.
        return product_catalog.get("Electronics", []) + product_catalog.get("Books", []) # Simple default for no history

    # Get the most frequently interacted-with category
    most_common_category, _ = category_counts.most_common(1)[0]
    print(f"Most common category: {most_common_category}")

    # Recommend products from the most common category
    # Transparency/Explainability: We are recommending items directly related to their past strong interest.
    recommendations = product_catalog.get(most_common_category, [])

    # Potential Fairness Issue: Popularity Bias
    # Recommending only from the most popular items within a category can lead to popularity bias.
    # Items that are already popular get more visibility, making them even more popular, while niche or new items are overlooked.
    # Bias Effect: Reduces diversity, creates 'rich-get-richer' dynamics, and can prevent users from discovering less-known but relevant products.

    # Potential Fairness Issue: Filter Bubbles / Echo Chambers
    # Only recommending based on past similar items can trap users in a 'filter bubble'.
    # They are only exposed to information/products that align with their existing preferences, limiting their perspective.
    # Bias Effect: Decreases serendipity, limits user exploration, and can reinforce existing biases.

    # Potential Fairness Issue: Exclusion of New or Niche Products
    # Simple frequency-based systems struggle to recommend new products (cold start problem) or items from niche categories.
    # Bias Effect: Disadvantages smaller businesses, independent creators, or less mainstream interests.

    # Strategy to Promote Ethical Recommendations: Diversify Suggestions
    # Suggestion: Introduce recommendations from related or complementary categories, or even some randomly selected diverse items.
    # This helps break filter bubbles and expose users to new possibilities.
    # Ethical Practice: Ensure a balance between relevance and discovery.
    if most_common_category == "Books":
        recommendations.extend(product_catalog.get("Home & Kitchen", [])[:1]) # Suggest a complementary category item
    elif most_common_category == "Electronics":
        recommendations.extend(product_catalog.get("Gaming", [])[:1])
    # Developer Responsibility: Implement mechanisms for exploration (e.g., 'customers also bought', 'trending now', 'new arrivals').

    # Strategy to Promote Ethical Recommendations: Avoid Discrimination
    # While this simple model doesn't explicitly use demographic data, more complex systems might.
    # Ethical Practice: Ensure recommendation algorithms do not discriminate based on protected characteristics (gender, race, age, etc.).
    # Bias Effect: If demographic data is implicitly or explicitly used, recommendations could be unfairly withheld or skewed for certain groups.
    # Developer Responsibility: Audit algorithms for disparate impact and ensure fairness metrics are monitored.

    # Strategy to Promote Ethical Recommendations: Transparency and Control
    # Ethical Practice: Users should understand why they are seeing certain recommendations and have control over their preferences.
    # For instance, allowing users to 'dislike' recommendations or modify their interests.
    # Developer Responsibility: Provide clear explanations of how recommendations are generated and offer user-facing controls.

    # Strategy to Promote Ethical Recommendations: Not Exploiting User Behavior
    # Ethical Practice: Avoid recommending products that exploit user vulnerabilities (e.g., addictive behaviors, financial distress).
    # Developer Responsibility: Design systems with user well-being in mind, not just engagement or profit maximization.

    # Final Recommendations (simplified, showing a mix)
    # Ethical Consideration: Prioritize diversity over pure frequency in some cases to broaden user experience.
    final_recommendations = list(set(recommendations)) # Remove duplicates
    final_recommendations = final_recommendations[:5] # Limit to a reasonable number

    return final_recommendations


# Main execution block for demonstration
if __name__ == "__main__":
    # Example User Histories
    user_history_1 = ["Books", "Books", "Fiction Novel", "Electronics", "Books"]
    user_history_2 = ["Clothing", "Clothing", "Jeans", "Outdoor", "Clothing"]
    user_history_3 = ["Gaming", "Gaming Console"]
    user_history_4 = [] # New user or no history

    print("\n--- Recommendation for User History 1 ---")
    recs1 = recommend_products(user_history_1)
    print(f"Recommended Products: {recs1}")

    print("\n--- Recommendation for User History 2 ---")
    recs2 = recommend_products(user_history_2)
    print(f"Recommended Products: {recs2}")

    print("\n--- Recommendation for User History 3 ---")
    recs3 = recommend_products(user_history_3)
    print(f"Recommended Products: {recs3}")

    print("\n--- Recommendation for User History 4 (New User) ---")
    recs4 = recommend_products(user_history_4)
    print(f"Recommended Products: {recs4}")



--- Recommendation for User History 1 ---

--- User History: ['Books', 'Books', 'Fiction Novel', 'Electronics', 'Books'] ---
Category frequency in history: Counter({'Books': 3, 'Fiction Novel': 1, 'Electronics': 1})
Most common category: Books
Recommended Products: ['Biography', 'Self-Help Book', 'Coffee Maker', 'Science Fiction', 'Fiction Novel']

--- Recommendation for User History 2 ---

--- User History: ['Clothing', 'Clothing', 'Jeans', 'Outdoor', 'Clothing'] ---
Category frequency in history: Counter({'Clothing': 3, 'Jeans': 1, 'Outdoor': 1})
Most common category: Clothing
Recommended Products: ['Jeans', 'T-Shirt', 'Jacket', 'Sneakers']

--- Recommendation for User History 3 ---

--- User History: ['Gaming', 'Gaming Console'] ---
Category frequency in history: Counter({'Gaming': 1, 'Gaming Console': 1})
Most common category: Gaming
Recommended Products: ['Gaming Console', 'Gaming Mouse', 'Gaming Headset', 'Video Game']

--- Recommendation for User History 4 (New User) ---

--- U

In [5]:
# Python function for basic sentiment analysis with ethical and bias considerations

def analyze_sentiment(text: str) -> str:
    """Performs basic sentiment analysis on a given text string (Positive, Negative, or Neutral).

    This function uses a simple keyword-based approach and includes inline comments to highlight
    ethical considerations, potential biases, and developer responsibilities in AI-assisted
    sentiment analysis.

    Args:
        text (str): The input text string to analyze.

    Returns:
        str: The sentiment label ('Positive', 'Negative', 'Neutral').
    """

    # Convert text to lowercase for case-insensitive matching
    processed_text = text.lower()

    # Suggestion: Define lists of positive, negative, and neutral keywords.
    # Ethical Consideration: Keyword lists themselves can be a source of bias.
    # For example, terms that are positive in one cultural context might be neutral or even negative in another.
    # Developer Responsibility: Regularly review and update keyword lists, considering cultural nuances and diverse linguistic use.
    positive_keywords = ['good', 'great', 'excellent', 'happy', 'love', 'amazing', 'fantastic', 'wonderful', 'joy', 'benefit', 'succeed']
    negative_keywords = ['bad', 'terrible', 'horrible', 'sad', 'hate', 'awful', 'poor', 'fail', 'problem', 'difficult', 'crisis']

    # Bias Source: Language Bias (English-centric)
    # This simple approach is inherently biased towards English. Languages have different structures, idioms, and sentiment expressions.
    # A word-based model trained only on English will perform poorly or incorrectly for other languages.
    # Bias Effect: Leads to unfair or inaccurate sentiment classifications for non-English content, potentially marginalizing non-English speakers.

    # Bias Source: Cultural Bias
    # The perceived sentiment of a word or phrase can vary significantly across cultures.
    # E.g., 'challenging' might be negative in some contexts, but positive ('opportunity') in others.
    # Bias Effect: Sentiment classifications may reflect dominant cultural norms, leading to misinterpretations for texts from minority cultures.

    # Initialize sentiment scores
    positive_score = 0
    negative_score = 0

    # Check for positive keywords
    for keyword in positive_keywords:
        if keyword in processed_text:
            positive_score += 1

    # Check for negative keywords
    for keyword in negative_keywords:
        if keyword in processed_text:
            negative_score += 1

    # Bias Source: Sarcasm and Irony
    # Rule-based sentiment analysis struggles with sarcasm ("Oh, that's just *great*.") or irony, where words convey opposite meanings.
    # Bias Effect: Misclassifications due to literal interpretation can lead to inaccurate insights, impacting decisions based on sentiment.

    # Bias Source: Over-representation of Certain Viewpoints/Topics in Training Data (if using ML)
    # While this is a rule-based model, if keywords were derived from biased datasets, it carries that bias.
    # If an ML model were used, training data might disproportionately feature certain political, social, or demographic viewpoints.
    # Bias Effect: The model's sentiment prediction will be skewed towards the dominant viewpoints, suppressing or misinterpreting others.
    # This can lead to algorithmic unfairness and echo chambers.

    # Determine sentiment based on scores
    if positive_score > negative_score:
        sentiment = 'Positive'
    elif negative_score > positive_score:
        sentiment = 'Negative'
    else:
        # Suggestion: Acknowledge neutrality or ambiguity when scores are equal or low.
        sentiment = 'Neutral'

    # Strategy to Handle/Reduce Bias: Balanced Datasets
    # For ML models, ensure training data is representative across different demographics, languages, cultures, and viewpoints.
    # For rule-based models, ensure keyword lists are compiled from diverse sources and regularly reviewed by diverse teams.

    # Strategy to Handle/Reduce Bias: Neutral Wording and Contextual Analysis
    # Encourage users to provide context where possible. For automated systems, explore more advanced NLP techniques
    # that can understand context, negation, and discourse structures rather than just keywords.

    # Strategy to Handle/Reduce Bias: Human Review and Intervention
    # Implement human-in-the-loop systems, especially for high-stakes decisions. Humans can identify and correct biased outputs.
    # Regular audits of sentiment analysis results by diverse groups can reveal patterns of bias.

    # Strategy to Handle/Reduce Bias: Transparency and Explainability
    # Clearly communicate the limitations and potential biases of the sentiment analysis model to users.
    # Provide explanations for sentiment predictions (e.g., highlighting keywords) to allow for user understanding and correction.

    # Developer Responsibility: Continuous Monitoring and Ethical Sourcing
    # Developers are responsible for continuously monitoring model performance for disparate impact across groups.
    # They must ensure data for model development (and keyword lists) is ethically sourced, respecting privacy and consent.
    # Ethical AI: Prioritize fairness, accountability, and transparency in all stages of AI development and deployment.

    return sentiment

# Example Usage:
if __name__ == "__main__":
    text1 = "This movie was absolutely amazing and I loved every moment of it!"
    text2 = "The service was terrible and I had a horrible experience."
    text3 = "The weather today is neither good nor bad."
    text4 = "Oh, that's just *great* – another system crash. (Demonstrates sarcasm challenge)"

    print(f"Text: '{text1}' -> Sentiment: {analyze_sentiment(text1)}")
    print(f"Text: '{text2}' -> Sentiment: {analyze_sentiment(text2)}")
    print(f"Text: '{text3}' -> Sentiment: {analyze_sentiment(text3)}")
    print(f"Text: '{text4}' -> Sentiment: {analyze_sentiment(text4)}") # Will incorrectly be 'Positive' due to 'great'


Text: 'This movie was absolutely amazing and I loved every moment of it!' -> Sentiment: Positive
Text: 'The service was terrible and I had a horrible experience.' -> Sentiment: Negative
Text: 'The weather today is neither good nor bad.' -> Sentiment: Neutral
Text: 'Oh, that's just *great* – another system crash. (Demonstrates sarcasm challenge)' -> Sentiment: Positive
