<a href="https://colab.research.google.com/github/Vishwateja-123/AI-Assisted-Coding/blob/main/Lab_Assignment_5_4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [8]:
import re
import uuid
import hashlib

def is_valid_email(email: str) -> bool:
    pattern = r"^[\w\.-]+@[\w\.-]+\.\w+$"
    return re.match(pattern, email) is not None

def hash_email(email: str) -> str:
    return hashlib.sha256(email.encode()).hexdigest()

def mask_email(email: str) -> str:
    username, domain = email.split("@")
    return username[0] + "***@" + domain

def collect_user_data():
    print("Enter User Details")

    name = input("Enter Name: ")
    age = int(input("Enter Age: "))
    email = input("Enter Email: ")

    if not is_valid_email(email):
        print("Invalid Email Format!")
        return

    user_id = str(uuid.uuid4())
    email_hash = hash_email(email)
    masked = mask_email(email)

    user_data = {
        "user_id": user_id,
        "age": age,
        "email_hash": email_hash
    }

    print("\nStored User Data (Protected):")
    print(user_data)

    print("\nDisplay Version (Masked for UI Only):")
    print(masked)

collect_user_data()


Enter User Details
Enter Name: Vishwateja
Enter Age: 20
Enter Email: paidipellyvishwateja@gmail.com

Stored User Data (Protected):
{'user_id': '944e7b0e-5dfd-487a-9af5-5ad2afb175a6', 'age': 20, 'email_hash': '3c1614a57fb70ca6f19f9a77d0649d131290d4eefe4a96a069f041064eb7bbda'}

Display Version (Masked for UI Only):
p***@gmail.com


In [2]:
# -------------------- Task 2: Sentiment Analysis with Bias Awareness --------------------

training_data = [
    ("The product is excellent and I am happy", "Positive"),
    ("The service was terrible and made me sad", "Negative"),
    ("The experience was okay, nothing special", "Neutral")
]


def train_sentiment_model(data):
    """
    Simple keyword-based training.
    Bias mitigation:
    - balanced samples (positive/negative/neutral)
    - includes neutral to avoid forced polarity
    """
    positive_words = set()
    negative_words = set()

    for text, label in data:
        words = text.lower().split()

        if label == "Positive":
            positive_words.update(words)
        elif label == "Negative":
            negative_words.update(words)

    return positive_words, negative_words


def sentiment_analysis(text, positive_words, negative_words):
    text = text.lower()
    score = 0

    for word in text.split():
        if word in positive_words:
            score += 1
        elif word in negative_words:
            score -= 1

    if score > 0:
        return "Positive"
    elif score < 0:
        return "Negative"
    else:
        return "Neutral"


# Training phase
positive_words, negative_words = train_sentiment_model(training_data)

# Testing phase
test_reviews = [
    "I am happy with the product",
    "This was a terrible experience",
    "The service is okay"
]

print("Sentiment Analysis Results:\n")
for review in test_reviews:
    result = sentiment_analysis(review, positive_words, negative_words)
    print("Review:", review)
    print("Predicted Sentiment:", result, "\n")


Sentiment Analysis Results:

Review: I am happy with the product
Predicted Sentiment: Positive 

Review: This was a terrible experience
Predicted Sentiment: Negative 

Review: The service is okay
Predicted Sentiment: Positive 



In [3]:
# -------------------- Task 3: Ethical Product Recommendation System --------------------

user_history = ["Laptop", "Phone", "Laptop"]


def recommend_products(user_history):
    """
    Ethical Guidelines:
    - Transparency: logic is clear
    - Fairness: avoids only popular/frequent items bias
    - User control: can give feedback later
    """

    all_products = ["Laptop", "Phone", "Tablet", "Headphones"]
    recommendations = []

    # Recommend only what user didn't buy yet
    for product in all_products:
        if product not in user_history:
            recommendations.append(product)

    return recommendations


recommended_items = recommend_products(user_history)

print("Recommended Products:\n")
for item in recommended_items:
    print("-", item)

print("\nUser Feedback:")
print("You can like or dislike recommendations to improve fairness.")


Recommended Products:

- Tablet
- Headphones

User Feedback:
You can like or dislike recommendations to improve fairness.


In [13]:
import logging
import sys

# ✅ Force reset root logger (works even if logging already configured)
logger = logging.getLogger()          # root logger
logger.handlers.clear()               # remove existing handlers
logger.setLevel(logging.INFO)         # set level to INFO

handler = logging.StreamHandler(sys.stdout)
formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
handler.setFormatter(formatter)

logger.addHandler(handler)

def log_user_action(user_id, action):
    logging.info(f"Action performed: {action} | User ID: {user_id}")

log_user_action(101, "Login Successful")
log_user_action(101, "Viewed Product Page")



2026-02-12 05:05:33,382 - INFO - Action performed: Login Successful | User ID: 101
2026-02-12 05:05:33,387 - INFO - Action performed: Viewed Product Page | User ID: 101


In [12]:
"""
=========================================================
RESPONSIBLE AI MODEL DOCUMENTATION
=========================================================

Model Name: Sentiment Analysis Classifier
Model Type: Logistic Regression
Purpose: Classify text into Positive / Negative sentiment

---------------- RESPONSIBLE USAGE GUIDELINES ----------------

1) Explainability:
- This model uses TF-IDF + Logistic Regression.
- Predictions are based on word frequency patterns.
- Model does NOT understand real human meaning or emotions.

2) Accuracy Limits:
- Accuracy depends on training data quality.
- Model may fail on sarcasm, mixed emotions, or slang.
- Should NOT be used for critical decision making.

3) Bias Risk:
- If training data is biased → predictions will be biased.
- Must test model on diverse datasets.

4) Ethical Use:
- Do NOT use for hiring decisions.
- Do NOT use for mental health diagnosis.
- Do NOT use for sensitive personal profiling.

5) Human Oversight:
- Always include human review for important decisions.
"""

# ----------------------------------------------------------
# IMPORT LIBRARIES
# ----------------------------------------------------------

import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split


# ----------------------------------------------------------
# SAMPLE DATASET
# ----------------------------------------------------------

data = {
    "text": [
        "I love this product",
        "This is very bad",
        "Amazing quality",
        "Worst experience",
        "Very good service",
        "Not satisfied"
    ],
    "sentiment": [
        "Positive",
        "Negative",
        "Positive",
        "Negative",
        "Positive",
        "Negative"
    ]
}

df = pd.DataFrame(data)


# ----------------------------------------------------------
# TRAIN MODEL
# ----------------------------------------------------------

def train_model(df):
    """
    RESPONSIBLE MODEL TRAINING NOTES:
    - Dataset should be balanced.
    - Dataset should be checked for offensive or biased content.
    - Dataset should represent diverse user groups.
    """

    X = df["text"]
    y = df["sentiment"]

    vectorizer = TfidfVectorizer()
    X_vector = vectorizer.fit_transform(X)

    X_train, X_test, y_train, y_test = train_test_split(
        X_vector, y, test_size=0.2, random_state=42
    )

    model = LogisticRegression()
    model.fit(X_train, y_train)

    accuracy = model.score(X_test, y_test)
    print("Model Accuracy:", accuracy)

    return model, vectorizer


# ----------------------------------------------------------
# PREDICTION FUNCTION
# ----------------------------------------------------------

def predict_sentiment(model, vectorizer, text):
    """
    RESPONSIBLE PREDICTION NOTES:
    - Prediction is probabilistic, not 100% correct.
    - Should not be used for sensitive decisions.
    - User text should not be stored without consent.
    """

    text_vector = vectorizer.transform([text])
    prediction = model.predict(text_vector)
    return prediction[0]


# ----------------------------------------------------------
# RUN MODEL
# ----------------------------------------------------------

model, vectorizer = train_model(df)

test_text = "This product is good"
result = predict_sentiment(model, vectorizer, test_text)

print("Predicted Sentiment:", result)


2026-02-12 04:56:03,508 - INFO - NumExpr defaulting to 2 threads.
Model Accuracy: 0.5
Predicted Sentiment: Positive
