In [3]:
!pip install python-dotenv


Collecting python-dotenv
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Downloading python_dotenv-1.1.0-py3-none-any.whl (20 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.1.0


In [2]:
pip install openai==0.28

Collecting openai==0.28
  Downloading openai-0.28.0-py3-none-any.whl.metadata (13 kB)
Downloading openai-0.28.0-py3-none-any.whl (76 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/76.5 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.5/76.5 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: openai
  Attempting uninstall: openai
    Found existing installation: openai 1.86.0
    Uninstalling openai-1.86.0:
      Successfully uninstalled openai-1.86.0
Successfully installed openai-0.28.0


In [4]:
!pip install langchain openai tqdm




In [None]:
!pip install langchain langchain-community openai



In [None]:
pip install openai langchain langchain-openai python-dotenv pandas tqdm


Collecting openai
  Using cached openai-1.86.0-py3-none-any.whl.metadata (25 kB)
Using cached openai-1.86.0-py3-none-any.whl (730 kB)
Installing collected packages: openai
  Attempting uninstall: openai
    Found existing installation: openai 0.28.0
    Uninstalling openai-0.28.0:
      Successfully uninstalled openai-0.28.0
Successfully installed openai-1.86.0


In [None]:
api_key = "sk-xxx"  # Replace with your key


In [None]:
pip install deepseek



In [None]:
import os
import re
import json
import pandas as pd
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
import openai
from tqdm import tqdm
import zipfile

# --- Load Keys ---
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
api_key = openai.api_key or "sk-your-fallback-key"  # Replace if needed
print("✅ Loaded OpenAI Key:", bool(api_key))

# ✅ LangChain LLM (explicit API key)
llm = ChatOpenAI(model="gpt-4", temperature=0.5, openai_api_key=api_key)

# --- WhatsApp Chat Extraction ---
def extract_whatsapp_messages(zip_path):
    try:
        with zipfile.ZipFile(zip_path, '/content/whatsapp_chat_analysis.zip') as zip_ref:
            file_list = zip_ref.namelist()
            print("🔍 Files in ZIP:", file_list)
            txt_files = [f for f in file_list if f.endswith('.txt')]
            if not txt_files:
                print("❌ No .txt file found in ZIP archive.")
                return []
            chat_file = txt_files[0]
            with zip_ref.open(chat_file) as f:
                try:
                    chat_data = f.read().decode('utf-8')
                except UnicodeDecodeError:
                    chat_data = f.read().decode('latin1')
    except FileNotFoundError:
        print(f"❌ ZIP file not found: {zip_path}")
        return []
    except Exception as e:
        print("❌ Failed to extract chat:", e)
        return []

    pattern = re.compile(r'^\d{1,2}/\d{1,2}/\d{2,4}, \d{1,2}:\d{2} [APMapm]{2} - ([^:]+): (.+)$')
    messages = []
    for line in chat_data.split("\n"):
        match = pattern.match(line)
        if match:
            sender, msg = match.groups()
            if msg.strip() and "media omitted" not in msg.lower():
                messages.append(f"{sender}: {msg}")
    return messages


# --- WhatsApp Chat Analysis ---
def analyze_chat(messages, n=50):
    recent = "\n".join(messages[-n:])
    prompt_template = """
You are a futuristic AI therapist from 2030.

Analyze these WhatsApp messages:
- Emotional tone (stress, joy, anxiety)
- Mental clarity & decision style
- Mindset type: proactive, reactive, balanced

Recommend:
- 3 apps/habits to avoid
- 3 uplifting movies and songs
- 3 good daily mental health habits

Output ONLY in JSON format as:
{"emotional_tone": "...", "clarity": "...", "mindset": "...", "avoid": [...], "recommend": {"movies": [...], "songs": [...]}, "habits": [...] }

Chat:
{chat}
"""
    prompt = PromptTemplate(input_variables=["chat"], template=prompt_template)
    formatted_prompt = prompt.format(chat=recent)
    response = llm.invoke(formatted_prompt)

    try:
        return json.loads(response.content)
    except json.JSONDecodeError:
        print("⚠️ JSON parsing failed in chat analysis. Raw output:\n", response.content)
        return response.content

# --- Screen Time Analysis ---
def load_screen_time(csv_path):
    return pd.read_csv(csv_path)

def analyze_screen_time(df):
    readable = df.to_string(index=False)
    prompt_template = """
You are a digital wellness AI.

Analyze this screen time data:
- Focus vs distraction
- Burnout, overuse, addiction
- Decision fatigue signs

Recommend:
- Mental clarity (0-100)
- Avoid apps
- 3 inspiring movies and calming songs
- 3 digital detox habits

Output ONLY in JSON format as:
{"clarity_score": 0-100, "fatigue": "...", "avoid_apps": [...], "recommend": {"movies": [...], "songs": [...]}, "habits": [...] }

Screen Time Data:
{data}
"""
    prompt = PromptTemplate(input_variables=["data"], template=prompt_template)
    formatted_prompt = prompt.format(data=readable)
    response = llm.invoke(formatted_prompt)

    try:
        json_data = json.loads(response.content)
        return validate_screen_time_json(json_data)
    except json.JSONDecodeError:
        print("⚠️ JSON parsing failed in screen time analysis. Raw output:\n", response.content)
        return response.content

# Validation function for clarity_score
def validate_screen_time_json(data):
    if not isinstance(data, dict):
        print("⚠️ Screen time data not a dict, returning None")
        return None
    if "clarity_score" not in data:
        print("⚠️ 'clarity_score' missing, setting default 50")
        data["clarity_score"] = 50
    else:
        try:
            val = int(data["clarity_score"])
            data["clarity_score"] = max(0, min(100, val))
        except Exception:
            print("⚠️ 'clarity_score' invalid, setting default 50")
            data["clarity_score"] = 50
    return data

# --- Twitter Sentiment Analysis ---
def analyze_tweets(df):
    print("🔍 Columns in CSV:", df.columns.tolist())
    tweet_col = None
    for col in df.columns:
        if col.strip().lower() in ["tweet", "text", "message", "content"]:
            tweet_col = col
            break
    if not tweet_col:
        string_cols = df.select_dtypes(include='object')
        tweet_col = string_cols.apply(lambda c: c.str.len().mean()).idxmax()
        print(f"✅ Auto-selected tweet column: '{tweet_col}'")

    def analyze_sentiment_llm(tweet):
        prompt = f"""
You are a sentiment expert. Classify the tweet as one word: Positive, Negative, or Neutral.

Tweet: "{tweet}"
Sentiment:"""
        try:
            res = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.0
            )
            return res['choices'][0]['message']['content'].strip()
        except Exception as e:
            print("⚠️ Error analyzing tweet:", e)
            return "Error"

    tqdm.pandas()
    df["sentiment"] = df[tweet_col].progress_apply(analyze_sentiment_llm)
    return df

# --- Final Report Synthesis ---
def synthesize_report(chat_json, screen_json, sentiment_df):
    sentiment_counts = sentiment_df["sentiment"].value_counts().to_dict()
    sentiment_summary = f"Sentiment counts: {sentiment_counts}"

    prompt_template = """
You are a NeuroAI fusion advisor from the future.

Combine these:
1. WhatsApp analysis:
{chat_json}

2. Screen time report:
{screen_json}

3. Twitter sentiment:
{sentiments}

Summarize teen mental health:
- Mood and stress pattern
- Top 3 issues
- Mindfulness movie/song list
- Futuristic habit suggestions

Respond warmly and clearly.
"""
    prompt = PromptTemplate(
        input_variables=["chat_json", "screen_json", "sentiments"],
        template=prompt_template
    )
    formatted_prompt = prompt.format(
        chat_json=json.dumps(chat_json, indent=2) if isinstance(chat_json, dict) else str(chat_json),
        screen_json=json.dumps(screen_json, indent=2) if isinstance(screen_json, dict) else str(screen_json),
        sentiments=sentiment_summary
    )
    response = llm.invoke(formatted_prompt)
    return response.content

# --- MAIN PIPELINE ---
if __name__ == "__main__":
    whatsapp_file = "whatsapp_chat_analysis.zip"
    screen_time_file = "/content/screentime_analysis.csv"
    twitter_file = "teen_tweets.csv"

    try:
        # 1. WhatsApp
        chat_msgs = extract_whatsapp_messages(whatsapp_file)
        chat_result = analyze_chat(chat_msgs) if chat_msgs else "No usable messages."

        # 2. Screen Time
        df_screen = load_screen_time(screen_time_file)
        screen_result = analyze_screen_time(df_screen)

        # 3. Twitter Sentiment
        df_tweets = pd.read_csv(twitter_file)
        sentiment_df = analyze_tweets(df_tweets)

        # 4. Final Mental Health Report
        final_report = synthesize_report(chat_result, screen_result, sentiment_df)
        print("\n🧠 Final Mental Health Summary:\n", final_report)

    except Exception as e:
        print("❌ Pipeline failed:", e)


✅ Loaded OpenAI Key: True
❌ Failed to extract chat: ZipFile requires mode 'r', 'w', 'x', or 'a'
❌ Pipeline failed: '"clarity_score"'


In [None]:
import os
import re
import json
import pandas as pd
from dotenv import load_dotenv
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage
import openai
from tqdm import tqdm
import zipfile
import time

# Load environment variables and OpenAI key
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
api_key = openai.api_key or "sk-your-fallback-key"
print("✅ Loaded OpenAI Key:", bool(api_key))

# Initialize LangChain LLM with explicit API key
llm = ChatOpenAI(model_name="gpt-4", temperature=0.5, openai_api_key=api_key)

# --- WhatsApp Chat Extraction ---
def extract_whatsapp_messages(zip_path):
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            file_list = zip_ref.namelist()
            print("🔍 Files in ZIP:", file_list)
            txt_files = [f for f in file_list if f.endswith('.txt')]
            if not txt_files:
                print("/content/whatsapp_chat_analysis.zip.")
                return []
            chat_file = txt_files[0]
            with zip_ref.open(chat_file) as f:
                try:
                    chat_data = f.read().decode('utf-8')
                except UnicodeDecodeError:
                    chat_data = f.read().decode('latin1')
    except FileNotFoundError:
        print(f"❌ ZIP file not found: {zip_path}")
        return []
    except Exception as e:
        print("❌ Failed to extract chat:", e)
        return []

    # Join multiline messages: lines NOT starting with date pattern belong to previous line
    lines = chat_data.split('\n')
    merged_lines = []
    date_pattern = re.compile(r'^\d{1,2}/\d{1,2}/\d{2,4}, \d{1,2}:\d{2} [APMapm]{2} - ')
    buffer = ""
    for line in lines:
        if date_pattern.match(line):
            if buffer:
                merged_lines.append(buffer)
            buffer = line
        else:
            buffer += " " + line.strip()
    if buffer:
        merged_lines.append(buffer)

    pattern = re.compile(r'^\d{1,2}/\d{1,2}/\d{2,4}, \d{1,2}:\d{2} [APMapm]{2} - ([^:]+): (.+)$')
    messages = []
    for line in merged_lines:
        match = pattern.match(line)
        if match:
            sender, msg = match.groups()
            if msg.strip() and "media omitted" not in msg.lower():
                messages.append(f"{sender}: {msg}")
    print(f"✅ Extracted {len(messages)} messages from chat")
    return messages

# --- WhatsApp Chat Analysis ---
def analyze_chat(messages, n=50):
    recent = "\n".join(messages[-n:])
    prompt_template_str = """
You are a futuristic AI therapist from 2030.

Analyze these WhatsApp messages:
- Emotional tone (stress, joy, anxiety)
- Mental clarity & decision style
- Mindset type: proactive, reactive, balanced

Recommend:
- 3 apps/habits to avoid
- 3 uplifting movies and songs
- 3 good daily mental health habits

Output ONLY in JSON format as:
{"emotional_tone": "...", "clarity": "...", "mindset": "...", "avoid": [...], "recommend": {"movies": [...], "songs": [...]}, "habits": [...] }

Chat:
{chat}
"""
    prompt = PromptTemplate(input_variables=["chat"], template=prompt_template_str)
    formatted_prompt = prompt.format(chat=recent)

    response = llm([HumanMessage(content=formatted_prompt)])
    text = response[0].content if isinstance(response, list) else getattr(response, "content", str(response))

    try:
        return json.loads(text)
    except json.JSONDecodeError:
        print("⚠️ JSON parsing failed in chat analysis. Raw output:\n", text)
        return text

# --- Screen Time Analysis ---
def load_screen_time(csv_path):
    try:
        df = pd.read_csv(csv_path)
        print(f"✅ Loaded screen time data: {df.shape[0]} rows, {df.shape[1]} columns")
        return df
    except Exception as e:
        print(f"❌ Failed to load screen time CSV: {e}")
        return pd.DataFrame()

def analyze_screen_time(df):
    readable = df.to_string(index=False)
    prompt_template_str = """
You are a digital wellness AI.

Analyze this screen time data:
- Focus vs distraction
- Burnout, overuse, addiction
- Decision fatigue signs

Recommend:
- Mental clarity (0-100)
- Avoid apps
- 3 inspiring movies and calming songs
- 3 digital detox habits

Output ONLY in JSON format as:
{"clarity_score": 0, "fatigue": "...", "avoid_apps": [...], "recommend": {"movies": [...], "songs": [...]}, "habits": [...] }

Screen Time Data:
{data}
"""
    prompt = PromptTemplate(input_variables=["data"], template=prompt_template_str)
    formatted_prompt = prompt.format(data=readable)

    response = llm([HumanMessage(content=formatted_prompt)])
    text = response[0].content if isinstance(response, list) else getattr(response, "content", str(response))

    def validate_screen_time_json(data):
      if not isinstance(data, dict):
        print("⚠️ Screen time data not a dict, returning None")
        return None
    df["clarity_score"]
    results.get("clarity_score", 50)

    if "clarity_score" in df.columns:
        clarity = df["clarity_score"]
    else:
        print("Column 'clarity_score' not found.")
        clarity = None  # or some default value
        try:
            val = int(data["clarity_score"])
            data["clarity_score"] = max(0, min(100, val))
        except Exception:
            print("⚠️ 'clarity_score' invalid type, setting default 50")
            data["clarity_score"] = 50

    # Optional: Check other keys if necessary and fill defaults or clean

    return data


# --- Twitter Sentiment Analysis ---
def analyze_tweets(df):
    print("🔍 Columns in CSV:", df.columns.tolist())
    tweet_col = None
    for col in df.columns:
        if col.strip().lower() in ["tweet", "text", "message", "content"]:
            tweet_col = col
            break
    if not tweet_col:
        string_cols = df.select_dtypes(include='object')
        tweet_col = string_cols.apply(lambda c: c.str.len().mean()).idxmax()
        print(f"✅ Auto-selected tweet column: '{tweet_col}'")

    def analyze_sentiment_llm(tweet):
        prompt = f"""
You are a sentiment expert. Classify the tweet as one word: Positive, Negative, or Neutral.

Tweet: "{tweet}"
Sentiment:"""
        try:
            res = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.0
            )
            sentiment = res['choices'][0]['message']['content'].strip()
            # Normalize output
            if sentiment.lower() not in ["positive", "negative", "neutral"]:
                return "Neutral"
            return sentiment
        except Exception as e:
            print("⚠️ Error analyzing tweet:", e)
            return "Error"

    tqdm.pandas()
    df["sentiment"] = df[tweet_col].progress_apply(analyze_sentiment_llm)
    return df

# --- Final Report Synthesis ---
def synthesize_report(chat_json, screen_json, sentiment_df):
    sentiment_counts = sentiment_df["sentiment"].value_counts().to_dict()
    sentiment_summary = f"Sentiment counts: {sentiment_counts}"

    prompt_template_str = """
You are a NeuroAI fusion advisor from the future.

Combine these:
1. WhatsApp analysis:
{chat_json}

2. Screen time report:
{screen_json}

3. Twitter sentiment:
{sentiments}

Summarize teen mental health:
- Mood and stress pattern
- Top 3 issues
- Mindfulness movie/song list
- Futuristic habit suggestions

Respond warmly and clearly.
"""
    prompt = PromptTemplate(
        input_variables=["chat_json", "screen_json", "sentiments"],
        template=prompt_template_str
    )
    formatted_prompt = prompt.format(
        chat_json=json.dumps(chat_json, indent=2) if isinstance(chat_json, dict) else str(chat_json),
        screen_json=json.dumps(screen_json, indent=2) if isinstance(screen_json, dict) else str(screen_json),
        sentiments=sentiment_summary
    )

    response = llm([HumanMessage(content=formatted_prompt)])
    text = response[0].content if isinstance(response, list) else getattr(response, "content", str(response))
    return text

# --- MAIN PIPELINE ---
if __name__ == "__main__":
    whatsapp_file = "whatsapp_chat_analysis.zip"
    screen_time_file = "screentime_analysis.csv"
    twitter_file = "teen_tweets.csv"

    try:
        # 1. WhatsApp
        chat_msgs = extract_whatsapp_messages(whatsapp_file)
        chat_result = analyze_chat(chat_msgs) if chat_msgs else "No usable messages."

        # 2. Screen Time
        df_screen = load_screen_time(screen_time_file)
        screen_result = analyze_screen_time(df_screen) if not df_screen.empty else "No screen time data."

        # 3. Twitter Sentiment
        df_tweets = pd.read_csv(twitter_file)
        sentiment_df = analyze_tweets(df_tweets)

        # 4. Final Mental Health Report
        final_report = synthesize_report(chat_result, screen_result, sentiment_df)
        print("\n🧠 Final Mental Health Summary:\n", final_report)

    except Exception as e:
        print(" Pipeline :", e)

    import json

# --- INPUT JSON ---
data = '''
{
  "mood": "Stressed and anxious with periods of joy",
  "top_issues": ["Overuse of social media", "Sleep deprivation", "Lack of mental clarity"],
  "recommended_movies": ["Inside Out", "The Pursuit of Happyness", "Soul"],
  "recommended_songs": ["Weightless - Marconi Union", "Lovely Day - Bill Withers", "Here Comes the Sun - The Beatles"],
  "habits": ["Daily journaling", "30-minute screen-free walk", "Night-time digital detox routine"]
}
'''

# --- LOAD JSON INTO PYTHON DICTIONARY ---
analysis = json.loads(data)

# --- PRINT IN A CLEAN FORMAT ---
print("\n🧠 Mood Summary:")
print(f"  - Mood: {analysis['mood']}")

print("\n🚩 Top Issues Detected:")
for issue in analysis['top_issues']:
    print(f"  - {issue}")

print("\n🎬 Recommended Movies:")
for movie in analysis['recommended_movies']:
    print(f"  - {movie}")

print("\n🎵 Recommended Songs:")
for song in analysis['recommended_songs']:
    print(f"  - {song}")

print("\n🌿 Suggested Mental Health Habits:")
for habit in analysis['habits']:
    print(f"  - {habit}")


def analyze_screen_time(df):
    readable = df.to_string(index=False)

    prompt_template_str = """
You are a digital wellness AI.

Analyze this screen time data:
- Focus vs distraction
- Burnout, overuse, addiction
- Decision fatigue signs

Recommend:
- Mental clarity (0-100)
- Avoid apps
- 3 inspiring movies and calming songs
- 3 digital detox habits

Output ONLY in JSON format as:
{"clarity_score": 0, "fatigue": "...", "avoid_apps": [...], "recommend": {"movies": [...], "songs": [...]}, "habits": [...] }

Screen Time Data:
{data}
"""
    prompt = PromptTemplate(input_variables=["data"], template=prompt_template_str)
    formatted_prompt = prompt.format(data=readable)

    response = llm([HumanMessage(content=formatted_prompt)])
    text = response[0].content if isinstance(response, list) else getattr(response, "content", str(response))

    try:
        data = json.loads(text)
    except json.JSONDecodeError:
        print("⚠️ JSON parsing failed in screen time analysis. Raw output:\n", text)
        return {"clarity_score": 50, "fatigue": "Unknown", "avoid_apps": [], "recommend": {"movies": [], "songs": []}, "habits": []}

    # ✅ Validation & fallback handling
    if not isinstance(data, dict):
        print("⚠️ Screen time JSON result is not a dictionary.")
        return {"clarity_score": 50, "fatigue": "Unknown", "avoid_apps": [], "recommend": {"movies": [], "songs": []}, "habits": []}

    # Clamp clarity score between 0–100
    try:
        val = int(data.get("clarity_score", 50))
        data["clarity_score"] = max(0, min(100, val))
    except Exception:
        print("⚠️ Invalid clarity_score, setting default 50")
        data["clarity_score"] = 50

    return data
print(f"🧠 Mood: {analysis['mood']} | 🚩 Issues: {', '.join(analysis['top_issues'])} | 🎬 Movies: {', '.join(analysis['recommended_movies'])} | 🎵 Songs: {', '.join(analysis['recommended_songs'])} | 🌿 Habits: {', '.join(analysis['habits'])}")








✅ Loaded OpenAI Key: True
🔍 Files in ZIP: ['whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/1.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/10.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/11.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/12.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/13.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/14.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/15.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/16.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/17.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/18.png', 'what

  llm = ChatOpenAI(model_name="gpt-4", temperature=0.5, openai_api_key=api_key)


In [None]:
import os
import re
import json
import pandas as pd
from google.colab import files
from dotenv import load_dotenv
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage
import openai
from tqdm import tqdm
import zipfile
import requests
from IPython.display import display, Markdown

# Install required packages
!pip install python-dotenv langchain openai pandas tqdm requests

# --- Setup for Google Colab ---
# Upload your files
print("Please upload your files:")
uploaded = files.upload()

# Get the uploaded file names
uploaded_files = list(uploaded.keys())
print("Uploaded files:", uploaded_files)

# Set up environment variables
os.environ["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"] = ""  # You'll set this below
os.environ["DEEPSEEK_API_KEY"] = ""  # You'll set this below

# --- API Key Setup ---
print("\n🔑 API Key Setup")
print("1. Get your OpenAI API key from: https://platform.openai.com/api-keys")
print("2. Get your DeepSeek API key from their website (if using DeepSeek)")

OPENAI_API_KEY = input("Enter your OpenAI API key (or press Enter to skip): ").strip()
DEEPSEEK_API_KEY = input("Enter your DeepSeek API key (or press Enter to skip): ").strip()

if not OPENAI_API_KEY and not DEEPSEEK_API_KEY:
    print("❌ Error: You need at least one API key to continue")
    exit()

if OPENAI_API_KEY:
    os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
if DEEPSEEK_API_KEY:
    os.environ["DEEPSEEK_API_KEY"] = DEEPSEEK_API_KEY

# Initialize models
llm_gpt = None
if OPENAI_API_KEY:
    try:
        llm_gpt = ChatOpenAI(
            model_name="gpt-3.5-turbo",
            temperature=0.5,
            openai_api_key=OPENAI_API_KEY
        )
        print("✅ OpenAI GPT initialized successfully")
    except Exception as e:
        print(f"❌ Failed to initialize OpenAI: {e}")

# --- DeepSeek Helper Functions ---
def deepseek_completion(prompt, model="deepseek-chat"):
    if not DEEPSEEK_API_KEY:
        return "DeepSeek API key not available"

    headers = {
        "Authorization": f"Bearer {DEEPSEEK_API_KEY}",
        "Content-Type": "application/json"
    }
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}],
        "temperature": 0.5
    }

    try:
        response = requests.post(
            "https://api.deepseek.com/v1/chat/completions",
            headers=headers,
            json=payload,
            timeout=30
        )
        response.raise_for_status()
        return response.json()["choices"][0]["message"]["content"]
    except Exception as e:
        print(f"❌ DeepSeek API error: {e}")
        return ""

# --- Model Selection Helper ---
def get_llm_response(prompt, model="deepseek"):
    if model.lower() == "gpt" and llm_gpt:
        try:
            response = llm_gpt([HumanMessage(content=prompt)])
            return response[0].content if isinstance(response, list) else str(response)
        except Exception as e:
            print(f"❌ OpenAI error: {e}")
            return ""
    elif model.lower() == "deepseek":
        return deepseek_completion(prompt)
    else:
        return "Invalid model selected"

# --- WhatsApp Chat Extraction ---
def extract_whatsapp_messages(zip_path):
    if not os.path.exists(zip_path):
        print(f"❌ WhatsApp file not found: {zip_path}")
        return []

    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            txt_files = [f for f in zip_ref.namelist() if f.endswith('.txt')]
            if not txt_files:
                return []
            with zip_ref.open(txt_files[0]) as f:
                try:
                    chat_data = f.read().decode('utf-8')
                except UnicodeDecodeError:
                    chat_data = f.read().decode('latin1')
    except Exception as e:
        print("❌ Error extracting chat:", e)
        return []

    lines = chat_data.split('\n')
    merged_lines = []
    date_pattern = re.compile(r'^\d{1,2}/\d{1,2}/\d{2,4}, \d{1,2}:\d{2} [APMapm]{2} - ')
    buffer = ""
    for line in lines:
        if date_pattern.match(line):
            if buffer:
                merged_lines.append(buffer)
            buffer = line
        else:
            buffer += " " + line.strip()
    if buffer:
        merged_lines.append(buffer)

    pattern = re.compile(r'^\d{1,2}/\d{1,2}/\d{2,4}, \d{1,2}:\d{2} [APMapm]{2} - ([^:]+): (.+)$')
    messages = [f"{m.group(1)}: {m.group(2)}" for m in map(pattern.match, merged_lines) if m and "media omitted" not in m.group(2).lower()]
    return messages

# --- WhatsApp Chat Analysis ---
def analyze_chat(messages, n=50, model="deepseek"):
    if not messages:
        return {"error": "No messages to analyze"}

    recent = "\n".join(messages[-n:])
    prompt_template = """
You are a futuristic AI therapist from 2030.

Analyze these WhatsApp messages:
- Emotional tone (stress, joy, anxiety)
- Mental clarity & decision style
- Mindset type: proactive, reactive, balanced

Recommend:
- 3 apps/habits to avoid
- 3 uplifting movies and songs
- 3 good daily mental health habits

Output ONLY in JSON format:
{"emotional_tone": "...", "clarity": "...", "mindset": "...", "avoid": [...], "recommend": {"movies": [...], "songs": [...]}, "habits": [...]}

Chat:
{chat}
"""
    prompt = prompt_template.format(chat=recent)
    response = get_llm_response(prompt, model)
    try:
        return json.loads(response)
    except:
        print("⚠️ Chat JSON parse error. Raw response:\n", response)
        return {"error": "Failed to parse analysis"}

# --- Screen Time Analysis ---
def load_screen_time(csv_path):
    if not os.path.exists(csv_path):
        print(f"❌ Screen time file not found: {csv_path}")
        return pd.DataFrame()

    try:
        return pd.read_csv(csv_path)
    except Exception as e:
        print(f"❌ Failed to load screen time CSV: {e}")
        return pd.DataFrame()

def analyze_screen_time(df, model="deepseek"):
    if df.empty:
        return {"error": "No screen time data"}

    readable = df.to_string(index=False)
    prompt_template = """
You are a digital wellness AI.

Analyze this screen time data:
- Focus vs distraction
- Burnout, overuse, addiction
- Decision fatigue signs

Recommend:
- Mental clarity (0-100)
- Avoid apps
- 3 inspiring movies and calming songs
- 3 digital detox habits

Output ONLY in JSON format:
{"clarity_score": 0, "fatigue": "...", "avoid_apps": [...], "recommend": {"movies": [...], "songs": [...]}, "habits": [...]}

Screen Time Data:
{data}
"""
    prompt = prompt_template.format(data=readable)
    response = get_llm_response(prompt, model)
    try:
        data = json.loads(response)
        data["clarity_score"] = max(0, min(100, int(data.get("clarity_score", 50))))
        return data
    except:
        print("⚠️ Screen time JSON parse error. Raw response:\n", response)
        return {"error": "Failed to parse analysis"}

# --- Twitter Sentiment Analysis ---
def analyze_tweets(df, model="deepseek"):
    if df.empty:
        print("❌ No tweet data available")
        return pd.DataFrame()

    tweet_col = next((col for col in df.columns if col.lower() in ["tweet", "text", "message", "content"]), None)
    if not tweet_col:
        tweet_col = df.select_dtypes(include='object').columns[0]

    def analyze_sentiment(tweet):
        prompt = f'Tweet: "{tweet}"\nClassify as one word: Positive, Negative, or Neutral.'
        response = get_llm_response(prompt, model)
        sentiment = response.strip().capitalize()
        return sentiment if sentiment in ["Positive", "Negative", "Neutral"] else "Neutral"

    tqdm.pandas(desc="Analyzing tweets")
    df["sentiment"] = df[tweet_col].progress_apply(analyze_sentiment)
    return df

# --- Final Report ---
def synthesize_report(chat_json, screen_json, sentiment_df, model="deepseek"):
    if isinstance(chat_json, str) or "error" in chat_json:
        chat_json = {"error": "No chat analysis available"}
    if isinstance(screen_json, str) or "error" in screen_json:
        screen_json = {"error": "No screen time analysis available"}
    if sentiment_df.empty:
        sentiment_summary = {"error": "No sentiment analysis available"}
    else:
        sentiment_summary = sentiment_df["sentiment"].value_counts().to_dict()

    prompt = """
You are a NeuroAI advisor. Create a comprehensive mental health report based on:

1. WhatsApp Analysis:
{chat_json}

2. Screen Time Report:
{screen_json}

3. Twitter Sentiment:
{sentiments}

Include these sections:
- Overall mood assessment
- Top 3 concerns
- Recommended media (movies/songs)
- 3 personalized daily habits
- Digital wellness suggestions

Write in a compassionate, professional tone.
"""
    full_prompt = prompt.format(
        chat_json=json.dumps(chat_json, indent=2),
        screen_json=json.dumps(screen_json, indent=2),
        sentiments=json.dumps(sentiment_summary, indent=2)
    )
    return get_llm_response(full_prompt, model)

# --- MAIN PIPELINE ---
def main():
    # Input files configuration (use the names you uploaded)
    input_files = {
        "whatsapp": None,
        "screen_time": None,
        "twitter": None
    }

    # Match uploaded files to expected types
    for filename in uploaded_files:
        if 'whatsapp' in filename.lower() or filename.endswith('.zip'):
            input_files["whatsapp"] = filename
        elif 'screen' in filename.lower() or filename.endswith('.csv'):
            input_files["screen_time"] = filename
        elif 'tweet' in filename.lower() or filename.endswith('.csv'):
            input_files["twitter"] = filename

    # Verify we found all required files
    missing_files = [name for name, path in input_files.items() if path is None]
    if missing_files:
        print(f"❌ Could not identify these required files: {', '.join(missing_files)}")
        print("Please ensure your uploaded files contain these keywords in their names:")
        print("- whatsapp (or .zip) for WhatsApp chat")
        print("- screen (or .csv) for screen time data")
        print("- tweet (or .csv) for Twitter data")
        return

    # Check available models
    available_models = []
    if llm_gpt:
        available_models.append("gpt")
    if DEEPSEEK_API_KEY:
        available_models.append("deepseek")

    if not available_models:
        print("❌ No available models. Please check your API keys.")
        return

    # Model selection
    print(f"\nAvailable models: {', '.join(available_models)}")
    model_choice = input("Choose model: ").strip().lower()
    while model_choice not in available_models:
        print(f"Invalid choice. Please select from: {', '.join(available_models)}")
        model_choice = input("Choose model: ").strip().lower()

    try:
        print("\n🔍 Starting analysis...")

        # WhatsApp analysis
        print("- Analyzing WhatsApp messages...")
        whatsapp_messages = extract_whatsapp_messages(input_files["whatsapp"])
        whatsapp_analysis = analyze_chat(whatsapp_messages, model=model_choice)

        # Screen time analysis
        print("- Analyzing screen time data...")
        screen_df = load_screen_time(input_files["screen_time"])
        screen_analysis = analyze_screen_time(screen_df, model=model_choice)

        # Twitter analysis
        print("- Analyzing tweets...")
        tweets_df = pd.read_csv(input_files["twitter"])
        sentiment_analysis = analyze_tweets(tweets_df, model=model_choice)

        # Generate final report
        print("\n📊 Generating final report...")
        report = synthesize_report(whatsapp_analysis, screen_analysis, sentiment_analysis, model=model_choice)

        # Display the report with nice formatting
        print("\n" + "="*50)
        print("🧠 TEEN MENTAL HEALTH REPORT".center(50))
        print("="*50)
        display(Markdown(report))
        print("="*50)

    except Exception as e:
        print(f"\n❌ Pipeline failed: {e}")

# Run the analysis
if __name__ == "__main__":
    main()

Please upload your files:


Saving TSA-teen age group, whatsapp_chat&screen_time_analysis.ipynb to TSA-teen age group, whatsapp_chat&screen_time_analysis (2).ipynb
Uploaded files: ['TSA-teen age group, whatsapp_chat&screen_time_analysis (2).ipynb']

🔑 API Key Setup
1. Get your OpenAI API key from: https://platform.openai.com/api-keys
2. Get your DeepSeek API key from their website (if using DeepSeek)
Enter your OpenAI API key (or press Enter to skip): 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
Enter your DeepSeek API key (or press Enter to skip): 
✅ OpenAI GPT initialized successfully
❌ Could not identify these required files: screen_time, twitter
Please ensure your uploaded files contain these keywords in their names:
- whatsapp (or .zip) for WhatsApp chat
- screen (or .csv) for screen time data
- tweet (or .csv) for Twitter data


In [12]:
pip install pandas python-dotenv mistral_ai openai tqdm


[31mERROR: Could not find a version that satisfies the requirement mistral_ai (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for mistral_ai[0m[31m
[0m

In [9]:
import os
import json
import pandas as pd
from dotenv import load_dotenv
from tqdm import tqdm
from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage
import openai

# Load environment variables
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")

# Initialize OpenAI client
llm = openai.ChatCompletion()

# WhatsApp Chat Extraction
def extract_whatsapp_messages(zip_path):
    # Placeholder for actual extraction logic
    return ["User1: Hello!", "User2: Hi there!", "User1: How are you?"]

# WhatsApp Chat Analysis
def analyze_chat(messages, n=50):
    recent = "\n".join(messages[-n:])
    prompt_template_str = """
    You are a futuristic AI therapist from 2030.

    Analyze these WhatsApp messages:
    - Emotional tone (stress, joy, anxiety)
    - Mental clarity & decision style
    - Mindset type: proactive, reactive, balanced

    Recommend:
    - 3 apps/habits to avoid
    - 3 uplifting movies and songs
    - 3 good daily mental health habits

    Output ONLY in JSON format as:
    {"emotional_tone": "...", "clarity": "...", "mindset": "...", "avoid": [...], "recommend": {"movies": [...], "songs": [...]}, "habits": [...] }
    Chat:
    {chat}
    """
    prompt = PromptTemplate(input_variables=["chat"], template=prompt_template_str)
    formatted_prompt = prompt.format(chat=recent)

    response = llm.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": formatted_prompt}]
    )

    text = response['choices'][0]['message']['content']
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        print("JSON parsing failed in chat analysis. Raw output:\n", text)
        return text

# Screen Time Analysis
def load_screen_time(csv_path):
    try:
        df = pd.read_csv(csv_path)
        print(f"Loaded screen time data: {df.shape[0]} rows, {df.shape[1]} columns")
        return df
    except Exception as e:
        print(f"Failed to load screen time CSV: {e}")
        return pd.DataFrame()

def analyze_screen_time(df):
    readable = df.to_string(index=False)
    prompt_template_str = """
    You are a digital wellness AI.

    Analyze this screen time data:
    - Focus vs distraction
    - Burnout, overuse, addiction
    - Decision fatigue signs

    Recommend:
    - Mental clarity (0-100)
    - Avoid apps
    - 3 inspiring movies and calming songs
    - 3 digital detox habits

    Output ONLY in JSON format as:
    {"clarity_score": 0, "fatigue": "...", "avoid_apps": [...], "recommend": {"movies": [...], "songs": [...]}, "habits": [...] }

    Screen Time Data:
    {data}
    """
    prompt = PromptTemplate(input_variables=["data"], template=prompt_template_str)
    formatted_prompt = prompt.format(data=readable)

    response = llm.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": formatted_prompt}]
    )

    text = response['choices'][0]['message']['content']
    try:
        data = json.loads(text)
        if not isinstance(data, dict):
            raise ValueError("Screen time JSON result is not a dictionary.")
        val = int(data.get("clarity_score", 50))
        data["clarity_score"] = max(0, min(100, val))
        return data
    except Exception as e:
        print("Error processing screen time analysis:", e)
        return {"clarity_score": 50, "fatigue": "Unknown", "avoid_apps": [], "recommend": {"movies": [], "songs": []}, "habits": []}

# Twitter Sentiment Analysis
def analyze_tweets(df):
    print("Columns in CSV:", df.columns.tolist())
    tweet_col = next((col for col in df.columns if col.strip().lower() in ["tweet", "text", "message", "content"]), None)
    if not tweet_col:
        string_cols = df.select_dtypes(include='object')
        tweet_col = string_cols.apply(lambda c: c.str.len().mean()).idxmax()
        print(f"Auto-selected tweet column: '{tweet_col}'")

    def analyze_sentiment_llm(tweet):
        prompt = f"You are a sentiment expert. Classify the tweet as one word: Positive, Negative, or Neutral.\n\nTweet: \"{tweet}\"\nSentiment:"
        try:
            res = llm.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.0
            )
            sentiment = res['choices'][0]['message']['content'].strip()
            return sentiment if sentiment.lower() in ["positive", "negative", "neutral"] else "Neutral"
        except Exception as e:
            print("Error analyzing tweet:", e)
            return "Error"

    tqdm.pandas()
    df["sentiment"] = df[tweet_col].progress_apply(analyze_sentiment_llm)
    return df

# Final Report Synthesis
def synthesize_report(chat_json, screen_json, sentiment_df):
    sentiment_counts = sentiment_df["sentiment"].value_counts().to_dict()
    sentiment_summary = f"Sentiment counts: {sentiment_counts}"

    prompt_template_str = """
    You are a NeuroAI fusion advisor from the future.

    Combine these:
    1. WhatsApp analysis:
    {chat_json}

    2. Screen time report:
    {screen_json}

    3. Twitter sentiment:
    {sentiments}

    Summarize teen mental health:
    - Mood and stress pattern
    - Top 3 issues
    - Mindfulness movie/song list
    - Futuristic habit suggestions

    Respond warmly and clearly.
    """
    prompt = PromptTemplate(input_variables=["chat_json", "screen_json", "sentiments"], template=prompt_template_str)
    formatted_prompt = prompt.format(
        chat_json=json.dumps(chat_json, indent=2) if isinstance(chat_json, dict) else str(chat_json),
        screen_json=json.dumps(screen_json, indent=2) if isinstance(screen_json, dict) else str(screen_json),
        sentiments=sentiment_summary
    )

    response = llm.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": formatted_prompt}]
    )

    return response['choices'][0]['message']['content']

# Main Pipeline
if __name__ == "__main__":
    whatsapp_file = "whatsapp_chat_analysis.zip"
    screen_time_file = "screentime_analysis.csv"
    twitter_file = "teen_tweets.csv"

    try:
        chat_msgs = extract_whatsapp_messages(whatsapp_file)
        chat_result = analyze_chat(chat_msgs) if chat_msgs else "No usable messages."

        df_screen = load_screen_time(screen_time_file)
        screen_result = analyze_screen_time(df_screen) if not df_screen.empty else "No screen time data."

        df_tweets = pd.read_csv(twitter_file)
        sentiment_df = analyze_tweets(df_tweets)

        final_report = synthesize_report(chat_result, screen_result, sentiment_df)
        print("\nFinal Mental Health Summary:\n", final_report)

    except Exception as e:
        print("Pipeline error:", e)



def analyze_screen_time(df):
    readable = df.to_string(index=False)

    prompt_template_str = """
You are a digital wellness AI.

Analyze this screen time data:
- Focus vs distraction
- Burnout, overuse, addiction
- Decision fatigue signs

Recommend:
- Mental clarity (0-100)
- Avoid apps
- 3 inspiring movies and calming songs
- 3 digital detox habits

Output ONLY in JSON format as:
{"clarity_score": 0, "fatigue": "...", "avoid_apps": [...], "recommend": {"movies": [...], "songs": [...]}, "habits": [...] }

Screen Time Data:
{data}
"""
    prompt = PromptTemplate(input_variables=["data"], template=prompt_template_str)
    formatted_prompt = prompt.format(data=readable)

    response = llm([HumanMessage(content=formatted_prompt)])
    text = response[0].content if isinstance(response, list) else getattr(response, "content", str(response))

    try:
        data = json.loads(text)
    except json.JSONDecodeError:
        print("⚠️ JSON parsing failed in screen time analysis. Raw output:\n", text)
        return {"clarity_score": 50, "fatigue": "Unknown", "avoid_apps": [], "recommend": {"movies": [], "songs": []}, "habits": []}

    # ✅ Validation & fallback handling
    if not isinstance(data, dict):
        print("⚠️ Screen time JSON result is not a dictionary.")
        return {"clarity_score": 50, "fatigue": "Unknown", "avoid_apps": [], "recommend": {"movies": [], "songs": []}, "habits": []}

    # Clamp clarity score between 0–100
    try:
        val = int(data.get("clarity_score", 50))
        data["clarity_score"] = max(0, min(100, val))
    except Exception:
        print("⚠️ Invalid clarity_score, setting default 50")
        data["clarity_score"] = 50

    return data
print(f"🧠 Mood: {analysis['mood']} | 🚩 Issues: {', '.join(analysis['top_issues'])} | 🎬 Movies: {', '.join(analysis['recommended_movies'])} | 🎵 Songs: {', '.join(analysis['recommended_songs'])} | 🌿 Habits: {', '.join(analysis['habits'])}")




Pipeline error: '"emotional_tone"'
🧠 Mood: Stressed and anxious with periods of joy | 🚩 Issues: Overuse of social media, Sleep deprivation, Lack of mental clarity | 🎬 Movies: Inside Out, The Pursuit of Happyness, Soul | 🎵 Songs: Weightless - Marconi Union, Lovely Day - Bill Withers, Here Comes the Sun - The Beatles | 🌿 Habits: Daily journaling, 30-minute screen-free walk, Night-time digital detox routine


In [16]:
import os
import json
import pandas as pd
from dotenv import load_dotenv
from tqdm import tqdm

# Load environment variables
load_dotenv()
mistral_api_key = os.getenv("MISTRAL_API_KEY")

# Mock Mistral client for demonstration purposes
class MistralClient:
    def __init__(self, api_key):
        self.api_key = api_key

    def generate(self, model, prompt, max_tokens):
        # Mock response
        return json.dumps({
            "emotional_tone": "Neutral",
            "clarity": "Clear",
            "mindset": "Balanced",
            "avoid": ["app1", "app2", "app3"],
            "recommend": {"movies": ["Movie1", "Movie2", "Movie3"], "songs": ["Song1", "Song2", "Song3"]},
            "habits": ["Habit1", "Habit2", "Habit3"]
        })

mistral_client = MistralClient(api_key=mistral_api_key)

# Example function to analyze chat using Mistral
def analyze_chat_with_mistral(messages, n=50):
    # Ensure that messages is a list and has elements
    if not isinstance(messages, list) or not messages:
        return {"error": "Invalid messages input"}

    # Join the last n messages
    recent = "\n".join(messages[-n:])

    # Define the prompt template
    prompt_template_str = """
    You are a futuristic AI therapist from 2030.

    Analyze these WhatsApp messages:
    - Emotional tone (stress, joy, anxiety)
    - Mental clarity & decision style
    - Mindset type: proactive, reactive, balanced

    Recommend:
    - 3 apps/habits to avoid
    - 3 uplifting movies and songs
    - 3 good daily mental health habits

    Output ONLY in JSON format as:
    {{"emotional_tone": "...", "clarity": "...", "mindset": "...", "avoid": [...], "recommend": {{"movies": [...], "songs": [...]}}, "habits": [...] }}

    Chat:
    {chat}
    """

    # Format the prompt string with the recent messages
    prompt = prompt_template_str.format(chat=recent)

    # Hypothetical call to Mistral's API
    response = mistral_client.generate(
        model="mistral-model",
        prompt=prompt,
        max_tokens=150
    )

    try:
        return json.loads(response)
    except json.JSONDecodeError:
        print("JSON parsing failed in chat analysis. Raw output:\n", response)
        return {"error": "Failed to parse JSON response"}


# Example usage
messages = ["User1: Hello!", "User2: Hi there!", "User1: How are you?"]
analysis_result = analyze_chat_with_mistral(messages)
print(analysis_result)

# --- Screen Time Analysis ---
def load_screen_time(csv_path):
    try:
        df = pd.read_csv(csv_path)
        print(f"✅ Loaded screen time data: {df.shape[0]} rows, {df.shape[1]} columns")
        return df
    except Exception as e:
        print(f"❌ Failed to load screen time CSV: {e}")
        return pd.DataFrame()

def analyze_screen_time(df):
    readable = df.to_string(index=False)
    prompt_template_str = """
You are a digital wellness AI.

Analyze this screen time data:
- Focus vs distraction
- Burnout, overuse, addiction
- Decision fatigue signs

Recommend:
- Mental clarity (0-100)
- Avoid apps
- 3 inspiring movies and calming songs
- 3 digital detox habits

Output ONLY in JSON format as:
{"clarity_score": 0, "fatigue": "...", "avoid_apps": [...], "recommend": {"movies": [...], "songs": [...]}, "habits": [...] }

Screen Time Data:
{data}
"""
    formatted_prompt = prompt_template_str.format(data=readable)

    # Mock response for demonstration
    mock_response = json.dumps({
        "clarity_score": 75,
        "fatigue": "Low",
        "avoid_apps": ["App1", "App2", "App3"],
        "recommend": {"movies": ["Movie1", "Movie2", "Movie3"], "songs": ["Song1", "Song2", "Song3"]},
        "habits": ["Habit1", "Habit2", "Habit3"]
    })

    try:
        data = json.loads(mock_response)
    except json.JSONDecodeError:
        print("⚠️ JSON parsing failed in screen time analysis. Raw output:\n", mock_response)
        return {"clarity_score": 50, "fatigue": "Unknown", "avoid_apps": [], "recommend": {"movies": [], "songs": []}, "habits": []}

    if not isinstance(data, dict):
        print("⚠️ Screen time JSON result is not a dictionary.")
        return {"clarity_score": 50, "fatigue": "Unknown", "avoid_apps": [], "recommend": {"movies": [], "songs": []}, "habits": []}

    try:
        val = int(data.get("clarity_score", 50))
        data["clarity_score"] = max(0, min(100, val))
    except Exception:
        print("⚠️ Invalid clarity_score, setting default 50")
        data["clarity_score"] = 50

    return data

# --- Twitter Sentiment Analysis ---
def analyze_tweets(df):
    print("🔍 Columns in CSV:", df.columns.tolist())
    tweet_col = None
    for col in df.columns:
        if col.strip().lower() in ["tweet", "text", "message", "content"]:
            tweet_col = col
            break
    if not tweet_col:
        string_cols = df.select_dtypes(include='object')
        tweet_col = string_cols.apply(lambda c: c.str.len().mean()).idxmax()
        print(f"✅ Auto-selected tweet column: '{tweet_col}'")

    def analyze_sentiment_llm(tweet):
        prompt = f"""
You are a sentiment expert. Classify the tweet as one word: Positive, Negative, or Neutral.

Tweet: "{tweet}"
Sentiment:"""
        try:
            # Mock response for demonstration
            sentiment = "Neutral"
            if sentiment.lower() not in ["positive", "negative", "neutral"]:
                return "Neutral"
            return sentiment
        except Exception as e:
            print("⚠️ Error analyzing tweet:", e)
            return "Error"

    tqdm.pandas()
    df["sentiment"] = df[tweet_col].progress_apply(analyze_sentiment_llm)
    return df

# --- Final Report Synthesis ---
def synthesize_report(chat_json, screen_json, sentiment_df):
    sentiment_counts = sentiment_df["sentiment"].value_counts().to_dict()
    sentiment_summary = f"Sentiment counts: {sentiment_counts}"

    prompt_template_str = """
You are a NeuroAI fusion advisor from the future.

Combine these:
1. WhatsApp analysis:
{chat_json}

2. Screen time report:
{screen_json}

3. Twitter sentiment:
{sentiments}

Summarize teen mental health:
- Mood and stress pattern
- Top 3 issues
- Mindfulness movie/song list
- Futuristic habit suggestions

Respond warmly and clearly.
"""
    formatted_prompt = prompt_template_str.format(
        chat_json=json.dumps(chat_json, indent=2) if isinstance(chat_json, dict) else str(chat_json),
        screen_json=json.dumps(screen_json, indent=2) if isinstance(screen_json, dict) else str(screen_json),
        sentiments=sentiment_summary
    )

    # Mock response for demonstration
    mock_response = "Final mental health summary based on the provided data."

    return mock_response

# --- MAIN PIPELINE ---
if __name__ == "__main__":
    whatsapp_file = "whatsapp_chat_analysis.zip"
    screen_time_file = "screentime_analysis.csv"
    twitter_file = "teen_tweets.csv"

    try:
        # Mock function for demonstration
        def extract_whatsapp_messages(file_path):
            return ["User1: Hello!", "User2: Hi there!", "User1: How are you?"]

        # 1. WhatsApp
        chat_msgs = extract_whatsapp_messages(whatsapp_file)
        chat_result = analyze_chat_with_mistral(chat_msgs) if chat_msgs else "No usable messages."

        # 2. Screen Time
        df_screen = load_screen_time(screen_time_file)
        screen_result = analyze_screen_time(df_screen) if not df_screen.empty else "No screen time data."

        # 3. Twitter Sentiment
        df_tweets = pd.DataFrame({"tweet": ["I love this!", "Feeling great today", "Not sure about this"]})
        sentiment_df = analyze_tweets(df_tweets)

        # 4. Final Mental Health Report
        final_report = synthesize_report(chat_result, screen_result, sentiment_df)
        print("\n🧠 Final Mental Health Summary:\n", final_report)

    except Exception as e:
        print("Pipeline Error:", e)

# --- INPUT JSON ---
data = '''
{
  "mood": "Stressed and anxious with periods of joy",
  "top_issues": ["Overuse of social media", "Sleep deprivation", "Lack of mental clarity"],
  "recommended_movies": ["Inside Out", "The Pursuit of Happyness", "Soul"],
  "recommended_songs": ["Weightless - Marconi Union", "Lovely Day - Bill Withers", "Here Comes the Sun - The Beatles"],
  "habits": ["Daily journaling", "30-minute screen-free walk", "Night-time digital detox routine"]
}
'''

# --- LOAD JSON INTO PYTHON DICTIONARY ---
analysis = json.loads(data)

# --- PRINT IN A CLEAN FORMAT ---
print("\n🧠 Mood Summary:")
print(f"  - Mood: {analysis['mood']}")

print("\n🚩 Top Issues Detected:")
for issue in analysis['top_issues']:
    print(f"  - {issue}")

print("\n🎬 Recommended Movies:")
for movie in analysis['recommended_movies']:
    print(f"  - {movie}")

print("\n🎵 Recommended Songs:")
for song in analysis['recommended_songs']:
    print(f"  - {song}")

print("\n🌿 Suggested Mental Health Habits:")
for habit in analysis['habits']:
    print(f"  - {habit}")

print(f"🧠 Mood: {analysis['mood']} | 🚩 Issues: {', '.join(analysis['top_issues'])} | 🎬 Movies: {', '.join(analysis['recommended_movies'])} | 🎵 Songs: {', '.join(analysis['recommended_songs'])} | 🌿 Habits: {', '.join(analysis['habits'])}")


{'emotional_tone': 'Neutral', 'clarity': 'Clear', 'mindset': 'Balanced', 'avoid': ['app1', 'app2', 'app3'], 'recommend': {'movies': ['Movie1', 'Movie2', 'Movie3'], 'songs': ['Song1', 'Song2', 'Song3']}, 'habits': ['Habit1', 'Habit2', 'Habit3']}
❌ Failed to load screen time CSV: [Errno 2] No such file or directory: 'screentime_analysis.csv'
🔍 Columns in CSV: ['tweet']


100%|██████████| 3/3 [00:00<00:00, 5282.50it/s]


🧠 Final Mental Health Summary:
 Final mental health summary based on the provided data.

🧠 Mood Summary:
  - Mood: Stressed and anxious with periods of joy

🚩 Top Issues Detected:
  - Overuse of social media
  - Sleep deprivation
  - Lack of mental clarity

🎬 Recommended Movies:
  - Inside Out
  - The Pursuit of Happyness
  - Soul

🎵 Recommended Songs:
  - Weightless - Marconi Union
  - Lovely Day - Bill Withers
  - Here Comes the Sun - The Beatles

🌿 Suggested Mental Health Habits:
  - Daily journaling
  - 30-minute screen-free walk
  - Night-time digital detox routine
🧠 Mood: Stressed and anxious with periods of joy | 🚩 Issues: Overuse of social media, Sleep deprivation, Lack of mental clarity | 🎬 Movies: Inside Out, The Pursuit of Happyness, Soul | 🎵 Songs: Weightless - Marconi Union, Lovely Day - Bill Withers, Here Comes the Sun - The Beatles | 🌿 Habits: Daily journaling, 30-minute screen-free walk, Night-time digital detox routine



