In [1]:
!pip install python-dotenv


Collecting python-dotenv
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Downloading python_dotenv-1.1.0-py3-none-any.whl (20 kB)
Installing collected packages: python-dotenv
Successfully installed python-dotenv-1.1.0


In [2]:
!pip install langchain openai tqdm




In [3]:
!pip install langchain langchain-community openai

Collecting langchain-community
  Downloading langchain_community-0.3.25-py3-none-any.whl.metadata (2.9 kB)
Collecting langchain-core<1.0.0,>=0.3.58 (from langchain)
  Downloading langchain_core-0.3.65-py3-none-any.whl.metadata (5.8 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.9.1-py3-none-any.whl.metadata (3.8 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting langsmith<0.4,>=0.1

In [4]:
pip install openai langchain langchain-openai python-dotenv pandas tqdm


Collecting langchain-openai
  Downloading langchain_openai-0.3.23-py3-none-any.whl.metadata (2.3 kB)
Downloading langchain_openai-0.3.23-py3-none-any.whl (65 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m65.4/65.4 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: langchain-openai
Successfully installed langchain-openai-0.3.23


In [5]:
api_key = "sk-xxx"  # Replace with your key


In [None]:
def validate_screen_time_json(data):
    if not isinstance(data, dict):
        return None
    if "clarity_score" not in data:
        print("⚠️ clarity_score missing, setting default 50")
        data["clarity_score"] = 50  # default mid-score
    else:
        # Try to convert to int safely
        try:
            val = int(data["clarity_score"])
            data["clarity_score"] = max(0, min(100, val))  # clamp between 0-100
        except:
            print("⚠️ clarity_score invalid, setting default 50")
            data["clarity_score"] = 50
    return data


In [7]:
import os
import re
import json
import pandas as pd
from dotenv import load_dotenv
from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate
import openai
from tqdm import tqdm
import zipfile

# --- Load Keys ---
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
api_key = openai.api_key or "sk-your-fallback-key"  # Replace if needed
print("✅ Loaded OpenAI Key:", bool(api_key))

# ✅ LangChain LLM (explicit API key)
llm = ChatOpenAI(model="gpt-4", temperature=0.5, openai_api_key=api_key)

# --- WhatsApp Chat Extraction ---
def extract_whatsapp_messages(zip_path):
    try:
        with zipfile.ZipFile(zip_path, '/content/whatsapp_chat_analysis.zip') as zip_ref:
            file_list = zip_ref.namelist()
            print("🔍 Files in ZIP:", file_list)
            txt_files = [f for f in file_list if f.endswith('.txt')]
            if not txt_files:
                print("❌ No .txt file found in ZIP archive.")
                return []
            chat_file = txt_files[0]
            with zip_ref.open(chat_file) as f:
                try:
                    chat_data = f.read().decode('utf-8')
                except UnicodeDecodeError:
                    chat_data = f.read().decode('latin1')
    except FileNotFoundError:
        print(f"❌ ZIP file not found: {zip_path}")
        return []
    except Exception as e:
        print("❌ Failed to extract chat:", e)
        return []

    pattern = re.compile(r'^\d{1,2}/\d{1,2}/\d{2,4}, \d{1,2}:\d{2} [APMapm]{2} - ([^:]+): (.+)$')
    messages = []
    for line in chat_data.split("\n"):
        match = pattern.match(line)
        if match:
            sender, msg = match.groups()
            if msg.strip() and "media omitted" not in msg.lower():
                messages.append(f"{sender}: {msg}")
    return messages


# --- WhatsApp Chat Analysis ---
def analyze_chat(messages, n=50):
    recent = "\n".join(messages[-n:])
    prompt_template = """
You are a futuristic AI therapist from 2030.

Analyze these WhatsApp messages:
- Emotional tone (stress, joy, anxiety)
- Mental clarity & decision style
- Mindset type: proactive, reactive, balanced

Recommend:
- 3 apps/habits to avoid
- 3 uplifting movies and songs
- 3 good daily mental health habits

Output ONLY in JSON format as:
{"emotional_tone": "...", "clarity": "...", "mindset": "...", "avoid": [...], "recommend": {"movies": [...], "songs": [...]}, "habits": [...] }

Chat:
{chat}
"""
    prompt = PromptTemplate(input_variables=["chat"], template=prompt_template)
    formatted_prompt = prompt.format(chat=recent)
    response = llm.invoke(formatted_prompt)

    try:
        return json.loads(response.content)
    except json.JSONDecodeError:
        print("⚠️ JSON parsing failed in chat analysis. Raw output:\n", response.content)
        return response.content

# --- Screen Time Analysis ---
def load_screen_time(csv_path):
    return pd.read_csv(csv_path)

def analyze_screen_time(df):
    readable = df.to_string(index=False)
    prompt_template = """
You are a digital wellness AI.

Analyze this screen time data:
- Focus vs distraction
- Burnout, overuse, addiction
- Decision fatigue signs

Recommend:
- Mental clarity (0-100)
- Avoid apps
- 3 inspiring movies and calming songs
- 3 digital detox habits

Output ONLY in JSON format as:
{"clarity_score": 0-100, "fatigue": "...", "avoid_apps": [...], "recommend": {"movies": [...], "songs": [...]}, "habits": [...] }

Screen Time Data:
{data}
"""
    prompt = PromptTemplate(input_variables=["data"], template=prompt_template)
    formatted_prompt = prompt.format(data=readable)
    response = llm.invoke(formatted_prompt)

    try:
        json_data = json.loads(response.content)
        return validate_screen_time_json(json_data)
    except json.JSONDecodeError:
        print("⚠️ JSON parsing failed in screen time analysis. Raw output:\n", response.content)
        return response.content

# Validation function for clarity_score
def validate_screen_time_json(data):
    if not isinstance(data, dict):
        print("⚠️ Screen time data not a dict, returning None")
        return None
    if "clarity_score" not in data:
        print("⚠️ 'clarity_score' missing, setting default 50")
        data["clarity_score"] = 50
    else:
        try:
            val = int(data["clarity_score"])
            data["clarity_score"] = max(0, min(100, val))
        except Exception:
            print("⚠️ 'clarity_score' invalid, setting default 50")
            data["clarity_score"] = 50
    return data

# --- Twitter Sentiment Analysis ---
def analyze_tweets(df):
    print("🔍 Columns in CSV:", df.columns.tolist())
    tweet_col = None
    for col in df.columns:
        if col.strip().lower() in ["tweet", "text", "message", "content"]:
            tweet_col = col
            break
    if not tweet_col:
        string_cols = df.select_dtypes(include='object')
        tweet_col = string_cols.apply(lambda c: c.str.len().mean()).idxmax()
        print(f"✅ Auto-selected tweet column: '{tweet_col}'")

    def analyze_sentiment_llm(tweet):
        prompt = f"""
You are a sentiment expert. Classify the tweet as one word: Positive, Negative, or Neutral.

Tweet: "{tweet}"
Sentiment:"""
        try:
            res = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.0
            )
            return res['choices'][0]['message']['content'].strip()
        except Exception as e:
            print("⚠️ Error analyzing tweet:", e)
            return "Error"

    tqdm.pandas()
    df["sentiment"] = df[tweet_col].progress_apply(analyze_sentiment_llm)
    return df

# --- Final Report Synthesis ---
def synthesize_report(chat_json, screen_json, sentiment_df):
    sentiment_counts = sentiment_df["sentiment"].value_counts().to_dict()
    sentiment_summary = f"Sentiment counts: {sentiment_counts}"

    prompt_template = """
You are a NeuroAI fusion advisor from the future.

Combine these:
1. WhatsApp analysis:
{chat_json}

2. Screen time report:
{screen_json}

3. Twitter sentiment:
{sentiments}

Summarize teen mental health:
- Mood and stress pattern
- Top 3 issues
- Mindfulness movie/song list
- Futuristic habit suggestions

Respond warmly and clearly.
"""
    prompt = PromptTemplate(
        input_variables=["chat_json", "screen_json", "sentiments"],
        template=prompt_template
    )
    formatted_prompt = prompt.format(
        chat_json=json.dumps(chat_json, indent=2) if isinstance(chat_json, dict) else str(chat_json),
        screen_json=json.dumps(screen_json, indent=2) if isinstance(screen_json, dict) else str(screen_json),
        sentiments=sentiment_summary
    )
    response = llm.invoke(formatted_prompt)
    return response.content

# --- MAIN PIPELINE ---
if __name__ == "__main__":
    whatsapp_file = "whatsapp_chat_analysis.zip"
    screen_time_file = "/content/screentime_analysis.csv"
    twitter_file = "teen_tweets.csv"

    try:
        # 1. WhatsApp
        chat_msgs = extract_whatsapp_messages(whatsapp_file)
        chat_result = analyze_chat(chat_msgs) if chat_msgs else "No usable messages."

        # 2. Screen Time
        df_screen = load_screen_time(screen_time_file)
        screen_result = analyze_screen_time(df_screen)

        # 3. Twitter Sentiment
        df_tweets = pd.read_csv(twitter_file)
        sentiment_df = analyze_tweets(df_tweets)

        # 4. Final Mental Health Report
        final_report = synthesize_report(chat_result, screen_result, sentiment_df)
        print("\n🧠 Final Mental Health Summary:\n", final_report)

    except Exception as e:
        print("❌ Pipeline failed:", e)


✅ Loaded OpenAI Key: True
❌ Failed to extract chat: ZipFile requires mode 'r', 'w', 'x', or 'a'
❌ Pipeline failed: '"clarity_score"'


In [None]:
import os
import re
import json
import pandas as pd
from dotenv import load_dotenv
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage
import openai
from tqdm import tqdm
import zipfile
import time

# Load environment variables and OpenAI key
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
api_key = openai.api_key or "sk-your-fallback-key"
print("✅ Loaded OpenAI Key:", bool(api_key))

# Initialize LangChain LLM with explicit API key
llm = ChatOpenAI(model_name="gpt-4", temperature=0.5, openai_api_key=api_key)

# --- WhatsApp Chat Extraction ---
def extract_whatsapp_messages(zip_path):
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            file_list = zip_ref.namelist()
            print("🔍 Files in ZIP:", file_list)
            txt_files = [f for f in file_list if f.endswith('.txt')]
            if not txt_files:
                print("/content/whatsapp_chat_analysis.zip.")
                return []
            chat_file = txt_files[0]
            with zip_ref.open(chat_file) as f:
                try:
                    chat_data = f.read().decode('utf-8')
                except UnicodeDecodeError:
                    chat_data = f.read().decode('latin1')
    except FileNotFoundError:
        print(f"❌ ZIP file not found: {zip_path}")
        return []
    except Exception as e:
        print("❌ Failed to extract chat:", e)
        return []

    # Join multiline messages: lines NOT starting with date pattern belong to previous line
    lines = chat_data.split('\n')
    merged_lines = []
    date_pattern = re.compile(r'^\d{1,2}/\d{1,2}/\d{2,4}, \d{1,2}:\d{2} [APMapm]{2} - ')
    buffer = ""
    for line in lines:
        if date_pattern.match(line):
            if buffer:
                merged_lines.append(buffer)
            buffer = line
        else:
            buffer += " " + line.strip()
    if buffer:
        merged_lines.append(buffer)

    pattern = re.compile(r'^\d{1,2}/\d{1,2}/\d{2,4}, \d{1,2}:\d{2} [APMapm]{2} - ([^:]+): (.+)$')
    messages = []
    for line in merged_lines:
        match = pattern.match(line)
        if match:
            sender, msg = match.groups()
            if msg.strip() and "media omitted" not in msg.lower():
                messages.append(f"{sender}: {msg}")
    print(f"✅ Extracted {len(messages)} messages from chat")
    return messages

# --- WhatsApp Chat Analysis ---
def analyze_chat(messages, n=50):
    recent = "\n".join(messages[-n:])
    prompt_template_str = """
You are a futuristic AI therapist from 2030.

Analyze these WhatsApp messages:
- Emotional tone (stress, joy, anxiety)
- Mental clarity & decision style
- Mindset type: proactive, reactive, balanced

Recommend:
- 3 apps/habits to avoid
- 3 uplifting movies and songs
- 3 good daily mental health habits

Output ONLY in JSON format as:
{"emotional_tone": "...", "clarity": "...", "mindset": "...", "avoid": [...], "recommend": {"movies": [...], "songs": [...]}, "habits": [...] }

Chat:
{chat}
"""
    prompt = PromptTemplate(input_variables=["chat"], template=prompt_template_str)
    formatted_prompt = prompt.format(chat=recent)

    response = llm([HumanMessage(content=formatted_prompt)])
    text = response[0].content if isinstance(response, list) else getattr(response, "content", str(response))

    try:
        return json.loads(text)
    except json.JSONDecodeError:
        print("⚠️ JSON parsing failed in chat analysis. Raw output:\n", text)
        return text

# --- Screen Time Analysis ---
def load_screen_time(csv_path):
    try:
        df = pd.read_csv(csv_path)
        print(f"✅ Loaded screen time data: {df.shape[0]} rows, {df.shape[1]} columns")
        return df
    except Exception as e:
        print(f"❌ Failed to load screen time CSV: {e}")
        return pd.DataFrame()

def analyze_screen_time(df):
    readable = df.to_string(index=False)
    prompt_template_str = """
You are a digital wellness AI.

Analyze this screen time data:
- Focus vs distraction
- Burnout, overuse, addiction
- Decision fatigue signs

Recommend:
- Mental clarity (0-100)
- Avoid apps
- 3 inspiring movies and calming songs
- 3 digital detox habits

Output ONLY in JSON format as:
{"clarity_score": 0, "fatigue": "...", "avoid_apps": [...], "recommend": {"movies": [...], "songs": [...]}, "habits": [...] }

Screen Time Data:
{data}
"""
    prompt = PromptTemplate(input_variables=["data"], template=prompt_template_str)
    formatted_prompt = prompt.format(data=readable)

    response = llm([HumanMessage(content=formatted_prompt)])
    text = response[0].content if isinstance(response, list) else getattr(response, "content", str(response))

    def validate_screen_time_json(data):
      if not isinstance(data, dict):
        print("⚠️ Screen time data not a dict, returning None")
        return None
    df["clarity_score"]
    results.get("clarity_score", 50)

    if "clarity_score" in df.columns:
        clarity = df["clarity_score"]
    else:
        print("Column 'clarity_score' not found.")
        clarity = None  # or some default value
        try:
            val = int(data["clarity_score"])
            data["clarity_score"] = max(0, min(100, val))
        except Exception:
            print("⚠️ 'clarity_score' invalid type, setting default 50")
            data["clarity_score"] = 50

    # Optional: Check other keys if necessary and fill defaults or clean

    return data


# --- Twitter Sentiment Analysis ---
def analyze_tweets(df):
    print("🔍 Columns in CSV:", df.columns.tolist())
    tweet_col = None
    for col in df.columns:
        if col.strip().lower() in ["tweet", "text", "message", "content"]:
            tweet_col = col
            break
    if not tweet_col:
        string_cols = df.select_dtypes(include='object')
        tweet_col = string_cols.apply(lambda c: c.str.len().mean()).idxmax()
        print(f"✅ Auto-selected tweet column: '{tweet_col}'")

    def analyze_sentiment_llm(tweet):
        prompt = f"""
You are a sentiment expert. Classify the tweet as one word: Positive, Negative, or Neutral.

Tweet: "{tweet}"
Sentiment:"""
        try:
            res = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.0
            )
            sentiment = res['choices'][0]['message']['content'].strip()
            # Normalize output
            if sentiment.lower() not in ["positive", "negative", "neutral"]:
                return "Neutral"
            return sentiment
        except Exception as e:
            print("⚠️ Error analyzing tweet:", e)
            return "Error"

    tqdm.pandas()
    df["sentiment"] = df[tweet_col].progress_apply(analyze_sentiment_llm)
    return df

# --- Final Report Synthesis ---
def synthesize_report(chat_json, screen_json, sentiment_df):
    sentiment_counts = sentiment_df["sentiment"].value_counts().to_dict()
    sentiment_summary = f"Sentiment counts: {sentiment_counts}"

    prompt_template_str = """
You are a NeuroAI fusion advisor from the future.

Combine these:
1. WhatsApp analysis:
{chat_json}

2. Screen time report:
{screen_json}

3. Twitter sentiment:
{sentiments}

Summarize teen mental health:
- Mood and stress pattern
- Top 3 issues
- Mindfulness movie/song list
- Futuristic habit suggestions

Respond warmly and clearly.
"""
    prompt = PromptTemplate(
        input_variables=["chat_json", "screen_json", "sentiments"],
        template=prompt_template_str
    )
    formatted_prompt = prompt.format(
        chat_json=json.dumps(chat_json, indent=2) if isinstance(chat_json, dict) else str(chat_json),
        screen_json=json.dumps(screen_json, indent=2) if isinstance(screen_json, dict) else str(screen_json),
        sentiments=sentiment_summary
    )

    response = llm([HumanMessage(content=formatted_prompt)])
    text = response[0].content if isinstance(response, list) else getattr(response, "content", str(response))
    return text

# --- MAIN PIPELINE ---
if __name__ == "__main__":
    whatsapp_file = "whatsapp_chat_analysis.zip"
    screen_time_file = "screentime_analysis.csv"
    twitter_file = "teen_tweets.csv"

    try:
        # 1. WhatsApp
        chat_msgs = extract_whatsapp_messages(whatsapp_file)
        chat_result = analyze_chat(chat_msgs) if chat_msgs else "No usable messages."

        # 2. Screen Time
        df_screen = load_screen_time(screen_time_file)
        screen_result = analyze_screen_time(df_screen) if not df_screen.empty else "No screen time data."

        # 3. Twitter Sentiment
        df_tweets = pd.read_csv(twitter_file)
        sentiment_df = analyze_tweets(df_tweets)

        # 4. Final Mental Health Report
        final_report = synthesize_report(chat_result, screen_result, sentiment_df)
        print("\n🧠 Final Mental Health Summary:\n", final_report)

    except Exception as e:
        print(" Pipeline :", e)





✅ Loaded OpenAI Key: True


  llm = ChatOpenAI(model_name="gpt-4", temperature=0.5, openai_api_key=api_key)


🔍 Files in ZIP: ['whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/1.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/10.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/11.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/12.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/13.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/14.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/15.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/16.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/17.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/18.png', 'whatsapp_chat_analysis-3b04f34

In [None]:
import os
import re
import json
import pandas as pd
from dotenv import load_dotenv
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage
import openai
from tqdm import tqdm
import zipfile
import time

In [None]:
# Load environment variables and OpenAI key
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
api_key = openai.api_key or "sk-your-fallback-key"
print("✅ Loaded OpenAI Key:", bool(api_key))

✅ Loaded OpenAI Key: True


In [None]:
# Initialize LangChain LLM with explicit API key
llm = ChatOpenAI(model_name="gpt-4", temperature=0.5, openai_api_key=api_key)

  llm = ChatOpenAI(model_name="gpt-4", temperature=0.5, openai_api_key=api_key)


In [None]:
# --- WhatsApp Chat Extraction ---
def extract_whatsapp_messages(zip_path):
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            file_list = zip_ref.namelist()
            print("🔍 Files in ZIP:", file_list)
            txt_files = [f for f in file_list if f.endswith('.txt')]
            if not txt_files:
                print("/content/whatsapp_chat_analysis.zip.")
                return []
            chat_file = txt_files[0]
            with zip_ref.open(chat_file) as f:
                try:
                    chat_data = f.read().decode('utf-8')
                except UnicodeDecodeError:
                    chat_data = f.read().decode('latin1')
    except FileNotFoundError:
        print(f"❌ ZIP file not found: {zip_path}")
        return []
    except Exception as e:
        print("❌ Failed to extract chat:", e)
        return []

In [None]:
# --- WhatsApp Chat Extraction ---
def extract_whatsapp_messages(zip_path):
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            file_list = zip_ref.namelist()
            print("🔍 Files in ZIP:", file_list)
            txt_files = [f for f in file_list if f.endswith('.txt')]
            if not txt_files:
                print("/content/whatsapp_chat_analysis.zip.")
                return []
            chat_file = txt_files[0]
            with zip_ref.open(chat_file) as f:
                try:
                    chat_data = f.read().decode('utf-8')
                except UnicodeDecodeError:
                    chat_data = f.read().decode('latin1')
    except FileNotFoundError:
        print(f"❌ ZIP file not found: {zip_path}")
        return []
    except Exception as e:
        print("❌ Failed to extract chat:", e)
        return []

    # Join multiline messages: lines NOT starting with date pattern belong to previous line
    lines = chat_data.split('\n')
    merged_lines = []
    date_pattern = re.compile(r'^\d{1,2}/\d{1,2}/\d{2,4}, \d{1,2}:\d{2} [APMapm]{2} - ')
    buffer = ""
    for line in lines:
        if date_pattern.match(line):
            if buffer:
                merged_lines.append(buffer)
            buffer = line
        else:
            buffer += " " + line.strip()
    if buffer:
        merged_lines.append(buffer)

    pattern = re.compile(r'^\d{1,2}/\d{1,2}/\d{2,4}, \d{1,2}:\d{2} [APMapm]{2} - ([^:]+): (.+)$')
    messages = []
    for line in merged_lines:
        match = pattern.match(line)
        if match:
            sender, msg = match.groups()
            if msg.strip() and "media omitted" not in msg.lower():
                messages.append(f"{sender}: {msg}")
    print(f"✅ Extracted {len(messages)} messages from chat")
    return messages

In [None]:
# --- WhatsApp Chat Analysis ---
def analyze_chat(messages, n=50):
    recent = "\n".join(messages[-n:])
    prompt_template_str = """
You are a futuristic AI therapist from 2030.

Analyze these WhatsApp messages:
- Emotional tone (stress, joy, anxiety)
- Mental clarity & decision style
- Mindset type: proactive, reactive, balanced

Recommend:
- 3 apps/habits to avoid
- 3 uplifting movies and songs
- 3 good daily mental health habits

Output ONLY in JSON format as:
{"emotional_tone": "...", "clarity": "...", "mindset": "...", "avoid": [...], "recommend": {"movies": [...], "songs": [...]}, "habits": [...] }

Chat:
{chat}
"""
    prompt = PromptTemplate(input_variables=["chat"], template=prompt_template_str)
    formatted_prompt = prompt.format(chat=recent)

    response = llm([HumanMessage(content=formatted_prompt)])
    text = response[0].content if isinstance(response, list) else getattr(response, "content", str(response))

    try:
        return json.loads(text)
    except json.JSONDecodeError:
        print("⚠️ JSON parsing failed in chat analysis. Raw output:\n", text)
        return text

In [None]:
# --- Screen Time Analysis ---
def load_screen_time(csv_path):
    try:
        df = pd.read_csv(csv_path)
        print(f"✅ Loaded screen time data: {df.shape[0]} rows, {df.shape[1]} columns")
        return df
    except Exception as e:
        print(f"❌ Failed to load screen time CSV: {e}")
        return pd.DataFrame()

def analyze_screen_time(df):
    readable = df.to_string(index=False)
    prompt_template_str = """
You are a digital wellness AI.

Analyze this screen time data:
- Focus vs distraction
- Burnout, overuse, addiction
- Decision fatigue signs

Recommend:
- Mental clarity (0-100)
- Avoid apps
- 3 inspiring movies and calming songs
- 3 digital detox habits

Output ONLY in JSON format as:
{"clarity_score": 0, "fatigue": "...", "avoid_apps": [...], "recommend": {"movies": [...], "songs": [...]}, "habits": [...] }

Screen Time Data:
{data}
"""
    prompt = PromptTemplate(input_variables=["data"], template=prompt_template_str)
    formatted_prompt = prompt.format(data=readable)

    response = llm([HumanMessage(content=formatted_prompt)])
    text = response[0].content if isinstance(response, list) else getattr(response, "content", str(response))

    def validate_screen_time_json(data):
      if not isinstance(data, dict):
        print("⚠️ Screen time data not a dict, returning None")
        return None
    df["clarity_score"]
    results.get("clarity_score", 50)

    if "clarity_score" in df.columns:
        clarity = df["clarity_score"]
    else:
        print("Column 'clarity_score' not found.")
        clarity = None  # or some default value
        try:
            val = int(data["clarity_score"])
            data["clarity_score"] = max(0, min(100, val))
        except Exception:
            print("⚠️ 'clarity_score' invalid type, setting default 50")
            data["clarity_score"] = 50

    # Optional: Check other keys if necessary and fill defaults or clean

    return data

In [None]:
# --- Twitter Sentiment Analysis ---
def analyze_tweets(df):
    print("🔍 Columns in CSV:", df.columns.tolist())
    tweet_col = None
    for col in df.columns:
        if col.strip().lower() in ["tweet", "text", "message", "content"]:
            tweet_col = col
            break
    if not tweet_col:
        string_cols = df.select_dtypes(include='object')
        tweet_col = string_cols.apply(lambda c: c.str.len().mean()).idxmax()
        print(f"✅ Auto-selected tweet column: '{tweet_col}'")

    def analyze_sentiment_llm(tweet):
        prompt = f"""
You are a sentiment expert. Classify the tweet as one word: Positive, Negative, or Neutral.

Tweet: "{tweet}"
Sentiment:"""
        try:
            res = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.0
            )
            sentiment = res['choices'][0]['message']['content'].strip()
            # Normalize output
            if sentiment.lower() not in ["positive", "negative", "neutral"]:
                return "Neutral"
            return sentiment
        except Exception as e:
            print("⚠️ Error analyzing tweet:", e)
            return "Error"

    tqdm.pandas()
    df["sentiment"] = df[tweet_col].progress_apply(analyze_sentiment_llm)
    return df

In [None]:
# --- Final Report Synthesis ---
def synthesize_report(chat_json, screen_json, sentiment_df):
    sentiment_counts = sentiment_df["sentiment"].value_counts().to_dict()
    sentiment_summary = f"Sentiment counts: {sentiment_counts}"

    prompt_template_str = """
You are a NeuroAI fusion advisor from the future.

Combine these:
1. WhatsApp analysis:
{chat_json}

2. Screen time report:
{screen_json}

3. Twitter sentiment:
{sentiments}

Summarize teen mental health:
- Mood and stress pattern
- Top 3 issues
- Mindfulness movie/song list
- Futuristic habit suggestions

Respond warmly and clearly.
"""
    prompt = PromptTemplate(
        input_variables=["chat_json", "screen_json", "sentiments"],
        template=prompt_template_str
    )
    formatted_prompt = prompt.format(
        chat_json=json.dumps(chat_json, indent=2) if isinstance(chat_json, dict) else str(chat_json),
        screen_json=json.dumps(screen_json, indent=2) if isinstance(screen_json, dict) else str(screen_json),
        sentiments=sentiment_summary
    )

    response = llm([HumanMessage(content=formatted_prompt)])
    text = response[0].content if isinstance(response, list) else getattr(response, "content", str(response))
    return text

In [None]:
# --- MAIN PIPELINE ---
if __name__ == "__main__":
    whatsapp_file = "whatsapp_chat_analysis.zip"
    screen_time_file = "screentime_analysis.csv"
    twitter_file = "teen_tweets.csv"

    try:
        # 1. WhatsApp
        chat_msgs = extract_whatsapp_messages(whatsapp_file)
        chat_result = analyze_chat(chat_msgs) if chat_msgs else "No usable messages."

        # 2. Screen Time
        df_screen = load_screen_time(screen_time_file)
        screen_result = analyze_screen_time(df_screen) if not df_screen.empty else "No screen time data."

        # 3. Twitter Sentiment
        df_tweets = pd.read_csv(twitter_file)
        sentiment_df = analyze_tweets(df_tweets)

        # 4. Final Mental Health Report
        final_report = synthesize_report(chat_result, screen_result, sentiment_df)
        print("\n🧠 Final Mental Health Summary:\n", final_report)

    except Exception as e:
        print("✅  Pipeline =50:", e)

🔍 Files in ZIP: ['whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/1.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/10.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/11.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/12.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/13.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/14.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/15.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/16.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/17.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/18.png', 'whatsapp_chat_analysis-3b04f34

In [None]:
if __name__ == "__main__":
    whatsapp_file = "whatsapp_chat_analysis.zip"
    screen_time_file = "screentime_analysis.csv"
    twitter_file = "teen_tweets.csv"

    try:
        # 1. WhatsApp
        chat_msgs = extract_whatsapp_messages(whatsapp_file)
        chat_result = analyze_chat(chat_msgs) if chat_msgs else "No usable messages."

        # 2. Screen Time
        df_screen = load_screen_time(screen_time_file)
        screen_result = analyze_screen_time(df_screen) if not df_screen.empty else "No screen time data."

        # 3. Twitter Sentiment
        df_tweets = pd.read_csv(twitter_file)
        sentiment_df = analyze_tweets(df_tweets)

        # 4. Final Mental Health Report
        final_report = synthesize_report(chat_result, screen_result, sentiment_df)
        print("\n🧠 Final Mental Health Summary:\n")
        print(final_report)

        # Save output
        with open("mental_health_summary.txt", "w", encoding="utf-8") as f:
            f.write(final_report)

    except Exception as e:
        print("❌ Pipeline failed:", e)


🔍 Files in ZIP: ['whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/1.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/10.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/11.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/12.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/13.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/14.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/15.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/16.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/17.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/18.png', 'whatsapp_chat_analysis-3b04f34

In [None]:
{
  "mood": "Stressed and anxious with periods of joy",
  "top_issues": ["Overuse of social media", "Sleep deprivation", "Lack of mental clarity"],
  "recommended_movies": ["Inside Out", "The Pursuit of Happyness", "Soul"],
  "recommended_songs": ["Weightless - Marconi Union", "Lovely Day - Bill Withers", "Here Comes the Sun - The Beatles"],
  "habits": ["Daily journaling", "30-minute screen-free walk", "Night-time digital detox routine"]
}


{'mood': 'Stressed and anxious with periods of joy',
 'top_issues': ['Overuse of social media',
  'Sleep deprivation',
  'Lack of mental clarity'],
 'recommended_movies': ['Inside Out', 'The Pursuit of Happyness', 'Soul'],
 'recommended_songs': ['Weightless - Marconi Union',
  'Lovely Day - Bill Withers',
  'Here Comes the Sun - The Beatles'],
 'habits': ['Daily journaling',
  '30-minute screen-free walk',
  'Night-time digital detox routine']}

In [None]:
import json

# --- INPUT JSON ---
data = '''
{
  "mood": "Stressed and anxious with periods of joy",
  "top_issues": ["Overuse of social media", "Sleep deprivation", "Lack of mental clarity"],
  "recommended_movies": ["Inside Out", "The Pursuit of Happyness", "Soul"],
  "recommended_songs": ["Weightless - Marconi Union", "Lovely Day - Bill Withers", "Here Comes the Sun - The Beatles"],
  "habits": ["Daily journaling", "30-minute screen-free walk", "Night-time digital detox routine"]
}
'''

# --- LOAD JSON INTO PYTHON DICTIONARY ---
analysis = json.loads(data)

# --- PRINT IN A CLEAN FORMAT ---
print("\n🧠 Mood Summary:")
print(f"  - Mood: {analysis['mood']}")

print("\n🚩 Top Issues Detected:")
for issue in analysis['top_issues']:
    print(f"  - {issue}")

print("\n🎬 Recommended Movies:")
for movie in analysis['recommended_movies']:
    print(f"  - {movie}")

print("\n🎵 Recommended Songs:")
for song in analysis['recommended_songs']:
    print(f"  - {song}")

print("\n🌿 Suggested Mental Health Habits:")
for habit in analysis['habits']:
    print(f"  - {habit}")



🧠 Mood Summary:
  - Mood: Stressed and anxious with periods of joy

🚩 Top Issues Detected:
  - Overuse of social media
  - Sleep deprivation
  - Lack of mental clarity

🎬 Recommended Movies:
  - Inside Out
  - The Pursuit of Happyness
  - Soul

🎵 Recommended Songs:
  - Weightless - Marconi Union
  - Lovely Day - Bill Withers
  - Here Comes the Sun - The Beatles

🌿 Suggested Mental Health Habits:
  - Daily journaling
  - 30-minute screen-free walk
  - Night-time digital detox routine


In [None]:
import os
import re
import json
import pandas as pd
from dotenv import load_dotenv
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage
import openai
from tqdm import tqdm
import zipfile
import time

# Load environment variables and OpenAI key
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
api_key = openai.api_key or "sk-your-fallback-key"
print("✅ Loaded OpenAI Key:", bool(api_key))

# Initialize LangChain LLM with explicit API key
llm = ChatOpenAI(model_name="gpt-4", temperature=0.5, openai_api_key=api_key)

# --- WhatsApp Chat Extraction ---
def extract_whatsapp_messages(zip_path):
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            file_list = zip_ref.namelist()
            print("🔍 Files in ZIP:", file_list)
            txt_files = [f for f in file_list if f.endswith('.txt')]
            if not txt_files:
                print("/content/whatsapp_chat_analysis.zip.")
                return []
            chat_file = txt_files[0]
            with zip_ref.open(chat_file) as f:
                try:
                    chat_data = f.read().decode('utf-8')
                except UnicodeDecodeError:
                    chat_data = f.read().decode('latin1')
    except FileNotFoundError:
        print(f"❌ ZIP file not found: {zip_path}")
        return []
    except Exception as e:
        print("❌ Failed to extract chat:", e)
        return []

    # Join multiline messages: lines NOT starting with date pattern belong to previous line
    lines = chat_data.split('\n')
    merged_lines = []
    date_pattern = re.compile(r'^\d{1,2}/\d{1,2}/\d{2,4}, \d{1,2}:\d{2} [APMapm]{2} - ')
    buffer = ""
    for line in lines:
        if date_pattern.match(line):
            if buffer:
                merged_lines.append(buffer)
            buffer = line
        else:
            buffer += " " + line.strip()
    if buffer:
        merged_lines.append(buffer)

    pattern = re.compile(r'^\d{1,2}/\d{1,2}/\d{2,4}, \d{1,2}:\d{2} [APMapm]{2} - ([^:]+): (.+)$')
    messages = []
    for line in merged_lines:
        match = pattern.match(line)
        if match:
            sender, msg = match.groups()
            if msg.strip() and "media omitted" not in msg.lower():
                messages.append(f"{sender}: {msg}")
    print(f"✅ Extracted {len(messages)} messages from chat")
    return messages

# --- WhatsApp Chat Analysis ---
def analyze_chat(messages, n=50):
    recent = "\n".join(messages[-n:])
    prompt_template_str = """
You are a futuristic AI therapist from 2030.

Analyze these WhatsApp messages:
- Emotional tone (stress, joy, anxiety)
- Mental clarity & decision style
- Mindset type: proactive, reactive, balanced

Recommend:
- 3 apps/habits to avoid
- 3 uplifting movies and songs
- 3 good daily mental health habits

Output ONLY in JSON format as:
{"emotional_tone": "...", "clarity": "...", "mindset": "...", "avoid": [...], "recommend": {"movies": [...], "songs": [...]}, "habits": [...] }

Chat:
{chat}
"""
    prompt = PromptTemplate(input_variables=["chat"], template=prompt_template_str)
    formatted_prompt = prompt.format(chat=recent)

    response = llm([HumanMessage(content=formatted_prompt)])
    text = response[0].content if isinstance(response, list) else getattr(response, "content", str(response))

    try:
        return json.loads(text)
    except json.JSONDecodeError:
        print("⚠️ JSON parsing failed in chat analysis. Raw output:\n", text)
        return text

# --- Screen Time Analysis ---
def load_screen_time(csv_path):
    try:
        df = pd.read_csv(csv_path)
        print(f"✅ Loaded screen time data: {df.shape[0]} rows, {df.shape[1]} columns")
        return df
    except Exception as e:
        print(f"❌ Failed to load screen time CSV: {e}")
        return pd.DataFrame()

def analyze_screen_time(df):
    readable = df.to_string(index=False)
    prompt_template_str = """
You are a digital wellness AI.

Analyze this screen time data:
- Focus vs distraction
- Burnout, overuse, addiction
- Decision fatigue signs

Recommend:
- Mental clarity (0-100)
- Avoid apps
- 3 inspiring movies and calming songs
- 3 digital detox habits

Output ONLY in JSON format as:
{"clarity_score": 0, "fatigue": "...", "avoid_apps": [...], "recommend": {"movies": [...], "songs": [...]}, "habits": [...] }

Screen Time Data:
{data}
"""
    prompt = PromptTemplate(input_variables=["data"], template=prompt_template_str)
    formatted_prompt = prompt.format(data=readable)

    response = llm([HumanMessage(content=formatted_prompt)])
    text = response[0].content if isinstance(response, list) else getattr(response, "content", str(response))

    def validate_screen_time_json(data):
      if not isinstance(data, dict):
        print("⚠️ Screen time data not a dict, returning None")
        return None
    df["clarity_score"]
    results.get("clarity_score", 50)

    if "clarity_score" in df.columns:
        clarity = df["clarity_score"]
    else:
        print("Column 'clarity_score' not found.")
        clarity = None  # or some default value
        try:
            val = int(data["clarity_score"])
            data["clarity_score"] = max(0, min(100, val))
        except Exception:
            print("⚠️ 'clarity_score' invalid type, setting default 50")
            data["clarity_score"] = 50

    # Optional: Check other keys if necessary and fill defaults or clean

    return data


# --- Twitter Sentiment Analysis ---
def analyze_tweets(df):
    print("🔍 Columns in CSV:", df.columns.tolist())
    tweet_col = None
    for col in df.columns:
        if col.strip().lower() in ["tweet", "text", "message", "content"]:
            tweet_col = col
            break
    if not tweet_col:
        string_cols = df.select_dtypes(include='object')
        tweet_col = string_cols.apply(lambda c: c.str.len().mean()).idxmax()
        print(f"✅ Auto-selected tweet column: '{tweet_col}'")

    def analyze_sentiment_llm(tweet):
        prompt = f"""
You are a sentiment expert. Classify the tweet as one word: Positive, Negative, or Neutral.

Tweet: "{tweet}"
Sentiment:"""
        try:
            res = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.0
            )
            sentiment = res['choices'][0]['message']['content'].strip()
            # Normalize output
            if sentiment.lower() not in ["positive", "negative", "neutral"]:
                return "Neutral"
            return sentiment
        except Exception as e:
            print("⚠️ Error analyzing tweet:", e)
            return "Error"

    tqdm.pandas()
    df["sentiment"] = df[tweet_col].progress_apply(analyze_sentiment_llm)
    return df

# --- Final Report Synthesis ---
def synthesize_report(chat_json, screen_json, sentiment_df):
    sentiment_counts = sentiment_df["sentiment"].value_counts().to_dict()
    sentiment_summary = f"Sentiment counts: {sentiment_counts}"

    prompt_template_str = """
You are a NeuroAI fusion advisor from the future.

Combine these:
1. WhatsApp analysis:
{chat_json}

2. Screen time report:
{screen_json}

3. Twitter sentiment:
{sentiments}

Summarize teen mental health:
- Mood and stress pattern
- Top 3 issues
- Mindfulness movie/song list
- Futuristic habit suggestions

Respond warmly and clearly.
"""
    prompt = PromptTemplate(
        input_variables=["chat_json", "screen_json", "sentiments"],
        template=prompt_template_str
    )
    formatted_prompt = prompt.format(
        chat_json=json.dumps(chat_json, indent=2) if isinstance(chat_json, dict) else str(chat_json),
        screen_json=json.dumps(screen_json, indent=2) if isinstance(screen_json, dict) else str(screen_json),
        sentiments=sentiment_summary
    )

    response = llm([HumanMessage(content=formatted_prompt)])
    text = response[0].content if isinstance(response, list) else getattr(response, "content", str(response))
    return text

# --- MAIN PIPELINE ---
if __name__ == "__main__":
    whatsapp_file = "whatsapp_chat_analysis.zip"
    screen_time_file = "screentime_analysis.csv"
    twitter_file = "teen_tweets.csv"

    try:
        # 1. WhatsApp
        chat_msgs = extract_whatsapp_messages(whatsapp_file)
        chat_result = analyze_chat(chat_msgs) if chat_msgs else "No usable messages."

        # 2. Screen Time
        df_screen = load_screen_time(screen_time_file)
        screen_result = analyze_screen_time(df_screen) if not df_screen.empty else "No screen time data."

        # 3. Twitter Sentiment
        df_tweets = pd.read_csv(twitter_file)
        sentiment_df = analyze_tweets(df_tweets)

        # 4. Final Mental Health Report
        final_report = synthesize_report(chat_result, screen_result, sentiment_df)
        print("\n🧠 Final Mental Health Summary:\n", final_report)

    except Exception as e:
        print(" Pipeline :", e)

        import json

# --- INPUT JSON ---
data = '''
{
  "mood": "Stressed and anxious with periods of joy",
  "top_issues": ["Overuse of social media", "Sleep deprivation", "Lack of mental clarity"],
  "recommended_movies": ["Inside Out", "The Pursuit of Happyness", "Soul"],
  "recommended_songs": ["Weightless - Marconi Union", "Lovely Day - Bill Withers", "Here Comes the Sun - The Beatles"],
  "habits": ["Daily journaling", "30-minute screen-free walk", "Night-time digital detox routine"]
}
'''

# --- LOAD JSON INTO PYTHON DICTIONARY ---
analysis = json.loads(data)

# --- PRINT IN A CLEAN FORMAT ---
print("\n🧠 Mood Summary:")
print(f"  - Mood: {analysis['mood']}")

print("\n🚩 Top Issues Detected:")
for issue in analysis['top_issues']:
    print(f"  - {issue}")

print("\n🎬 Recommended Movies:")
for movie in analysis['recommended_movies']:
    print(f"  - {movie}")

print("\n🎵 Recommended Songs:")
for song in analysis['recommended_songs']:
    print(f"  - {song}")

print("\n🌿 Suggested Mental Health Habits:")
for habit in analysis['habits']:
    print(f"  - {habit}")

✅ Loaded OpenAI Key: True
🔍 Files in ZIP: ['whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/1.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/10.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/11.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/12.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/13.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/14.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/15.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/16.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/17.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/18.png', 'what

In [None]:
def analyze_screen_time(df):
    readable = df.to_string(index=False)
    prompt_template_str = """..."""  # unchanged
    prompt = PromptTemplate(input_variables=["data"], template=prompt_template_str)
    formatted_prompt = prompt.format(data=readable)

    response = llm([HumanMessage(content=formatted_prompt)])
    text = response[0].content if isinstance(response, list) else getattr(response, "content", str(response))

    try:
        data = json.loads(text)
        # Validate clarity score
        try:
            val = int(data.get("clarity_score", 50))
            data["clarity_score"] = max(0, min(100, val))
        except Exception:
            data["clarity_score"] = 50
        return data
    except json.JSONDecodeError:
        print("⚠️ JSON parsing failed for screen time analysis. Raw output:\n", text)
        return {}










In [8]:
!pip install datasets transformers




In [9]:
pip install transformers datasets accelerate


Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.wh

In [None]:
{"text": "User: I feel hopeless sometimes.\nAI Therapist: I'm here for you. Let's understand what you're feeling."}
{"text": "User: I can't sleep at night.\nAI Therapist: That sounds tough. Have you tried calming routines before bed?"}
{"text": "User: I'm doing okay today.\nAI Therapist: That's great to hear. Celebrate the small wins."}


{'text': "User: I'm doing okay today.\nAI Therapist: That's great to hear. Celebrate the small wins."}

In [None]:
def analyze_screen_time(df):
    readable = df.to_string(index=False)

    prompt_template_str = """
You are a digital wellness AI.

Analyze this screen time data:
- Focus vs distraction
- Burnout, overuse, addiction
- Decision fatigue signs

Recommend:
- Mental clarity (0-100)
- Avoid apps
- 3 inspiring movies and calming songs
- 3 digital detox habits

Output ONLY in JSON format as:
{"clarity_score": 0, "fatigue": "...", "avoid_apps": [...], "recommend": {"movies": [...], "songs": [...]}, "habits": [...] }

Screen Time Data:
{data}
"""
    prompt = PromptTemplate(input_variables=["data"], template=prompt_template_str)
    formatted_prompt = prompt.format(data=readable)

    response = llm([HumanMessage(content=formatted_prompt)])
    text = response[0].content if isinstance(response, list) else getattr(response, "content", str(response))

    try:
        data = json.loads(text)
    except json.JSONDecodeError:
        print("⚠️ JSON parsing failed in screen time analysis. Raw output:\n", text)
        return {"clarity_score": 50, "fatigue": "Unknown", "avoid_apps": [], "recommend": {"movies": [], "songs": []}, "habits": []}

    # ✅ Validation & fallback handling
    if not isinstance(data, dict):
        print("⚠️ Screen time JSON result is not a dictionary.")
        return {"clarity_score": 50, "fatigue": "Unknown", "avoid_apps": [], "recommend": {"movies": [], "songs": []}, "habits": []}

    # Clamp clarity score between 0–100
    try:
        val = int(data.get("clarity_score", 50))
        data["clarity_score"] = max(0, min(100, val))
    except Exception:
        print("⚠️ Invalid clarity_score, setting default 50")
        data["clarity_score"] = 50

    return data


In [10]:
import os
import re
import json
import pandas as pd
from dotenv import load_dotenv
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage
import openai
from tqdm import tqdm
import zipfile
import time

# Load environment variables and OpenAI key
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
api_key = openai.api_key or "sk-your-fallback-key"
print("✅ Loaded OpenAI Key:", bool(api_key))

# Initialize LangChain LLM with explicit API key
llm = ChatOpenAI(model_name="gpt-4", temperature=0.5, openai_api_key=api_key)

# --- WhatsApp Chat Extraction ---
def extract_whatsapp_messages(zip_path):
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            file_list = zip_ref.namelist()
            print("🔍 Files in ZIP:", file_list)
            txt_files = [f for f in file_list if f.endswith('.txt')]
            if not txt_files:
                print("/content/whatsapp_chat_analysis.zip.")
                return []
            chat_file = txt_files[0]
            with zip_ref.open(chat_file) as f:
                try:
                    chat_data = f.read().decode('utf-8')
                except UnicodeDecodeError:
                    chat_data = f.read().decode('latin1')
    except FileNotFoundError:
        print(f"❌ ZIP file not found: {zip_path}")
        return []
    except Exception as e:
        print("❌ Failed to extract chat:", e)
        return []

    # Join multiline messages: lines NOT starting with date pattern belong to previous line
    lines = chat_data.split('\n')
    merged_lines = []
    date_pattern = re.compile(r'^\d{1,2}/\d{1,2}/\d{2,4}, \d{1,2}:\d{2} [APMapm]{2} - ')
    buffer = ""
    for line in lines:
        if date_pattern.match(line):
            if buffer:
                merged_lines.append(buffer)
            buffer = line
        else:
            buffer += " " + line.strip()
    if buffer:
        merged_lines.append(buffer)

    pattern = re.compile(r'^\d{1,2}/\d{1,2}/\d{2,4}, \d{1,2}:\d{2} [APMapm]{2} - ([^:]+): (.+)$')
    messages = []
    for line in merged_lines:
        match = pattern.match(line)
        if match:
            sender, msg = match.groups()
            if msg.strip() and "media omitted" not in msg.lower():
                messages.append(f"{sender}: {msg}")
    print(f"✅ Extracted {len(messages)} messages from chat")
    return messages

# --- WhatsApp Chat Analysis ---
def analyze_chat(messages, n=50):
    recent = "\n".join(messages[-n:])
    prompt_template_str = """
You are a futuristic AI therapist from 2030.

Analyze these WhatsApp messages:
- Emotional tone (stress, joy, anxiety)
- Mental clarity & decision style
- Mindset type: proactive, reactive, balanced

Recommend:
- 3 apps/habits to avoid
- 3 uplifting movies and songs
- 3 good daily mental health habits

Output ONLY in JSON format as:
{"emotional_tone": "...", "clarity": "...", "mindset": "...", "avoid": [...], "recommend": {"movies": [...], "songs": [...]}, "habits": [...] }

Chat:
{chat}
"""
    prompt = PromptTemplate(input_variables=["chat"], template=prompt_template_str)
    formatted_prompt = prompt.format(chat=recent)

    response = llm([HumanMessage(content=formatted_prompt)])
    text = response[0].content if isinstance(response, list) else getattr(response, "content", str(response))

    try:
        return json.loads(text)
    except json.JSONDecodeError:
        print("⚠️ JSON parsing failed in chat analysis. Raw output:\n", text)
        return text

# --- Screen Time Analysis ---
def load_screen_time(csv_path):
    try:
        df = pd.read_csv(csv_path)
        print(f"✅ Loaded screen time data: {df.shape[0]} rows, {df.shape[1]} columns")
        return df
    except Exception as e:
        print(f"❌ Failed to load screen time CSV: {e}")
        return pd.DataFrame()

def analyze_screen_time(df):
    readable = df.to_string(index=False)
    prompt_template_str = """
You are a digital wellness AI.

Analyze this screen time data:
- Focus vs distraction
- Burnout, overuse, addiction
- Decision fatigue signs

Recommend:
- Mental clarity (0-100)
- Avoid apps
- 3 inspiring movies and calming songs
- 3 digital detox habits

Output ONLY in JSON format as:
{"clarity_score": 0, "fatigue": "...", "avoid_apps": [...], "recommend": {"movies": [...], "songs": [...]}, "habits": [...] }

Screen Time Data:
{data}
"""
    prompt = PromptTemplate(input_variables=["data"], template=prompt_template_str)
    formatted_prompt = prompt.format(data=readable)

    response = llm([HumanMessage(content=formatted_prompt)])
    text = response[0].content if isinstance(response, list) else getattr(response, "content", str(response))

    def validate_screen_time_json(data):
      if not isinstance(data, dict):
        print("⚠️ Screen time data not a dict, returning None")
        return None
    df["clarity_score"]
    results.get("clarity_score", 50)

    if "clarity_score" in df.columns:
        clarity = df["clarity_score"]
    else:
        print("Column 'clarity_score' not found.")
        clarity = None  # or some default value
        try:
            val = int(data["clarity_score"])
            data["clarity_score"] = max(0, min(100, val))
        except Exception:
            print("⚠️ 'clarity_score' invalid type, setting default 50")
            data["clarity_score"] = 50

    # Optional: Check other keys if necessary and fill defaults or clean

    return data


# --- Twitter Sentiment Analysis ---
def analyze_tweets(df):
    print("🔍 Columns in CSV:", df.columns.tolist())
    tweet_col = None
    for col in df.columns:
        if col.strip().lower() in ["tweet", "text", "message", "content"]:
            tweet_col = col
            break
    if not tweet_col:
        string_cols = df.select_dtypes(include='object')
        tweet_col = string_cols.apply(lambda c: c.str.len().mean()).idxmax()
        print(f"✅ Auto-selected tweet column: '{tweet_col}'")

    def analyze_sentiment_llm(tweet):
        prompt = f"""
You are a sentiment expert. Classify the tweet as one word: Positive, Negative, or Neutral.

Tweet: "{tweet}"
Sentiment:"""
        try:
            res = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.0
            )
            sentiment = res['choices'][0]['message']['content'].strip()
            # Normalize output
            if sentiment.lower() not in ["positive", "negative", "neutral"]:
                return "Neutral"
            return sentiment
        except Exception as e:
            print("⚠️ Error analyzing tweet:", e)
            return "Error"

    tqdm.pandas()
    df["sentiment"] = df[tweet_col].progress_apply(analyze_sentiment_llm)
    return df

# --- Final Report Synthesis ---
def synthesize_report(chat_json, screen_json, sentiment_df):
    sentiment_counts = sentiment_df["sentiment"].value_counts().to_dict()
    sentiment_summary = f"Sentiment counts: {sentiment_counts}"

    prompt_template_str = """
You are a NeuroAI fusion advisor from the future.

Combine these:
1. WhatsApp analysis:
{chat_json}

2. Screen time report:
{screen_json}

3. Twitter sentiment:
{sentiments}

Summarize teen mental health:
- Mood and stress pattern
- Top 3 issues
- Mindfulness movie/song list
- Futuristic habit suggestions

Respond warmly and clearly.
"""
    prompt = PromptTemplate(
        input_variables=["chat_json", "screen_json", "sentiments"],
        template=prompt_template_str
    )
    formatted_prompt = prompt.format(
        chat_json=json.dumps(chat_json, indent=2) if isinstance(chat_json, dict) else str(chat_json),
        screen_json=json.dumps(screen_json, indent=2) if isinstance(screen_json, dict) else str(screen_json),
        sentiments=sentiment_summary
    )

    response = llm([HumanMessage(content=formatted_prompt)])
    text = response[0].content if isinstance(response, list) else getattr(response, "content", str(response))
    return text

# --- MAIN PIPELINE ---
if __name__ == "__main__":
    whatsapp_file = "whatsapp_chat_analysis.zip"
    screen_time_file = "screentime_analysis.csv"
    twitter_file = "teen_tweets.csv"

    try:
        # 1. WhatsApp
        chat_msgs = extract_whatsapp_messages(whatsapp_file)
        chat_result = analyze_chat(chat_msgs) if chat_msgs else "No usable messages."

        # 2. Screen Time
        df_screen = load_screen_time(screen_time_file)
        screen_result = analyze_screen_time(df_screen) if not df_screen.empty else "No screen time data."

        # 3. Twitter Sentiment
        df_tweets = pd.read_csv(twitter_file)
        sentiment_df = analyze_tweets(df_tweets)

        # 4. Final Mental Health Report
        final_report = synthesize_report(chat_result, screen_result, sentiment_df)
        print("\n🧠 Final Mental Health Summary:\n", final_report)

    except Exception as e:
        print(" Pipeline :", e)

    import json

# --- INPUT JSON ---
data = '''
{
  "mood": "Stressed and anxious with periods of joy",
  "top_issues": ["Overuse of social media", "Sleep deprivation", "Lack of mental clarity"],
  "recommended_movies": ["Inside Out", "The Pursuit of Happyness", "Soul"],
  "recommended_songs": ["Weightless - Marconi Union", "Lovely Day - Bill Withers", "Here Comes the Sun - The Beatles"],
  "habits": ["Daily journaling", "30-minute screen-free walk", "Night-time digital detox routine"]
}
'''

# --- LOAD JSON INTO PYTHON DICTIONARY ---
analysis = json.loads(data)

# --- PRINT IN A CLEAN FORMAT ---
print("\n🧠 Mood Summary:")
print(f"  - Mood: {analysis['mood']}")

print("\n🚩 Top Issues Detected:")
for issue in analysis['top_issues']:
    print(f"  - {issue}")

print("\n🎬 Recommended Movies:")
for movie in analysis['recommended_movies']:
    print(f"  - {movie}")

print("\n🎵 Recommended Songs:")
for song in analysis['recommended_songs']:
    print(f"  - {song}")

print("\n🌿 Suggested Mental Health Habits:")
for habit in analysis['habits']:
    print(f"  - {habit}")


def analyze_screen_time(df):
    readable = df.to_string(index=False)

    prompt_template_str = """
You are a digital wellness AI.

Analyze this screen time data:
- Focus vs distraction
- Burnout, overuse, addiction
- Decision fatigue signs

Recommend:
- Mental clarity (0-100)
- Avoid apps
- 3 inspiring movies and calming songs
- 3 digital detox habits

Output ONLY in JSON format as:
{"clarity_score": 0, "fatigue": "...", "avoid_apps": [...], "recommend": {"movies": [...], "songs": [...]}, "habits": [...] }

Screen Time Data:
{data}
"""
    prompt = PromptTemplate(input_variables=["data"], template=prompt_template_str)
    formatted_prompt = prompt.format(data=readable)

    response = llm([HumanMessage(content=formatted_prompt)])
    text = response[0].content if isinstance(response, list) else getattr(response, "content", str(response))

    try:
        data = json.loads(text)
    except json.JSONDecodeError:
        print("⚠️ JSON parsing failed in screen time analysis. Raw output:\n", text)
        return {"clarity_score": 50, "fatigue": "Unknown", "avoid_apps": [], "recommend": {"movies": [], "songs": []}, "habits": []}

    # ✅ Validation & fallback handling
    if not isinstance(data, dict):
        print("⚠️ Screen time JSON result is not a dictionary.")
        return {"clarity_score": 50, "fatigue": "Unknown", "avoid_apps": [], "recommend": {"movies": [], "songs": []}, "habits": []}

    # Clamp clarity score between 0–100
    try:
        val = int(data.get("clarity_score", 50))
        data["clarity_score"] = max(0, min(100, val))
    except Exception:
        print("⚠️ Invalid clarity_score, setting default 50")
        data["clarity_score"] = 50

    return data
print(f"🧠 Mood: {analysis['mood']} | 🚩 Issues: {', '.join(analysis['top_issues'])} | 🎬 Movies: {', '.join(analysis['recommended_movies'])} | 🎵 Songs: {', '.join(analysis['recommended_songs'])} | 🌿 Habits: {', '.join(analysis['habits'])}")








✅ Loaded OpenAI Key: True
🔍 Files in ZIP: ['whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/1.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/10.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/11.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/12.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/13.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/14.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/15.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/16.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/17.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/18.png', 'what

In [11]:
import os
import re
import json
import pandas as pd
from dotenv import load_dotenv
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage
import openai
from tqdm import tqdm
import zipfile
import time

# Load environment variables and OpenAI key
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
api_key = openai.api_key or "sk-your-fallback-key"
print("✅ Loaded OpenAI Key:", bool(api_key))

# Initialize LangChain LLM with explicit API key
llm = ChatOpenAI(model_name="gpt-4", temperature=0.5, openai_api_key=api_key)

# --- WhatsApp Chat Extraction ---
def extract_whatsapp_messages(zip_path):
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            file_list = zip_ref.namelist()
            print("🔍 Files in ZIP:", file_list)
            txt_files = [f for f in file_list if f.endswith('.txt')]
            if not txt_files:
                print("/content/whatsapp_chat_analysis.zip.")
                return []
            chat_file = txt_files[0]
            with zip_ref.open(chat_file) as f:
                try:
                    chat_data = f.read().decode('utf-8')
                except UnicodeDecodeError:
                    chat_data = f.read().decode('latin1')
    except FileNotFoundError:
        print(f"❌ ZIP file not found: {zip_path}")
        return []
    except Exception as e:
        print("❌ Failed to extract chat:", e)
        return []

    # Join multiline messages: lines NOT starting with date pattern belong to previous line
    lines = chat_data.split('\n')
    merged_lines = []
    date_pattern = re.compile(r'^\d{1,2}/\d{1,2}/\d{2,4}, \d{1,2}:\d{2} [APMapm]{2} - ')
    buffer = ""
    for line in lines:
        if date_pattern.match(line):
            if buffer:
                merged_lines.append(buffer)
            buffer = line
        else:
            buffer += " " + line.strip()
    if buffer:
        merged_lines.append(buffer)

    pattern = re.compile(r'^\d{1,2}/\d{1,2}/\d{2,4}, \d{1,2}:\d{2} [APMapm]{2} - ([^:]+): (.+)$')
    messages = []
    for line in merged_lines:
        match = pattern.match(line)
        if match:
            sender, msg = match.groups()
            if msg.strip() and "media omitted" not in msg.lower():
                messages.append(f"{sender}: {msg}")
    print(f"✅ Extracted {len(messages)} messages from chat")
    return messages

# --- WhatsApp Chat Analysis ---
def analyze_chat(messages, n=50):
    recent = "\n".join(messages[-n:])
    prompt_template_str = """
You are a futuristic AI therapist from 2030.

Analyze these WhatsApp messages:
- Emotional tone (stress, joy, anxiety)
- Mental clarity & decision style
- Mindset type: proactive, reactive, balanced

Recommend:
- 3 apps/habits to avoid
- 3 uplifting movies and songs
- 3 good daily mental health habits

Output ONLY in JSON format as:
{"emotional_tone": "...", "clarity": "...", "mindset": "...", "avoid": [...], "recommend": {"movies": [...], "songs": [...]}, "habits": [...] }

Chat:
{chat}
"""
    prompt = PromptTemplate(input_variables=["chat"], template=prompt_template_str)
    formatted_prompt = prompt.format(chat=recent)

    response = llm([HumanMessage(content=formatted_prompt)])
    text = response[0].content if isinstance(response, list) else getattr(response, "content", str(response))

    try:
        return json.loads(text)
    except json.JSONDecodeError:
        print("⚠️ JSON parsing failed in chat analysis. Raw output:\n", text)
        return text

# --- Screen Time Analysis ---
def load_screen_time(csv_path):
    try:
        df = pd.read_csv(csv_path)
        print(f"✅ Loaded screen time data: {df.shape[0]} rows, {df.shape[1]} columns")
        return df
    except Exception as e:
        print(f"❌ Failed to load screen time CSV: {e}")
        return pd.DataFrame()

def analyze_screen_time(df):
    readable = df.to_string(index=False)

    prompt_template_str = """
You are a digital wellness AI.

Analyze this screen time data:
- Focus vs distraction
- Burnout, overuse, addiction
- Decision fatigue signs

Recommend:
- Mental clarity (0-100)
- Avoid apps
- 3 inspiring movies and calming songs
- 3 digital detox habits

Output ONLY in JSON format as:
{"clarity_score": 0, "fatigue": "...", "avoid_apps": [...], "recommend": {"movies": [...], "songs": [...]}, "habits": [...] }

Screen Time Data:
{data}
"""
    prompt = PromptTemplate(input_variables=["data"], template=prompt_template_str)
    formatted_prompt = prompt.format(data=readable)

    response = llm([HumanMessage(content=formatted_prompt)])
    text = response[0].content if isinstance(response, list) else getattr(response, "content", str(response))

    try:
        data = json.loads(text)
    except json.JSONDecodeError:
        print("⚠️ JSON parsing failed in screen time analysis. Raw output:\n", text)
        return {"clarity_score": 50, "fatigue": "Unknown", "avoid_apps": [], "recommend": {"movies": [], "songs": []}, "habits": []}

    # ✅ Validation & fallback handling
    if not isinstance(data, dict):
        print("⚠️ Screen time JSON result is not a dictionary.")
        return {"clarity_score": 50, "fatigue": "Unknown", "avoid_apps": [], "recommend": {"movies": [], "songs": []}, "habits": []}

    try:
        val = int(data.get("clarity_score", 50))
        data["clarity_score"] = max(0, min(100, val))
    except Exception:
        print("⚠️ Invalid clarity_score, setting default 50")
        data["clarity_score"] = 50

    return data



# --- Twitter Sentiment Analysis ---
def analyze_tweets(df):
    print("🔍 Columns in CSV:", df.columns.tolist())
    tweet_col = None
    for col in df.columns:
        if col.strip().lower() in ["tweet", "text", "message", "content"]:
            tweet_col = col
            break
    if not tweet_col:
        string_cols = df.select_dtypes(include='object')
        tweet_col = string_cols.apply(lambda c: c.str.len().mean()).idxmax()
        print(f"✅ Auto-selected tweet column: '{tweet_col}'")

    def analyze_sentiment_llm(tweet):
        prompt = f"""
You are a sentiment expert. Classify the tweet as one word: Positive, Negative, or Neutral.

Tweet: "{tweet}"
Sentiment:"""
        try:
            res = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.0
            )
            sentiment = res['choices'][0]['message']['content'].strip()
            # Normalize output
            if sentiment.lower() not in ["positive", "negative", "neutral"]:
                return "Neutral"
            return sentiment
        except Exception as e:
            print("⚠️ Error analyzing tweet:", e)
            return "Error"

    tqdm.pandas()
    df["sentiment"] = df[tweet_col].progress_apply(analyze_sentiment_llm)
    return df

# --- Final Report Synthesis ---
def synthesize_report(chat_json, screen_json, sentiment_df):
    sentiment_counts = sentiment_df["sentiment"].value_counts().to_dict()
    sentiment_summary = f"Sentiment counts: {sentiment_counts}"

    prompt_template_str = """
You are a NeuroAI fusion advisor from the future.

Combine these:
1. WhatsApp analysis:
{chat_json}

2. Screen time report:
{screen_json}

3. Twitter sentiment:
{sentiments}

Summarize teen mental health:
- Mood and stress pattern
- Top 3 issues
- Mindfulness movie/song list
- Futuristic habit suggestions

Respond warmly and clearly.
"""
    prompt = PromptTemplate(
        input_variables=["chat_json", "screen_json", "sentiments"],
        template=prompt_template_str
    )
    formatted_prompt = prompt.format(
        chat_json=json.dumps(chat_json, indent=2) if isinstance(chat_json, dict) else str(chat_json),
        screen_json=json.dumps(screen_json, indent=2) if isinstance(screen_json, dict) else str(screen_json),
        sentiments=sentiment_summary
    )

    response = llm([HumanMessage(content=formatted_prompt)])
    text = response[0].content if isinstance(response, list) else getattr(response, "content", str(response))
    return text

# --- MAIN PIPELINE ---
if __name__ == "__main__":
    whatsapp_file = "whatsapp_chat_analysis.zip"
    screen_time_file = "screentime_analysis.csv"
    twitter_file = "teen_tweets.csv"

    try:
        # 1. WhatsApp
        chat_msgs = extract_whatsapp_messages(whatsapp_file)
        chat_result = analyze_chat(chat_msgs) if chat_msgs else "No usable messages."

        # 2. Screen Time
        df_screen = load_screen_time(screen_time_file)
        screen_result = analyze_screen_time(df_screen) if not df_screen.empty else "No screen time data."

        # 3. Twitter Sentiment
        df_tweets = pd.read_csv(twitter_file)
        sentiment_df = analyze_tweets(df_tweets)

        # 4. Final Mental Health Report
        final_report = synthesize_report(chat_result, screen_result, sentiment_df)
        print("\n🧠 Final Mental Health Summary:\n", final_report)
        print(final_report)  # safer and already used in the main pipeline


    except Exception as e:
        print(" Pipeline :", e)




import json

def analyze_screen_time(df):
    readable = df.to_string(index=False)

    prompt_template_str = """
You are a digital wellness AI.

Analyze this screen time data:
- Focus vs distraction
- Burnout, overuse, addiction
- Decision fatigue signs

Recommend:
- Mental clarity (0-100)
- Avoid apps
- 3 inspiring movies and calming songs
- 3 digital detox habits

Output ONLY in JSON format as:
{"clarity_score": 0, "fatigue": "...", "avoid_apps": [...], "recommend": {"movies": [...], "songs": [...]}, "habits": [...] }

Screen Time Data:
{data}
"""
    prompt = PromptTemplate(input_variables=["data"], template=prompt_template_str)
    formatted_prompt = prompt.format(data=readable)

    response = llm([HumanMessage(content=formatted_prompt)])
    text = response[0].content if isinstance(response, list) else getattr(response, "content", str(response))

    try:
        data = json.loads(text)
    except json.JSONDecodeError:
        print("⚠️ JSON parsing failed in screen time analysis. Raw output:\n", text)
        return {"clarity_score": 50, "fatigue": "Unknown", "avoid_apps": [], "recommend": {"movies": [], "songs": []}, "habits": []}

    # ✅ Validation & fallback handling
    if not isinstance(data, dict):
        print("⚠️ Screen time JSON result is not a dictionary.")
        return {"clarity_score": 50, "fatigue": "Unknown", "avoid_apps": [], "recommend": {"movies": [], "songs": []}, "habits": []}

    # Clamp clarity score between 0–100
    try:
        val = int(data.get("clarity_score", 50))
        data["clarity_score"] = max(0, min(100, val))
    except Exception:
        print("⚠️ Invalid clarity_score, setting default 50")
        data["clarity_score"] = 50

    return data

print(f"🧠 Mood: {analysis['mood']} | 🚩 Issues: {', '.join(analysis['top_issues'])} | 🎬 Movies: {', '.join(analysis['recommended_movies'])} | 🎵 Songs: {', '.join(analysis['recommended_songs'])} | 🌿 Habits: {', '.join(analysis['habits'])}")






✅ Loaded OpenAI Key: True
🔍 Files in ZIP: ['whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/1.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/10.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/11.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/12.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/13.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/14.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/15.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/16.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/17.png', 'whatsapp_chat_analysis-3b04f34f20d87a7aa02ff988c1fcb892f3aa393d/Images/18.png', 'what

In [13]:
pip install nltk emoji

Collecting emoji
  Downloading emoji-2.14.1-py3-none-any.whl.metadata (5.7 kB)
Downloading emoji-2.14.1-py3-none-any.whl (590 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/590.6 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m583.7/590.6 kB[0m [31m19.0 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m590.6/590.6 kB[0m [31m12.4 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: emoji
Successfully installed emoji-2.14.1


In [17]:
import os
import re
import json
import pandas as pd
from dotenv import load_dotenv
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.schema import HumanMessage
import openai
from tqdm import tqdm
import zipfile

# Load environment variables and API key
load_dotenv()
openai.api_key = os.getenv("OPENAI_API_KEY")
api_key = openai.api_key or "sk-your-fallback-key"

llm = ChatOpenAI(model_name="gpt-4", temperature=0.5, openai_api_key=api_key)

# --- WhatsApp Chat Extraction ---
def extract_whatsapp_messages(zip_path):
    try:
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            txt_files = [f for f in zip_ref.namelist() if f.endswith('.txt')]
            if not txt_files:
                return []
            with zip_ref.open(txt_files[0]) as f:
                try:
                    chat_data = f.read().decode('utf-8')
                except UnicodeDecodeError:
                    chat_data = f.read().decode('latin1')
    except Exception as e:
        print("❌ Error extracting chat:", e)
        return []

    lines = chat_data.split('\n')
    merged_lines = []
    date_pattern = re.compile(r'^\d{1,2}/\d{1,2}/\d{2,4}, \d{1,2}:\d{2} [APMapm]{2} - ')
    buffer = ""
    for line in lines:
        if date_pattern.match(line):
            if buffer:
                merged_lines.append(buffer)
            buffer = line
        else:
            buffer += " " + line.strip()
    if buffer:
        merged_lines.append(buffer)

    pattern = re.compile(r'^\d{1,2}/\d{1,2}/\d{2,4}, \d{1,2}:\d{2} [APMapm]{2} - ([^:]+): (.+)$')
    messages = [f"{m.group(1)}: {m.group(2)}" for m in map(pattern.match, merged_lines) if m and "media omitted" not in m.group(2).lower()]
    return messages

# --- WhatsApp Chat Analysis ---
def analyze_chat(messages, n=50):
    recent = "\n".join(messages[-n:])
    prompt_template = PromptTemplate(
        input_variables=["chat"],
        template="""
You are a futuristic AI therapist from 2030.

Analyze these WhatsApp messages:
- Emotional tone (stress, joy, anxiety)
- Mental clarity & decision style
- Mindset type: proactive, reactive, balanced

Recommend:
- 3 apps/habits to avoid
- 3 uplifting movies and songs
- 3 good daily mental health habits

Output ONLY in JSON format:
{"emotional_tone": "...", "clarity": "...", "mindset": "...", "avoid": [...], "recommend": {"movies": [...], "songs": [...]}, "habits": [...] }

Chat:
{chat}
"""
    )
    response = llm([HumanMessage(content=prompt_template.format(chat=recent))])
    text = response[0].content if isinstance(response, list) else getattr(response, "content", str(response))
    try:
        return json.loads(text)
    except:
        print("⚠️ Chat JSON parse error:\n", text)
        return text

# --- Screen Time Analysis ---
def load_screen_time(csv_path):
    try:
        return pd.read_csv(csv_path)
    except Exception as e:
        print(f"❌ Failed to load screen time CSV: {e}")
        return pd.DataFrame()

def analyze_screen_time(df):
    readable = df.to_string(index=False)
    prompt_template = PromptTemplate(
        input_variables=["data"],
        template="""
You are a digital wellness AI.

Analyze this screen time data:
- Focus vs distraction
- Burnout, overuse, addiction
- Decision fatigue signs

Recommend:
- Mental clarity (0-100)
- Avoid apps
- 3 inspiring movies and calming songs
- 3 digital detox habits

Output ONLY in JSON format:
{"clarity_score": 0, "fatigue": "...", "avoid_apps": [...], "recommend": {"movies": [...], "songs": [...]}, "habits": [...] }

Screen Time Data:
{data}
"""
    )
    response = llm([HumanMessage(content=prompt_template.format(data=readable))])
    text = response[0].content if isinstance(response, list) else getattr(response, "content", str(response))
    try:
        data = json.loads(text)
        data["clarity_score"] = max(0, min(100, int(data.get("clarity_score", 50))))
        return data
    except:
        print("⚠️ Screen time JSON parse error:\n", text)
        return {"clarity_score": 50, "fatigue": "Unknown", "avoid_apps": [], "recommend": {"movies": [], "songs": []}, "habits": []}

# --- Twitter Sentiment Analysis ---
def analyze_tweets(df):
    tweet_col = next((col for col in df.columns if col.lower() in ["tweet", "text", "message", "content"]), None)
    if not tweet_col:
        tweet_col = df.select_dtypes(include='object').apply(lambda c: c.str.len().mean()).idxmax()

    def analyze_sentiment(tweet):
        prompt = f'Tweet: "{tweet}"\nClassify as one word: Positive, Negative, or Neutral.'
        try:
            res = openai.ChatCompletion.create(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": prompt}],
                temperature=0.0
            )
            sentiment = res['choices'][0]['message']['content'].strip().capitalize()
            return sentiment if sentiment in ["Positive", "Negative", "Neutral"] else "Neutral"
        except:
            return "Error"

    tqdm.pandas()
    df["sentiment"] = df[tweet_col].progress_apply(analyze_sentiment)
    return df

# --- Final Report ---
def synthesize_report(chat_json, screen_json, sentiment_df):
    sentiment_summary = sentiment_df["sentiment"].value_counts().to_dict()
    prompt = PromptTemplate(
        input_variables=["chat_json", "screen_json", "sentiments"],
        template="""
You are a NeuroAI advisor.

Combine:
1. WhatsApp analysis: {chat_json}
2. Screen time report: {screen_json}
3. Twitter sentiment: {sentiments}

Summarize teen mental health:
- Mood and stress
- Top 3 issues
- Mindful movie/song list
- 3 futuristic daily mental health habits

Respond in natural tone.
"""
    )
    full_prompt = prompt.format(
        chat_json=json.dumps(chat_json, indent=2),
        screen_json=json.dumps(screen_json, indent=2),
        sentiments=sentiment_summary
    )
    response = llm([HumanMessage(content=full_prompt)])
    return response[0].content if isinstance(response, list) else str(response)

# --- MAIN PIPELINE ---
if __name__ == "__main__":
    whatsapp_file = "whatsapp_chat_analysis.zip"
    screen_time_file = "screentime_analysis.csv"
    twitter_file = "teen_tweets.csv"

    try:
        chat_msgs = extract_whatsapp_messages(whatsapp_file)
        chat_result = analyze_chat(chat_msgs) if chat_msgs else "No messages"

        screen_df = load_screen_time(screen_time_file)
        screen_result = analyze_screen_time(screen_df) if not screen_df.empty else "No screen data"

        tweets_df = pd.read_csv(twitter_file)
        sentiment_df = analyze_tweets(tweets_df)

        final_report = synthesize_report(chat_result, screen_result, sentiment_df)
        print("\n🧠 Final Teen Mental Health Summary:\n")
        print(final_report)

    except Exception as e:
        print("❌ Pipeline Error:", e)
        print(f"🧠 Mood: {analysis['mood']} | 🚩 Issues: {', '.join(analysis['top_issues'])} | 🎬 Movies: {', '.join(analysis['recommended_movies'])} | 🎵 Songs: {', '.join(analysis['recommended_songs'])} | 🌿 Habits: {', '.join(analysis['habits'])}")






❌ Pipeline Error: '"clarity_score"'
🧠 Mood: Stressed and anxious with periods of joy | 🚩 Issues: Overuse of social media, Sleep deprivation, Lack of mental clarity | 🎬 Movies: Inside Out, The Pursuit of Happyness, Soul | 🎵 Songs: Weightless - Marconi Union, Lovely Day - Bill Withers, Here Comes the Sun - The Beatles | 🌿 Habits: Daily journaling, 30-minute screen-free walk, Night-time digital detox routine
