In [2]:
!pip install -U langchain-community


Collecting langchain-community
  Downloading langchain_community-0.3.24-py3-none-any.whl.metadata (2.5 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.9.1-py3-none-any.whl.metadata (3.8 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting python-dotenv>=0.21.0 (from pydantic-settings<3.0.0,>=2.4.0->langchain-community)
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB

In [1]:
pip install openai==0.28



In [7]:
# --- Imports ---
import pandas as pd
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
import openai
import os
import getpass
from langchain.prompts import PromptTemplate
from langchain.llms import OpenAI as LangChainOpenAI
from langchain.chains import LLMChain

# --- Set API Key ---
os.environ['OPENAI_API_KEY'] = getpass.getpass("Enter your OpenAI API key:")
openai.api_key = os.getenv("OPENAI_API_KEY")

# === PART 1: Screen Time Analysis ===
print("\n[1] Processing Screen Time Data...")

# Load dataset
df = pd.read_csv('/content/screentime_analysis.csv')  # Update path if needed
df['Date'] = pd.to_datetime(df['Date'])

# Label Encoding
label_encoder = LabelEncoder()
df['App_Encoded'] = label_encoder.fit_transform(df['App'])

# Feature Engineering
df['Day_of_Week'] = df['Date'].dt.dayofweek
df['Day_of_Month'] = df['Date'].dt.day
df_original = df.copy()

# Scaling
scaler = StandardScaler()
scale_cols = ['Usage (minutes)', 'Notifications', 'Times Opened', 'Day_of_Week', 'Day_of_Month']
df[scale_cols] = scaler.fit_transform(df[scale_cols])

# Optional: Train-test split
X = df[['App_Encoded', 'Notifications', 'Times Opened', 'Day_of_Week', 'Day_of_Month']]
y = df['Usage (minutes)']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Summary for LLM
summary = (
    df_original.groupby("App")[['Usage (minutes)', 'Notifications', 'Times Opened']]
    .agg({
        'Usage (minutes)': 'sum',
        'Notifications': 'mean',
        'Times Opened': 'mean'
    })
    .sort_values("Usage (minutes)", ascending=False)
    .round(2)
)

# LLM prompt
summary_text = summary.to_string()
prompt_screen = f"""
You are a digital wellbeing coach.

Here's the mobile app usage summary:
{summary_text}

Based on this data:
1. Identify the most time-consuming apps.
2. Suggest usage patterns based on notifications and app opens.
3. Recommend ways to improve screen time habits.
4. Offer tips for better digital balance.
"""

# OpenAI call
try:
    response = openai.ChatCompletion.create(
        model="gpt-4",  # or "gpt-3.5-turbo"
        messages=[
            {"role": "system", "content": "You are a helpful digital wellbeing assistant."},
            {"role": "user", "content": prompt_screen}
        ],
        temperature=0.7
    )
    print("\n--- Digital Wellbeing Insights ---\n")
    print(response.choices[0].message.content)

except Exception as e:
    print(f"Error calling OpenAI API for screen time: {e}")

# === PART 2: WhatsApp Sentiment Analysis ===
print("\n[2] Analyzing WhatsApp Messages...")

# --- Chat Parser ---
def load_whatsapp_chat(filename):
    messages = []
    with open(filename, 'r', encoding='latin1') as file:
        lines = file.readlines()

    for line in lines:
        if " - " in line and ": " in line:
            try:
                datetime_part, content = line.split(" - ", 1)
                sender, message = content.split(": ", 1)
                messages.append({
                    "sender": sender.strip(),
                    "message": message.strip()
                })
            except ValueError:
                continue  # skip malformed lines
    return messages


# Load messages
filename = '/content/whatsapp_chat_analysis.zip'  # Update this path
messages = load_whatsapp_chat(filename)
print("Total messages parsed:", len(messages))

# LangChain LLM Setup
template = """
You are a helpful assistant that analyzes WhatsApp messages for tone and sentiment.
Given a message, classify it as Positive, Negative, or Neutral with a brief explanation.

Message: "{message}"
Sentiment:
"""
prompt = PromptTemplate(input_variables=["message"], template=template)
llm = LangChainOpenAI(temperature=0)
chain = LLMChain(llm=llm, prompt=prompt)

# Analyze first 10 messages
results = []
for i, item in enumerate(messages[:10]):
    try:
        analysis = chain.run(message=item["message"])
        results.append({
            "sender": item["sender"],
            "message": item["message"],
            "analysis": analysis.strip()
        })
    except Exception as e:
        results.append({
            "sender": item.get("sender", "Unknown"),
            "message": item.get("message", ""),
            "analysis": f"Error: {str(e)}"
        })


# Convert to DataFrame and display
df_results = pd.DataFrame(results)

# Check if required columns exist
print("Columns in df_results:", df_results.columns.tolist())
print("\n--- WhatsApp Sentiment Summary (Top 10) ---\n")
if {'sender', 'message', 'analysis'}.issubset(df_results.columns):
    print(df_results[['sender', 'message', 'analysis']])
else:
    print("Missing expected columns in df_results.")


Enter your OpenAI API key:··········

[1] Processing Screen Time Data...
Error calling OpenAI API for screen time: Incorrect API key provided: exit. You can find your API key at https://platform.openai.com/account/api-keys.

[2] Analyzing WhatsApp Messages...
Total messages parsed: 0
Columns in df_results: []

--- WhatsApp Sentiment Summary (Top 10) ---

Missing expected columns in df_results.
