In [None]:
# Doomscrolling Detector 🕵️‍♂️
Detect when a user is doomscrolling based on simulated phone/social media usage data. 

In [None]:
## 1. Import Libraries

In [3]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, confusion_matrix
import random

In [4]:
## 2. Simulate Data

In [5]:
# Parameters
n_samples = 500
random.seed(42)
np.random.seed(42)

# Helper functions
def random_time():
    hour = np.random.randint(0, 24)
    minute = np.random.randint(0, 60)
    return f'{hour:02d}:{minute:02d}'

def random_keywords():
    doom_words = ['war', 'crash', 'death', 'crisis', 'disaster', 'collapse', 'pandemic', 'violence']
    neutral_words = ['cat', 'recipe', 'music', 'travel', 'sports', 'weather', 'science', 'art']
    if np.random.rand() < 0.5:
        return random.sample(doom_words, k=np.random.randint(1, 4))
    else:
        return random.sample(neutral_words, k=np.random.randint(1, 4))

def sentiment_from_keywords(keywords):
    negative = {'war', 'crash', 'death', 'crisis', 'disaster', 'collapse', 'pandemic', 'violence'}
    score = sum(1 for k in keywords if k in negative)
    return 'negative' if score > 0 else 'positive'

data = []
for _ in range(n_samples):
    time_of_day = random_time()
    hour = int(time_of_day.split(':')[0])
    scrolling_speed = np.random.choice(['fast', 'slow'])
    num_scrolls = np.random.randint(20, 500)
    session_length = np.random.randint(5, 90)  # in minutes
    keywords = random_keywords()
    sentiment = sentiment_from_keywords(keywords)
    # Doomscrolling label: more likely if late, negative, long session, many scrolls
    doom = (hour >= 23 or hour <= 5) and sentiment == 'negative' and session_length > 30 and num_scrolls > 100
    data.append({
        'time_of_day': time_of_day,
        'hour': hour,
        'scrolling_speed': scrolling_speed,
        'num_scrolls': num_scrolls,
        'session_length': session_length,
        'keywords': ','.join(keywords),
        'sentiment': sentiment,
        'doomscrolling': int(doom)
    })
df = pd.DataFrame(data)
df.head()

Unnamed: 0,time_of_day,hour,scrolling_speed,num_scrolls,session_length,keywords,sentiment,doomscrolling
0,06:51,6,fast,290,76,"recipe,cat,weather",positive,0
1,18:22,18,fast,478,28,sports,positive,0
2,01:23,1,slow,433,42,crisis,negative,1
3,00:11,0,slow,41,53,"travel,recipe,weather",positive,0
4,09:27,9,slow,483,19,"crash,collapse,pandemic",negative,0


In [6]:
## 3. Feature Engineering

In [20]:
# Encode categorical features
df['scrolling_speed_fast'] = (df['scrolling_speed'] == 'fast').astype(int)
df['sentiment_negative'] = (df['sentiment'] == 'negative').astype(int)
# Features
features = ['hour', 'scrolling_speed_fast', 'num_scrolls', 'session_length', 'sentiment_negative']
X = df[features]
y = df['doomscrolling']

In [8]:
## 4. Train/Test Split

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [10]:
## 5. Train Model

In [21]:
model = LogisticRegression()
model.fit(X_train, y_train)

In [12]:
## 6. Evaluate Model

In [13]:
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))
print('Confusion Matrix:')
print(confusion_matrix(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.94      1.00      0.97        88
           1       1.00      0.50      0.67        12

    accuracy                           0.94       100
   macro avg       0.97      0.75      0.82       100
weighted avg       0.94      0.94      0.93       100

Confusion Matrix:
[[88  0]
 [ 6  6]]


In [18]:
## 7. Demo: Predict and Alert

In [None]:
# Pick a random session
sample = df.sample(1)
sample_X = sample[features]
pred = model.predict(sample_X)[0]
print('Session:')
print(sample[['time_of_day', 'scrolling_speed', 'num_scrolls', 'session_length', 'keywords', 'sentiment']].to_string(index=False))
if pred == 1:
    print('⚠️  You\'ve been scrolling negative content for a while — take a break?')
else:
    print('👍 You\'re not doomscrolling!')

Session:
time_of_day scrolling_speed  num_scrolls  session_length keywords sentiment
      01:08            fast          312              24    death  negative
👍 You're not doomscrolling!
