In [3]:
import pandas as pd
import numpy as np
from pymongo import MongoClient
from datetime import datetime
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import LatentDirichletAllocation

class TaskPrioritizer:
    def __init__(self, db_uri="mongodb://localhost:27017/", db_name="pro-manage-db"):
        self.db_uri = db_uri
        self.db_name = db_name
        self.client = None
        self.db = None
        self.vectorizer = TfidfVectorizer(max_features=1000, stop_words='english')
        self.lda = LatentDirichletAllocation(n_components=5, random_state=42)
        self.negative_keywords = [
            'bug', 'critical', 'fix', 'urgent', 'block', 
            'error', 'fail', 'issue', 'broken', 'outage'
        ]
        
    def connect_db(self):
        """Establish MongoDB connection"""
        try:
            self.client = MongoClient(self.db_uri)
            self.db = self.client[self.db_name]
            # Test the connection
            self.db.command('ping')
            print(f"Successfully connected to MongoDB database '{self.db_name}'")
            return True
        except Exception as e:
            print(f"Database connection failed: {str(e)}")
            return False

    def fetch_tasks(self):
        """Fetch tasks from MongoDB with error handling"""
        if not self.db:
            return pd.DataFrame()
            
        try:
            tasks = list(self.db.tasks.find({"isTrashed": False}))
            if not tasks:
                print("No active tasks found in database")
                return pd.DataFrame()
            print(f"Found {len(tasks)} active tasks")
            return pd.DataFrame(tasks)
        except Exception as e:
            print(f"Error fetching tasks: {str(e)}")
            return pd.DataFrame()

    def preprocess_data(self, df):
        """Clean and prepare task data"""
        if df.empty:
            return df
            
        # Handle missing fields
        df['description'] = df['activities'].apply(
            lambda x: x[0]['description'] if x and isinstance(x, list) and len(x) > 0 and 'description' in x[0] else ''
        )
        
        # Convert and calculate dates
        df['deadline'] = pd.to_datetime(df['date'])
        df['time_remaining'] = (df['deadline'] - pd.Timestamp.now()).dt.days
        
        # Standardize priorities
        priority_map = {
            'low': 1, 'normal': 2, 'medium': 2, 'high': 3,
            'critical': 4, 'urgent': 4, 'blocker': 5
        }
        df['priority'] = df['priority'].str.lower().map(priority_map).fillna(2)
        
        return df

    def analyze_complexity(self, df):
        """Perform text analysis to estimate complexity"""
        if df.empty:
            return df
            
        # Combine text features
        df['combined_text'] = df['title'] + " " + df['description']
        
        # Simple negative sentiment detection
        lower_text = df['combined_text'].str.lower()
        df['negative_score'] = lower_text.apply(
            lambda x: sum(1 for word in self.negative_keywords if word in x)
        )
        
        # TF-IDF and LDA
        tfidf_matrix = self.vectorizer.fit_transform(df['combined_text'])
        self.lda.fit(tfidf_matrix)
        topic_dist = self.lda.transform(tfidf_matrix)
        
        # Complexity features
        df['topic_entropy'] = [np.sum(-p*np.log(p+1e-10)) for p in topic_dist]
        df['word_count'] = df['combined_text'].str.split().str.len()
        
        # Normalized complexity score (0-1)
        df['complexity_score'] = (
            0.5 * (df['topic_entropy'] - df['topic_entropy'].min()) / 
                (df['topic_entropy'].max() - df['topic_entropy'].min() + 1e-10) +
            0.3 * (df['word_count'] - df['word_count'].min()) / 
                (df['word_count'].max() - df['word_count'].min() + 1e-10) +
            0.2 * (df['negative_score'] / (df['negative_score'].max() + 1e-10))
        )
        
        return df

    def calculate_scores(self, df):
        """Calculate priority scores"""
        if df.empty:
            return df
            
        # Urgency score (0-1) with non-linear decay
        df['urgency_score'] = np.where(
            df['time_remaining'] <= 0, 1.0,
            1 - np.tanh(df['time_remaining'] / 7)  # 1-week half-life
        )
        
        # Priority boosters
        in_progress_boost = df['stage'].str.contains('progress', case=False).astype(int) * 0.1
        critical_boost = (df['priority'] >= 4).astype(int) * 0.15
        
        # Final score calculation
        df['overall_score'] = (
            0.35 * df['complexity_score'] +
            0.30 * df['priority'] / 5 +
            0.25 * df['urgency_score'] +
            in_progress_boost +
            critical_boost
        )
        
        return df

    def generate_recommendations(self, top_n=5):
        """Run full pipeline and generate recommendations"""
        if not self.connect_db():
            return []
            
        df = self.fetch_tasks()
        if df.empty:
            return []
            
        df = self.preprocess_data(df)
        df = self.analyze_complexity(df)
        df = self.calculate_scores(df)
        
        # Get top tasks
        top_tasks = df.sort_values('overall_score', ascending=False).head(top_n)
        
        # Format results
        recommendations = []
        for _, task in top_tasks.iterrows():
            reasons = []
            
            # Complexity reasons
            if task['complexity_score'] > 0.7:
                reasons.append("high complexity")
            elif task['complexity_score'] > 0.4:
                reasons.append("moderate complexity")
            else:
                reasons.append("low complexity")
                
            # Urgency reasons
            if task['time_remaining'] <= 0:
                reasons.append("deadline passed!")
            elif task['time_remaining'] <= 3:
                reasons.append("urgent deadline")
            elif task['time_remaining'] <= 7:
                reasons.append("approaching deadline")
                
            # Priority reasons
            priority_labels = {1: 'low', 2: 'medium', 3: 'high', 4: 'critical', 5: 'blocker'}
            reasons.append(f"{priority_labels.get(task['priority'], 'medium')} priority")
            
            # Status awareness
            if 'progress' in str(task['stage']).lower():
                reasons.append("already in progress")
                
            rec = {
                'task_id': str(task['_id']),
                'title': task['title'],
                'deadline': task['deadline'].strftime('%Y-%m-%d'),
                'days_remaining': int(task['time_remaining']),
                'priority_score': round(task['overall_score'], 3),
                'reasons': ", ".join(reasons),
                'suggested_action': self.suggest_action(task),
                'description': task['description'][:100] + '...' if len(task['description']) > 100 else task['description']
            }
            recommendations.append(rec)
            
        return recommendations

    def suggest_action(self, task):
        """Generate suggested action based on task analysis"""
        if task['priority'] >= 4 or task['time_remaining'] <= 0:
            return "DROP EVERYTHING - address immediately"
        elif task['time_remaining'] <= 3:
            return "Expedite completion - allocate resources now"
        elif task['complexity_score'] > 0.7:
            return "Break into subtasks and start immediately"
        elif 'progress' in str(task['stage']).lower():
            return "Continue current work - monitor daily"
        else:
            return "Schedule for completion within 1-2 weeks"

if __name__ == "__main__":
    print("Task Prioritization System for Pro-Manage-DB")
    print("="*50)
    
    prioritizer = TaskPrioritizer(db_name="pro-manage-db")
    recommendations = prioritizer.generate_recommendations()
    
    if not recommendations:
        print("\nNo tasks to recommend or error occurred")
    else:
        print(f"\nTop {len(recommendations)} Recommended Tasks:")
        print("="*50)
        for i, rec in enumerate(recommendations, 1):
            print(f"\n#{i}: {rec['title']}")
            print(f"- Description: {rec['description']}")
            print(f"- Deadline: {rec['deadline']} ({rec['days_remaining']} days remaining)")
            print(f"- Priority Score: {rec['priority_score']:.3f}")
            print(f"- Reasons: {rec['reasons']}")
            print(f"- Action: {rec['suggested_action']}")
            print(f"- Task ID: {rec['task_id']}")

Task Prioritization System for Pro-Manage-DB
Successfully connected to MongoDB database 'pro-manage-db'


NotImplementedError: Database objects do not implement truth value testing or bool(). Please compare with None instead: database is not None