In [17]:
from pymongo import MongoClient
from datetime import datetime
import pandas as pd

def connect_to_mongodb(uri="mongodb://localhost:27017/", db_name="proman-db"):
    """Connect to MongoDB and return the database object"""
    try:
        client = MongoClient(uri)
        db = client[db_name]
        print("Successfully connected to MongoDB")
        return db
    except Exception as e:
        print(f"Error connecting to MongoDB: {e}")
        return None

In [18]:
def fetch_and_preprocess_tasks(db):
    """Fetch tasks from MongoDB and preprocess for analysis"""
    tasks_collection = db['tasks']
    tasks = list(tasks_collection.find({}))
    
    # Convert to DataFrame
    df = pd.DataFrame(tasks)
    
    # Calculate time remaining
    df['deadline'] = pd.to_datetime(df['deadline'])
    df['time_remaining'] = (df['deadline'] - datetime.now()).dt.days
    
    # Handle missing priorities
    if 'priority' not in df.columns:
        df['priority'] = None
    
    return df

In [19]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import LatentDirichletAllocation
import numpy as np

def analyze_text_complexity(df):
    """Analyze text fields to estimate task complexity"""
    # Combine title and description
    df['combined_text'] = df['project name/title'] + " " + df['description']
    
    # TF-IDF Vectorization
    vectorizer = TfidfVectorizer(max_features=1000, stop_words='english')
    tfidf_matrix = vectorizer.fit_transform(df['combined_text'])
    
    # Topic Modeling to assess complexity
    lda = LatentDirichletAllocation(n_components=5, random_state=42)
    lda.fit(tfidf_matrix)
    
    # Complexity score based on topic distribution entropy
    topic_dist = lda.transform(tfidf_matrix)
    df['complexity_score'] = [np.sum(-p*np.log(p)) for p in topic_dist]
    
    # Normalize scores
    df['complexity_score'] = (df['complexity_score'] - df['complexity_score'].min()) / \
                            (df['complexity_score'].max() - df['complexity_score'].min())
    
    return df

In [20]:
def calculate_priority_scores(df):
    """Calculate overall priority scores based on multiple factors"""
    # Map priority levels to numerical values if they exist
    if df['priority'].notna().any():
        priority_map = {'low': 1, 'medium': 2, 'high': 3}
        df['priority_numeric'] = df['priority'].str.lower().map(priority_map).fillna(1)
    else:
        df['priority_numeric'] = 1
    
    # Calculate urgency score (more urgent = higher score)
    max_days = max(1, df['time_remaining'].max())  # Avoid division by zero
    df['urgency_score'] = 1 - (df['time_remaining'] / max_days)
    
    # Combine scores (weights can be adjusted)
    df['overall_score'] = (0.4 * df['complexity_score'] + 
                          0.3 * df['priority_numeric'] / 3 + 
                          0.3 * df['urgency_score'])
    
    return df

In [21]:
def generate_recommendations(df, top_n=5):
    """Generate prioritized recommendations with explanations"""
    # Sort by overall score
    df_sorted = df.sort_values('overall_score', ascending=False).head(top_n)
    
    recommendations = []
    for _, row in df_sorted.iterrows():
        explanation = []
        if row['complexity_score'] > 0.7:
            explanation.append("high complexity")
        elif row['complexity_score'] > 0.4:
            explanation.append("moderate complexity")
        else:
            explanation.append("low complexity")
            
        if row['time_remaining'] <= 3:
            explanation.append("urgent deadline")
        elif row['time_remaining'] <= 7:
            explanation.append("approaching deadline")
            
        if 'priority' in row and pd.notna(row['priority']):
            explanation.append(f"{row['priority']} priority")
            
        rec = {
            'project/task': row['project name/title'],
            'deadline': str(row['deadline'].date()),
            'days_remaining': row['time_remaining'],
            'reason': ", ".join(explanation),
            'overall_score': round(row['overall_score'], 3)
        }
        recommendations.append(rec)
    
    return recommendations

In [22]:
def run_prioritization_pipeline():
    """Complete pipeline from data extraction to recommendations"""
    # Connect to DB
    db = connect_to_mongodb()
    if not db:
        return None
    
    # Extract and preprocess data
    df = fetch_and_preprocess_tasks(db)
    
    # Analyze text complexity
    df = analyze_text_complexity(df)
    
    # Calculate priority scores
    df = calculate_priority_scores(df)
    
    # Generate recommendations
    recommendations = generate_recommendations(df)
    
    return recommendations

In [23]:
if __name__ == "__main__":
    recommendations = run_prioritization_pipeline()
    print("Top 5 Recommended Tasks/Projects:")
    for i, rec in enumerate(recommendations, 1):
        print(f"\n{i}. {rec['project/task']}")
        print(f"   Deadline: {rec['deadline']} ({rec['days_remaining']} days remaining)")
        print(f"   Priority Score: {rec['overall_score']}")
        print(f"   Reason: {rec['reason']}")

Successfully connected to MongoDB


NotImplementedError: Database objects do not implement truth value testing or bool(). Please compare with None instead: database is not None