In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the task table from the CSV file (path is relative to the working directory)
csv_path = "task_data_200.csv"
df = pd.read_csv(csv_path)

# Show the first rows as a quick look at the columns that were loaded
df.head()

In [None]:
# Basic stats
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() appended a stray "None" line.
df.info()
print(df['Priority'].value_counts())
print(df['Completed'].value_counts())
# Only the ten most frequent assignees are listed
print(df['Assigned_To'].value_counts().head(10))

# Plot: Priority distribution
plt.figure(figsize=(6,4))
sns.countplot(x='Priority', data=df, palette='viridis')
plt.title("Priority Distribution")
plt.show()

# Plot: Completed status
plt.figure(figsize=(6,4))
sns.countplot(x='Completed', data=df, palette='coolwarm')
plt.title("Completed vs Not Completed")
plt.show()


In [None]:
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, confusion_matrix

# Download the NLTK 'stopwords' resource; clean_text() below reads the English
# list from it via stopwords.words('english')
nltk.download('stopwords')
from nltk.corpus import stopwords
import string

# Clean text
def clean_text(text, stop_words=None):
    """Normalise a task description before it is vectorised.

    The text is lower-cased, every character listed in ``string.punctuation``
    is removed, and stop words are dropped.

    Parameters
    ----------
    text : str
        Raw description. ``None`` and float NaN (the value pandas uses for a
        missing cell) are treated as an empty description; any other
        non-string value is converted with ``str()``.
    stop_words : collection of str, optional
        Words to drop. When omitted, NLTK's English stop-word list is used;
        it is read from the corpus once and cached on the function so that
        ``Series.apply(clean_text)`` does not reload it for every row.

    Returns
    -------
    str
        The remaining words joined by single spaces (``''`` for missing input).
    """
    # NaN is the only float that is not equal to itself
    if text is None or (isinstance(text, float) and text != text):
        return ''

    if stop_words is None:
        stop_words = getattr(clean_text, '_english_stop_words', None)
        if stop_words is None:
            stop_words = frozenset(stopwords.words('english'))
            clean_text._english_stop_words = stop_words

    text = str(text).lower()
    # Delete punctuation characters in one pass
    text = text.translate(str.maketrans('', '', string.punctuation))
    return ' '.join(word for word in text.split() if word not in stop_words)

# Normalise every description once; the vectoriser below reads this column
df['Clean_Description'] = df['Description'].apply(clean_text)

# TF-IDF
# NOTE(review): the vectoriser is fitted on all rows before the split, so its
# vocabulary and IDF weights also see the test documents. X and y are reused
# by a later cell, so this is left unchanged here; fitting on the training
# rows only would remove the leak.
tfidf = TfidfVectorizer()
X = tfidf.fit_transform(df['Clean_Description'])
y = df['Priority']

# Train/test split (20% held out, fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train model
model = MultinomialNB()
model.fit(X_train, y_train)

# Predict
y_pred = model.predict(X_test)

# Evaluate
print(classification_report(y_test, y_pred))

# Pin the matrix rows/columns to the classes the model was trained on and
# show their names on the axes; otherwise the heatmap only shows 0, 1, 2, ...
class_names = list(model.classes_)
sns.heatmap(
    confusion_matrix(y_test, y_pred, labels=class_names),
    annot=True, fmt='d', cmap='Blues',
    xticklabels=class_names, yticklabels=class_names,
)
plt.title("Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Split data
from sklearn.model_selection import train_test_split
# Hold out 20% of the TF-IDF rows (same test size and seed as the Naive Bayes cell)
X_train_rf, X_test_rf, y_train_rf, y_test_rf = train_test_split(X, y, test_size=0.2, random_state=42)

# Train Random Forest
rf = RandomForestClassifier(n_estimators=100, random_state=42)
rf.fit(X_train_rf, y_train_rf)

# Predictions
y_pred_rf = rf.predict(X_test_rf)

# Evaluation Report
print("Classification Report (Random Forest):\n")
print(classification_report(y_test_rf, y_pred_rf))

# Confusion Matrix
# Rows/columns are pinned to the classes the forest was trained on and labeled
# with their names, so the plot can be read without knowing the label order.
class_names_rf = list(rf.classes_)
plt.figure(figsize=(6,4))
sns.heatmap(
    confusion_matrix(y_test_rf, y_pred_rf, labels=class_names_rf),
    annot=True, fmt='d', cmap='Greens',
    xticklabels=class_names_rf, yticklabels=class_names_rf,
)
plt.title(" Random Forest - Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()


In [None]:
from datetime import datetime

# Convert Deadline to datetime
df['Deadline'] = pd.to_datetime(df['Deadline'])

# Add Days_Left
# "Today" is truncated to midnight so the result counts whole calendar days.
# With the raw current timestamp, a deadline at 00:00 tomorrow is only a few
# hours away and floors to 0 days, and the value changes with the hour the
# cell is run.
reference_date = pd.Timestamp.today().normalize()
df['Days_Left'] = (df['Deadline'] - reference_date).dt.days

# Binary: Urgent if Days_Left < 5 (rows with a missing deadline compare as False)
df['Urgent'] = df['Days_Left'] < 5

# Convert Completed to binary
# NOTE(review): any value other than exactly 'Yes' or 'No' maps to NaN —
# confirm the column contains only these two spellings.
df['Completed_Binary'] = df['Completed'].map({'Yes': 1, 'No': 0})

# Check updated columns
df[['Description', 'Priority', 'Days_Left', 'Urgent', 'Completed_Binary']].head()


In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import StandardScaler
from scipy.sparse import hstack
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# 1. TF-IDF on Cleaned Descriptions
tfidf = TfidfVectorizer()
X_text = tfidf.fit_transform(df['Clean_Description'])

# 2. Additional features
# NOTE(review): Completed_Binary comes from a Yes/No mapping and is NaN for any
# other value; Days_Left is NaN where the deadline is missing — confirm the
# data has neither before relying on these columns.
X_meta = df[['Days_Left', 'Urgent', 'Completed_Binary']].copy()
scaler = StandardScaler()
X_meta_scaled = scaler.fit_transform(X_meta)

# 3. Combine TF-IDF + Meta (column-wise: text features first, then the 3 meta columns)
X_combined = hstack([X_text, X_meta_scaled])

# 4. Labels
y_combined = df['Priority']

# 5. Split (20% held out, fixed seed for repeatability)
X_train_cb, X_test_cb, y_train_cb, y_test_cb = train_test_split(X_combined, y_combined, test_size=0.2, random_state=42)

# 6. Model
rf_combined = RandomForestClassifier(n_estimators=100, random_state=42)
rf_combined.fit(X_train_cb, y_train_cb)

# 7. Predict & Evaluate
y_pred_cb = rf_combined.predict(X_test_cb)
print("Combined Model Evaluation:\n")
print(classification_report(y_test_cb, y_pred_cb))

# Pin the matrix rows/columns to the trained classes and show their names on
# the axes; otherwise the heatmap only shows positional indices.
class_names_cb = list(rf_combined.classes_)
sns.heatmap(
    confusion_matrix(y_test_cb, y_pred_cb, labels=class_names_cb),
    annot=True, fmt='d', cmap='Blues',
    xticklabels=class_names_cb, yticklabels=class_names_cb,
)
plt.title("Confusion Matrix - Combined Model")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()


In [None]:
# New Voting Classifier without Naive Bayes
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier

# Hold-out split for the ensemble, defined here so the cell runs on a fresh
# kernel (nothing else in the notebook creates X_train_v / X_test_v / y_*_v).
# NOTE(review): assumes the ensemble is meant to use the combined
# TF-IDF + meta features from the previous cell — confirm. Test size and seed
# match the other model cells.
X_train_v, X_test_v, y_train_v, y_test_v = train_test_split(
    X_combined, y_combined, test_size=0.2, random_state=42
)

model_rf = RandomForestClassifier(n_estimators=100, random_state=42)
model_lr = LogisticRegression(max_iter=1000)

# Hard voting: each estimator casts one vote per sample, majority label wins
voting_clf = VotingClassifier(estimators=[
    ('rf', model_rf),
    ('lr', model_lr)
], voting='hard')

# Train and evaluate
voting_clf.fit(X_train_v, y_train_v)
y_pred_v = voting_clf.predict(X_test_v)

print("VotingClassifier (RF + LR) Results:\n")
print(classification_report(y_test_v, y_pred_v))

# Pin the matrix rows/columns to the trained classes and show their names
class_names_v = list(voting_clf.classes_)
sns.heatmap(
    confusion_matrix(y_test_v, y_pred_v, labels=class_names_v),
    annot=True, fmt='d', cmap='YlGnBu',
    xticklabels=class_names_v, yticklabels=class_names_v,
)
plt.title("Confusion Matrix - VotingClassifier (RF + LR)")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()


In [None]:
from collections import defaultdict
import random

# Shuffled working copy: df itself is left untouched, and the fixed seed keeps
# the shuffle (and therefore the resulting assignment) repeatable
df_assignment = df.copy().sample(frac=1, random_state=42)

# People who can receive tasks (edit this list to match the team)
team_members = ["Kiran", "Sneha", "Rohit", "Aanya"]

# Running count of tasks handed to each person, all starting at zero
workload = dict.fromkeys(team_members, 0)

# Tiers are processed in this order, so High tasks are handed out first
priority_order = ['High', 'Medium', 'Low']
assigned_tasks = []

for priority in priority_order:
    tasks = df_assignment.loc[df_assignment['Priority'] == priority]
    for idx, task_id in zip(tasks.index, tasks['Task_ID']):
        # min() returns the first least-loaded member in team order
        assignee = min(workload, key=workload.get)
        workload[assignee] = workload[assignee] + 1
        df_assignment.at[idx, 'Assigned_To'] = assignee
        assigned_tasks.append((task_id, assignee, priority))

# Summary of how many tasks each person ended up with
print("Task Assignment Complete!\nWorkload per Member:")
for name, n_tasks in workload.items():
    print(f"{name}: {n_tasks} tasks")

# First ten rows of the shuffled frame with the new assignees
preview_columns = ['Task_ID', 'Description', 'Priority', 'Assigned_To']
df_assignment[preview_columns].head(10)
