<a href="https://colab.research.google.com/github/gawandepranil/INFOTACT/blob/main/ai_powered_task_management_system.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [89]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta
import matplotlib.pyplot as plt
import seaborn as sns
import nltk
import re
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer

In [90]:
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [91]:
df=pd.read_csv("/content/ai_task_management.csv")

In [92]:
df.shape

(20122, 8)

In [93]:
df.head()

Unnamed: 0,Task Description,Category,Skill,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7
0,Implement user authentication,backend,spring boot,,,,,
1,Optimize server performance,backend,asp.net,,,,,
2,Manage database operations,backend,django,,,,,
3,Implement user authentication,backend,api,,,,,
4,Build a microservice,backend,kotlin,,,,,


In [94]:
df.columns

Index(['Task Description', 'Category', 'Skill', 'Unnamed: 3', 'Unnamed: 4',
       'Unnamed: 5', 'Unnamed: 6', 'Unnamed: 7'],
      dtype='object')

In [95]:
df.rename(columns={
    'Unnamed: 3': 'Deadline',
    'Unnamed: 4': 'Priority',
    'Unnamed: 5': 'User Skills',
    'Unnamed: 6': 'Workload',
    'Unnamed: 7': 'Assigned User'
}, inplace=True)


In [96]:
df.head()

Unnamed: 0,Task Description,Category,Skill,Deadline,Priority,User Skills,Workload,Assigned User
0,Implement user authentication,backend,spring boot,,,,,
1,Optimize server performance,backend,asp.net,,,,,
2,Manage database operations,backend,django,,,,,
3,Implement user authentication,backend,api,,,,,
4,Build a microservice,backend,kotlin,,,,,


In [97]:

unique_tasks = df['Task Description'].unique()
print("Total unique task descriptions:", len(unique_tasks))

unique_categories = df['Category'].unique()
print("Total unique categories:", len(unique_categories))

unique_skills = df['Skill'].unique()
print("Total unique skills:", len(unique_skills))

Total unique task descriptions: 265
Total unique categories: 13
Total unique skills: 232


In [98]:
duplicate_count = df.duplicated().sum()
print(f"Total duplicate rows: {duplicate_count}")

Total duplicate rows: 19333


In [99]:
df = df.drop_duplicates()

In [100]:
print("shape",df.shape)
print("*******")
print(df.head())

shape (789, 8)
*******
                Task Description Category        Skill  Deadline  Priority  \
0  Implement user authentication  backend  spring boot       NaN       NaN   
1    Optimize server performance  backend      asp.net       NaN       NaN   
2     Manage database operations  backend       django       NaN       NaN   
3  Implement user authentication  backend          api       NaN       NaN   
4           Build a microservice  backend       kotlin       NaN       NaN   

   User Skills  Workload  Assigned User  
0          NaN       NaN            NaN  
1          NaN       NaN            NaN  
2          NaN       NaN            NaN  
3          NaN       NaN            NaN  
4          NaN       NaN            NaN  


In [101]:
print("Null values:\n", df.isnull().sum())

Null values:
 Task Description      0
Category              0
Skill                 0
Deadline            789
Priority            789
User Skills         789
Workload            789
Assigned User       789
dtype: int64


In [102]:
df.dropna(subset=['Task Description', 'Skill'], inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.dropna(subset=['Task Description', 'Skill'], inplace=True)


In [103]:
print("Category distribution:\n", df['Category'].value_counts())

Category distribution:
 Category
ai/ml                      127
backend                    122
frontend                   112
devops                      74
deployment                  71
database administration     64
testing                     58
project management          58
documentation               53
cloud                       35
data science                 5
database                     5
ui/ux design                 5
Name: count, dtype: int64


In [104]:
print("\nSkill distribution:\n", df['Skill'].value_counts())


Skill distribution:
 Skill
aws                    15
docker                 15
kubernetes             15
google cloud           14
heroku                 13
                       ..
Logistic Regression     1
pytorch                 1
apache spark            1
nltk                    1
tensorflow              1
Name: count, Length: 232, dtype: int64


In [105]:
all_skills = df['Skill'].dropna().str.lower().str.split(',').sum()
unique_skills = list(set([skill.strip() for skill in all_skills]))


In [106]:
users = [
    'Ram', 'abhay', 'ashwith', 'Charu','Aarav', 'Maahi', 'Ramesh', 'karn', 'Karthik', 'Anaaaya',
    'Digvijay', 'Sam', 'Arjun','Neeru', 'prabhas', 'keerthan', 'bhargav', 'ramya', 'sushma', 'bindhu',
    'vaishu','jaya', 'deepthi', 'aadhya', 'Mahesh', 'raakhi', 'tanush', 'Divya', 'kiran', 'Nishila',
    'nikitha', 'krishna', 'abhimanyu', 'krish', 'parnika', 'amani', 'Harshitha', 'Pranavi', 'aaradhya', 'Arnav'
]

user_skill_map = {}
for user in users:
    assigned_skills = random.sample(unique_skills, random.choice([3, 4]))
    user_skill_map[user] = assigned_skills



for user, skills in user_skill_map.items():
    print(f"{user}: {skills}")


Ram: ['react', 'deepspeech', 'load balancing']
abhay: ['server configuration', 'jwt', 'dialogflow']
ashwith: ['python', 'data', 'aria']
Charu: ['jest', 'prometheus', 'crnn']
Aarav: ['faster r-cnn', 'gsap', 'trello', 'php']
Maahi: ['yolov4', 'facenet', 'rust', 'redis']
Ramesh: ['transformer', 'ci/cd', 'api documentation', 'gpt-2 fine-tuning']
karn: ['mediapipe', 'java', 'librosa', 'matrix factorization']
Karthik: ['prometheus', 'deepspeech', 'ms project', 'load balancing']
Anaaaya: ['kotlin', 'gpt-3', 'foundation']
Digvijay: ['webpack', 'liwc', 'docker', 'selenium']
Sam: ['redux', 'django', 'u-net']
Arjun: ['kotlin', 'react', 'zero-shot bert', 'cypress']
Neeru: ['mobilenet', 'kubernetes', 'java']
prabhas: ['jenkins', 'elasticsearch', 'neuralstyle']
keerthan: ['ruby', 'deoldify', 'pca']
bhargav: ['openke', 'vue', 'redis']
ramya: ['yolov4', 'zero-shot bert', 'storage']
sushma: ['xception', 'vader', 'praat']
bindhu: ['django', 'ms project', 'opencv']
vaishu: ['pca', 'transformer', 'dall-e'

In [107]:
# adding deadlines
today = datetime.today().date()
df['Deadline'] = [today + timedelta(days=random.randint(1, 60)) for _ in range(len(df))]
# adding workload
df['Workload'] = np.random.randint(1, 11, size=len(df))
# adding priority

def assign_priority(row):
    days_left = (row['Deadline'] - today).days
    if days_left <= 7 or row['Workload'] >= 8:
        return 'High'
    elif days_left <= 20:
        return 'Medium'
    else:
        return 'Low'

df['Priority'] = df.apply(assign_priority, axis=1)

def find_best_user(required_skill):
    # Iterate over the user_skill_map dictionary, which has user names as keys and skills as values
    for user, skills in user_skill_map.items():
        # Check if the required skill is in the list of skills for the current user
        if isinstance(skills, list) and required_skill.lower() in [s.lower() for s in skills]:
            return user
    # If no user is found with the exact skill, return a random user from the user_skill_map keys
    return random.choice(list(user_skill_map.keys()))

# Apply the corrected function to the 'Skill' column
df['Assigned User'] = df['Skill'].apply(find_best_user)


In [108]:
def get_user_skills(user):
    # Use user_skill_map to get skills for the user, default to an empty list if user not found
    return ', '.join(user_skill_map.get(user, []))

df['User Skills'] = df['Assigned User'].apply(get_user_skills)

In [109]:
df.head()

Unnamed: 0,Task Description,Category,Skill,Deadline,Priority,User Skills,Workload,Assigned User
0,Implement user authentication,backend,spring boot,2025-07-05,High,"keras, gsap, spring boot, vader",10,Arnav
1,Optimize server performance,backend,asp.net,2025-07-21,Low,"kotlin, react, zero-shot bert, cypress",1,Arjun
2,Manage database operations,backend,django,2025-07-29,Low,"redux, django, u-net",6,Sam
3,Implement user authentication,backend,api,2025-07-25,High,"transformer, ci/cd, api documentation, gpt-2 f...",9,Ramesh
4,Build a microservice,backend,kotlin,2025-07-07,Low,"kotlin, gpt-3, foundation",4,Anaaaya
