# You can get the dataset from here 
https://www.kaggle.com/datasets/umerfarooq09/skill-based-task-assignment

# The imports 

In [1]:
import json
import os
import re

import google.generativeai as genai
import pandas as pd
from langchain import PromptTemplate

# Load and inspect the data

In [3]:
# Load the task dataset; the relative path means the CSV must sit in the
# notebook's working directory.
data = pd.read_csv('Task Catagories.csv.csv')

In [4]:
# Preview the first rows to check the columns and the kind of values they hold.
data.head()

Unnamed: 0,Task Description,Category,Skill
0,Implement user authentication,backend,spring boot
1,Optimize server performance,backend,asp.net
2,Manage database operations,backend,django
3,Implement user authentication,backend,api
4,Build a microservice,backend,kotlin


In [5]:
# Count missing values per column.
data.isnull().sum()

Task Description    0
Category            0
Skill               0
dtype: int64

In [6]:
# Number of rows in the dataset.
len(data)

20122

# Step 1: Extract unique words from 'Task Description'

In [None]:
def extract_unique_words(df, column_name):
    """Return the distinct lowercase words found in one text column.

    Every non-missing value of ``df[column_name]`` is converted to ``str``,
    split on whitespace and lowercased.

    Args:
        df: DataFrame that holds the text column.
        column_name: Name of the column to tokenize.

    Returns:
        Alphabetically sorted list of unique lowercase words; an empty list
        when the column has no text.
    """
    # Missing cells (NaN/None) have no .split() and would raise AttributeError,
    # so they are dropped before tokenizing.
    descriptions = df[column_name].dropna().astype(str)
    words = {word.lower() for desc in descriptions for word in desc.split()}
    # A set has no stable iteration order for strings across interpreter runs;
    # sorting makes the returned list (and anything built from it) reproducible.
    return sorted(words)

# Vocabulary of the 'Task Description' column; it is passed to the keyword
# prompt in the example-usage cell.
unique_words = extract_unique_words(data, 'Task Description')

# Step 2: Initialize Google Gemini model

In [None]:
# Read the key from the GEMINI_API_KEY environment variable so a real secret
# never has to be typed into (or committed with) the notebook. The placeholder
# is only used when the variable is not set.
api_key = os.environ.get('GEMINI_API_KEY', 'YOUR_GEMINI_API_KEY')
genai.configure(api_key=api_key)
llm = genai.GenerativeModel('models/gemini-1.5-flash')

# Step 3: Use the template to reformulate <br>the prompt based on user input and unique words

In [None]:
# Prompt used to expand a user request into search keywords.
# It has two placeholders, filled through `prompt_template.format(...)`:
#   {prompt}       - the raw user request
#   {unique_words} - the unique words taken from the task descriptions
# The model is asked to reply with a comma-separated list of keywords.
prompt_template = PromptTemplate(
    input_variables=["prompt", "unique_words"],
    template="""
    Based on the task descriptions and unique keywords below, generate a comma-separated list 
    of relevant keywords related to the user prompt.
    User prompt: "{prompt}"
    Unique keywords from task descriptions: {unique_words}
    """
) 
def get_related_keywords(user_prompt, unique_words, llm):
    """Ask the LLM for keywords related to ``user_prompt``.

    The module-level ``prompt_template`` is filled with the user prompt and
    the known vocabulary, sent to the model, and the comma-separated reply is
    parsed into a list.

    Args:
        user_prompt: Free-text request from the user.
        unique_words: Vocabulary inserted into the prompt.
        llm: Object exposing ``generate_content(prompt)`` whose result has a
            ``text`` attribute.

    Returns:
        List of non-empty keywords with surrounding whitespace removed, in the
        order the model produced them.
    """
    prompt = prompt_template.format(prompt=user_prompt, unique_words=unique_words)
    response = llm.generate_content(prompt)
    # The reply is not guaranteed to use exactly ", " as separator and usually
    # ends with a newline. Split on the comma alone, trim each piece and drop
    # empty ones so no keyword carries stray whitespace and no empty keyword
    # (which would match every row) reaches the filter step.
    pieces = (piece.strip() for piece in response.text.split(','))
    return [piece for piece in pieces if piece]

# Step 4: Filter DataFrame based <br>on retrieved keywords across all columns

In [None]:
def filter_tasks_by_keywords(df, keywords, columns=('Task Description', 'Category', 'Skill')):
    """Return the rows of ``df`` where any searched column contains any keyword.

    Matching is a case-insensitive, literal substring search: keywords are
    escaped before being combined into one regular expression, so characters
    such as ``+``, ``.`` or ``(`` are matched as themselves.

    Args:
        df: DataFrame of tasks; every name in ``columns`` must be a text column.
        keywords: Iterable of keyword strings. Surrounding whitespace is
            ignored; blank and non-string entries are skipped.
        columns: Names of the columns to search. Defaults to the three columns
            of the task dataset.

    Returns:
        The matching rows of ``df`` (original index labels kept). The result
        is empty when there is no usable keyword.
    """
    cleaned = [kw.strip() for kw in keywords if isinstance(kw, str) and kw.strip()]
    if not cleaned:
        # An empty pattern matches every string, which would return the whole
        # dataset; "no keywords" should mean "no matches".
        return df.iloc[0:0]

    # Build the alternation once and reuse it for every column.
    pattern = '|'.join(re.escape(kw) for kw in cleaned)

    # OR the per-column masks together, starting from "nothing matches".
    combined_mask = pd.Series(False, index=df.index)
    for column in columns:
        column_mask = df[column].str.contains(pattern, case=False, na=False)
        combined_mask = combined_mask | column_mask

    return df[combined_mask]

# Step 5: Combine the filtered tasks <br>with the user prompt and query <br>Gemini for the final answer

In [None]:
def query_llm_for_answer(filtered_tasks, original_prompt, llm):
    """Ask the LLM which of the filtered tasks best match the user's request.

    Each row of ``filtered_tasks`` is rendered as a three-line entry (task,
    category, required skill). The entries are embedded in a prompt together
    with ``original_prompt`` and sent to ``llm.generate_content``.

    Args:
        filtered_tasks: DataFrame with 'Task Description', 'Category' and
            'Skill' columns.
        original_prompt: The user's request, quoted verbatim in the prompt.
        llm: Object exposing ``generate_content(prompt)`` whose result has a
            ``text`` attribute.

    Returns:
        The ``text`` of the model's response.
    """
    # One structured entry per row; a blank line separates consecutive entries.
    task_listing = "\n\n".join(
        f"Task: {record['Task Description']}\n"
        f"Category: {record['Category']}\n"
        f"Required Skill: {record['Skill']}"
        for _, record in filtered_tasks.iterrows()
    )

    final_prompt = f"""
    Here are the relevant tasks with their categories and required skills:
    
    {task_listing}
    
    Based on these tasks, please analyze and provide the most relevant matches for the following request:
    "{original_prompt}"
    
    Please structure your response to include:
    1. The most relevant task(s)
    2. Their categories
    3. Required skills
    Only include information that is directly relevant to the user's request.
    """

    return llm.generate_content(final_prompt).text


# Example usage

In [22]:
user_prompt = "Website creation"
related_keywords = get_related_keywords(user_prompt, unique_words, llm)
# The dataset loaded at the top of the notebook is named `data`; `df` is never
# defined, so passing it raised NameError on a fresh kernel.
filtered_tasks = filter_tasks_by_keywords(data, related_keywords)
response = query_llm_for_answer(filtered_tasks, user_prompt, llm)
print("Response to user:", response)

Response to user: Here's the analysis of "Website creation" based on the provided tasks:

1. **Most Relevant Tasks:**
    * **Create a responsive website**
    * **Implement a landing page**
    * **Improve website accessibility**
    * **Design a user-friendly interface**

2. **Categories:** 
    * Frontend

3. **Required Skills:**
    * HTML
    * CSS
    * JavaScript
    * Bootstrap
    * Foundation
    * Materialize
    * Vue
    * React
    * Redux
    * Sass
    * UX
    * UI
    * jQuery
    * User-friendly 
    * ARIA 



# Saving unique words

In [20]:
def save_unique_words(df, path='unique_words.json'):
    """Write the unique lowercase words of 'Task Description' to a JSON file.

    Args:
        df: DataFrame with a 'Task Description' text column.
        path: Destination file. Defaults to 'unique_words.json' in the current
            working directory.
    """
    # Reuse the notebook's tokenizer instead of repeating its set
    # comprehension here, so both places always agree on what a "word" is.
    # Sorting makes the file content identical from one run to the next.
    words = sorted(extract_unique_words(df, 'Task Description'))

    # Explicit encoding so the file does not depend on the platform default.
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(words, f)

# Write the vocabulary of the loaded dataset to unique_words.json.
save_unique_words(data)