In [1]:
import pandas as pd

In [24]:
# Load the extended marketing-research training-needs CSV into `df`.
# NOTE(review): this is an absolute path and the folder name ends with a space
# ("Training Needs AI /") — confirm it resolves on the machine running this cell.
df = pd.read_csv(r'/Datasets/Training Needs AI /marketing_research_training_data_extended.csv')

In [25]:
# Preview the first rows to check the columns that were loaded.
# NOTE(review): the rendered output includes an "Unnamed: 0" column, which
# presumably is an index that was written into the CSV — confirm whether it is needed.
df.head()

Unnamed: 0,Employee_ID,Last_Name,Email,Role,Department,Training_Needs,Training_Type
0,1,Smith,emma.smith@marketingresearch.com,Specialist,Data Analytics,Interested in teamwork strategies with tools l...,Soft Skill
1,2,Smith,laura.smith@marketingresearch.com,Manager,Data Analytics,Requires advanced training in SEMrush for auto...,Hard Skill
2,3,Garcia,emily.garcia@marketingresearch.com,Coordinator,Market Research,Requires emotional intelligence training to co...,Soft Skill
3,4,Williams,emma.williams@marketingresearch.com,Coordinator,Brand Strategy,Seeking training on communication skills and u...,Soft Skill
4,5,Martinez,jane.martinez@marketingresearch.com,Coordinator,Survey Analysis,Looking for specialized knowledge in AWS for s...,Hard Skill


In [2]:
import pandas as pd
import ollama
import json
from concurrent.futures import ThreadPoolExecutor

# Load the dataset (re-binds `df`).
# NOTE(review): this path differs from the one used by the earlier load cell —
# confirm which file is the intended input.
df = pd.read_csv("/marketing_research_training_data_extended.csv")

# Keep only the first 100 rows so the per-row model calls further down finish faster
df = df.head(100)

# LLM-backed labelling helpers.
# Both public functions ask a local Ollama chat model for a one-key JSON object
# and fall back to a neutral value when the reply cannot be used.

# Canonical category labels, keyed by their lower-cased form so replies that
# differ only in case or surrounding whitespace are still accepted.
_CATEGORY_LOOKUP = {
    label.lower(): label
    for label in (
        "Technical Skill", "Marketing Skill", "Management Skill",
        "Social Skill", "Creative Skill",
    )
}


def _first_json_object(raw):
    """Return the first JSON object (dict) embedded in ``raw``, or None.

    Small models frequently wrap the JSON in prose or code fences, or return a
    list of objects; scanning for the first decodable ``{...}`` handles all of
    those shapes instead of requiring the whole reply to be valid JSON.
    """
    if not isinstance(raw, str):
        return None
    decoder = json.JSONDecoder()
    start = raw.find("{")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(raw[start:])
        except json.JSONDecodeError:
            candidate = None
        if isinstance(candidate, dict):
            return candidate
        start = raw.find("{", start + 1)
    return None


def _ask_for_json(prompt, model):
    """Send ``prompt`` to the Ollama chat model and return the JSON object in its reply.

    Returns None when the reply is empty, malformed, or contains no JSON object.
    Errors other than AttributeError/TypeError raised by the client (for example
    connection failures) are not caught here and propagate to the caller.
    """
    try:
        response = ollama.chat(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            format="json",  # ask the server to constrain the reply to JSON
        )
        if not response or "message" not in response:
            return None
        content = response["message"].get("content")
    except (AttributeError, TypeError):
        return None  # Unexpected response shape
    if not content:
        return None
    return _first_json_object(content)


# Function to extract only the tool name using JSON enforcement
def extract_tools(text, model="tinyllama"):
    """Return the tool/software/platform named in a training-need description.

    Args:
        text: Free-text training need. Non-string or blank values (e.g. NaN
            from an empty cell) are not sent to the model.
        model: Name of the Ollama model to query.

    Returns:
        The tool name with surrounding whitespace removed, or None when no tool
        is reported or the reply cannot be interpreted.
    """
    if not isinstance(text, str) or not text.strip():
        return None

    prompt = f"""
    Identify if there is any tool, software, or platform mentioned in the following training need description.
    
    - Return the response as a JSON object.
    - The JSON should have a single key: "tool_name".
    - The value should be the exact tool/software/platform name (e.g., "Tableau", "Google Analytics").
    - If no tool is mentioned, set the value as "None".
    - Do NOT include a list or any extra text.

    Training Need: "{text}"

    Example JSON Response:
    {{
        "tool_name": "Tableau"
    }}
    If no tool is mentioned:
    {{
        "tool_name": "None"
    }}
    """

    payload = _ask_for_json(prompt, model)
    if payload is None:
        return None

    tool_name = payload.get("tool_name")
    if not isinstance(tool_name, str):
        return None

    tool_name = tool_name.strip()
    # The prompt asks for the literal string "None" when no tool is present.
    if not tool_name or tool_name.lower() == "none":
        return None
    return tool_name


# Function to categorize training needs into broad skill categories
def categorize_broad_skills(text, model="tinyllama"):
    """Classify a training-need description into one broad skill category.

    Args:
        text: Free-text training need. Non-string or blank values (e.g. NaN
            from an empty cell) are not sent to the model.
        model: Name of the Ollama model to query.

    Returns:
        One of "Technical Skill", "Marketing Skill", "Management Skill",
        "Social Skill", "Creative Skill", or "Uncategorized" when the reply is
        missing, malformed, or names any other category.
    """
    if not isinstance(text, str) or not text.strip():
        return "Uncategorized"

    prompt = f"""
    Classify the following training need into one of the broader skill categories based on the tool or skill mentioned.

    Possible categories:
    - "Technical Skill" (if it involves programming, data analysis, cloud computing, or software tools)
    - "Marketing Skill" (if it involves SEO, advertising, social media, branding, or digital marketing)
    - "Management Skill" (if it involves project management, leadership, team coordination, or planning)
    - "Social Skill" (if it involves communication, negotiation, emotional intelligence, or customer relations)
    - "Creative Skill" (if it involves design, content creation, video editing, or branding)

    Return the response as a JSON object:
    {{
        "category": "Technical Skill"
    }}

    Training Need: "{text}"
    """

    payload = _ask_for_json(prompt, model)
    category = payload.get("category") if payload is not None else None
    if not isinstance(category, str):
        return "Uncategorized"

    # Normalise case/whitespace before validating against the allowed labels.
    return _CATEGORY_LOOKUP.get(category.strip().lower(), "Uncategorized")


# **SPEED BOOST: Run API Calls in Parallel (Multithreading)**
def process_column_parallel(func, data, max_workers=10):
    """Apply ``func`` to each item of ``data`` on a thread pool.

    Args:
        func: Callable taking a single item.
        data: Iterable of items to process.
        max_workers: Size of the thread pool.

    Returns:
        A list of results in the same order as ``data``.
    """
    with ThreadPoolExecutor(max_workers=max_workers) as pool:
        # Executor.map yields results in input order, whatever order the
        # worker threads finish in.
        return [outcome for outcome in pool.map(func, data)]


# Label every training need with 10 worker threads per pass: first the tool
# name, then the broad skill category. Each pass calls its function once per row.
df["Extracted_Tool"] = process_column_parallel(extract_tools, df["Training_Needs"], max_workers=10)
df["Broad_Skill_Category"] = process_column_parallel(categorize_broad_skills, df["Training_Needs"], max_workers=10)

# Save the updated dataset (the DataFrame index is not written)
# NOTE(review): the file name says "filtered_200" but `df` was limited to 100 rows
# earlier in this cell — confirm the intended name.
# NOTE(review): absolute, user-specific output path — confirm the folder exists
# wherever this notebook is run.
csv_filename_updated = "/Users/amandeepsinghgill/Desktop/DS Projects/Datasets/Training Needs AI /marketing_research_training_data_filtered_200.csv"
df.to_csv(csv_filename_updated, index=False)

# Display the first few rows to verify
print(df.head())


   Employee_ID Last_Name                                Email         Role  \
0            1     Smith     emma.smith@marketingresearch.com   Specialist   
1            2     Smith    laura.smith@marketingresearch.com      Manager   
2            3    Garcia   emily.garcia@marketingresearch.com  Coordinator   
3            4  Williams  emma.williams@marketingresearch.com  Coordinator   
4            5  Martinez  jane.martinez@marketingresearch.com  Coordinator   

        Department                                     Training_Needs  \
0   Data Analytics  Interested in teamwork strategies with tools l...   
1   Data Analytics  Requires advanced training in SEMrush for auto...   
2  Market Research  Requires emotional intelligence training to co...   
3   Brand Strategy  Seeking training on communication skills and u...   
4  Survey Analysis  Looking for specialized knowledge in AWS for s...   

  Training_Type    Extracted_Tool Broad_Skill_Category  
0    Soft Skill              None  