# AI Engineering Jobs Analysis

Analysis of 895 job descriptions extracted from builtin.com.

Searched for jobs containing "AI Engineer" keyword from LA, NY, London, Amsterdam and Berlin for last 4 weeks. Done at the beginning of February 2026, so it mostly contains jobs published in January 2026.

In [1]:
import yaml
import pandas as pd
from pathlib import Path
from collections import Counter

pd.set_option('display.max_rows', 50)
pd.set_option('display.max_colwidth', 80)

In [2]:
# Load all structured YAML files into a flat DataFrame
STRUCTURED_DIR = Path('data_structured')

records = []
for file in STRUCTURED_DIR.glob('*.yaml'):
    try:
        with open(file, 'r', encoding='utf-8') as f:
            job = yaml.safe_load(f)

        pos = job.get('position', {})
        comp = job.get('company', {})
        skills = pos.get('skills', {})
        records.append({
            'job_id': job.get('meta', {}).get('job_id', ''),
            'company': comp.get('name', ''),
            'stage': comp.get('stage', ''),
            'focus': comp.get('focus', ''),
            'title': pos.get('title', ''),
            'ai_type': pos.get('ai_type', {}).get('type', 'unknown'),
            'is_customer_facing': pos.get('is_customer_facing', False),
            'is_management': pos.get('is_management', False),
            'responsibilities': pos.get('responsibilities', []),
            'use_cases': comp.get('use_cases', pos.get('use_cases', [])),
            'skills_genai': skills.get('genai', []),
            'skills_ml': skills.get('ml', []),
            'skills_web': skills.get('web', []),
            'skills_databases': skills.get('databases', []),
            'skills_data': skills.get('data', []),
            'skills_cloud': skills.get('cloud', []),
            'skills_ops': skills.get('ops', []),
            'skills_languages': skills.get('languages', []),
            'skills_domains': skills.get('domains', []),
            'skills_other': skills.get('other', []),
        })
    except Exception as e:
        print(f'Error loading {file}: {e}')

In [3]:

df = pd.DataFrame(records)

# Ensure list columns are actually lists
list_cols = [c for c in df.columns if c.startswith('skills_')] + ['responsibilities', 'use_cases']
for col in list_cols:
    df[col] = df[col].apply(lambda x: x if isinstance(x, list) else [])

print(f'Loaded {len(df)} jobs')
df.head(3)

Loaded 895 jobs


Unnamed: 0,job_id,company,stage,focus,title,ai_type,is_customer_facing,is_management,responsibilities,use_cases,skills_genai,skills_ml,skills_web,skills_databases,skills_data,skills_cloud,skills_ops,skills_languages,skills_domains,skills_other
0,8019065,Scalable Capital,Series B or later,Digital investment and banking platform,AI Software Engineer – Python (m/f/x),ai-first,True,False,[Develop and maintain AI/LLM services for internal and potential client-faci...,[Integrating AI capabilities into real-world financial applications and inve...,"[LangGraph, OpenAI API, Anthropic API, agents, RAG]",[],[FastAPI],[],[],[AWS],"[Terraform, Docker, CI/CD, monitoring]",[Python],[],[]
1,6880819,Stripe,Public,Financial infrastructure platform for payments and business\nrevenue,"Staff Product Designer, Developer AI",ai-support,True,False,[Design and prototype innovative new developer tools and experiences powered...,[Streamline and automate building and maintaining Stripe integrations using ...,"[LLMs, MCP server, AI engineering techniques, Claude Code, v0]",[],[API design],[],[],[],[],[],[],"[Prototyping, Design engineering, Developer tools]"
2,7298439,hyperexponential,Series B,AI-powered platform for insurance pricing and underwriting,Senior Technical Writer/Documentation Engineer - AI,ai-support,True,False,"[Designing and delivering structured, AI-powered documentation that makes hx...","[AI-powered insurance pricing and underwriting platform, Domain-specific AI ...","[LLMs, Claude]",[],[],[],[content pipelines],[],[],[],[],"[Zapier, n8n, Grammarly, documentation systems, technical writing, automatio..."


In [20]:
# Helper: explode a skill list column into counts
def skill_counts(df_subset, col):
    return df_subset[col].explode().dropna().value_counts()

# Helper: count jobs that have a specific skill (substring match) across all skill columns
def jobs_with_skill(df_subset, skill_name):
    skill_lower = skill_name.lower()
    mask = df_subset.apply(
        lambda row: any(
            skill_lower in s.lower()
            for col in SKILL_COLS
            for s in (row[col] if isinstance(row[col], list) else [])
        ), axis=1
    )
    return mask.sum()

SKILL_COLS = [c for c in df.columns if c.startswith('skills_')]

def all_skills_lower(row):
    """Get all skills from a row as lowercase strings."""
    out = []
    for col in SKILL_COLS:
        if isinstance(row[col], list):
            out.extend(s.lower() for s in row[col])
    return out

# Subsets
ai_first = df[df['ai_type'] == 'ai-first']
ai_support = df[df['ai_type'] == 'ai-support']
ml_first = df[df['ai_type'] == 'ml-first']

print(f'AI-First: {len(ai_first)}, AI-Support: {len(ai_support)}, ML: {len(ml_first)}')

AI-First: 621, AI-Support: 255, ML: 16


## Job Type Distribution

In [7]:
type_counts = df['ai_type'].value_counts()
type_pct = (type_counts / len(df) * 100).round(1)
pd.DataFrame({'jobs': type_counts, '%': type_pct})

Unnamed: 0_level_0,jobs,%
ai_type,Unnamed: 1_level_1,Unnamed: 2_level_1
ai-first,621,69.4
ai-support,255,28.5
ml-first,16,1.8
unknown,3,0.3


## Dataset Statistics

In [8]:
print(f'Unique companies: {df["company"].nunique()}')
print(f'\nTop 20 companies by job count:')
df['company'].value_counts().head(20)

Unique companies: 590

Top 20 companies by job count:


company
Capital One                        28
G2i                                15
Scale AI                           10
Thomson Reuters                    10
GEICO                              10
EvolutionIQ                         7
Traversal                           7
Mistral AI                          7
OpenAI                              7
Anthropic                           7
NVIDIA                              6
Speechify                           6
SentinelOne                         6
Coinbase                            5
PwC                                 5
Helsing                             5
Wolters Kluwer                      5
Samsara                             5
New York Life Insurance Company     5
Cloudflare                          4
Name: count, dtype: int64

In [9]:
stage_counts = df[df['stage'] != '']['stage'].value_counts()
stage_pct = (stage_counts / len(df) * 100).round(1)
pd.DataFrame({'jobs': stage_counts, '%': stage_pct})

Unnamed: 0_level_0,jobs,%
stage,Unnamed: 1_level_1,Unnamed: 2_level_1
Public,293,32.7
Series B,126,14.1
Series A,59,6.6
Seed,36,4.0
Series B+,27,3.0
...,...,...
Series B+ ($200M+ funding),1,0.1
"Series B+ (180+ professionals, globally distributed)",1,0.1
"Series D+ (Public company-equivalent, founded 2009)",1,0.1
Public (FTSE250),1,0.1


In [10]:
print(f'Customer-facing roles: {df["is_customer_facing"].sum()} ({df["is_customer_facing"].mean()*100:.1f}%)')
print(f'Management roles: {df["is_management"].sum()} ({df["is_management"].mean()*100:.1f}%)')
print(f'\nMost common job titles:')
df['title'].value_counts().head(10)

Customer-facing roles: 231 (25.8%)
Management roles: 155 (17.3%)

Most common job titles:


title
AI Engineer              53
Senior AI Engineer       31
Applied AI Engineer      20
Lead AI Engineer         10
Staff AI Engineer         8
AI/ML Engineer            7
Principal AI Engineer     6
AI Product Engineer       5
Senior AI/ML Engineer     5
AI Research Engineer      5
Name: count, dtype: int64

## Skills Analysis

In [11]:
n = len(df)
genai = skill_counts(df, 'skills_genai')
print('Top GenAI skills:')
pd.DataFrame({'jobs': genai.head(10), '%': (genai.head(10) / n * 100).round(1)})

Top GenAI skills:


Unnamed: 0_level_0,jobs,%
skills_genai,Unnamed: 1_level_1,Unnamed: 2_level_1
RAG,321,35.9
prompt engineering,260,29.1
LLMs,227,25.4
LangChain,168,18.8
agents,129,14.4
OpenAI API,78,8.7
LangGraph,72,8.0
LlamaIndex,52,5.8
Anthropic API,49,5.5
embeddings,48,5.4


In [12]:
print('Top ML skills:')
skill_counts(df, 'skills_ml').head(10)

Top ML skills:


skills_ml
PyTorch             197
TensorFlow          115
fine-tuning          76
model training       57
model evaluation     40
embeddings           33
scikit-learn         33
machine learning     33
deep learning        28
JAX                  23
Name: count, dtype: int64

In [13]:
print('Top web skills:')
skill_counts(df, 'skills_web').head(10)

Top web skills:


skills_web
React            132
FastAPI           96
REST APIs         58
APIs              58
REST              54
API design        42
Next.js           36
microservices     28
GraphQL           26
Node.js           26
Name: count, dtype: int64

In [14]:
print('Top database skills:')
skill_counts(df, 'skills_databases').head(10)

Top database skills:


skills_databases
vector databases    97
PostgreSQL          83
Pinecone            53
Redis               43
Postgres            42
Weaviate            41
SQL                 38
Snowflake           32
pgvector            28
NoSQL               24
Name: count, dtype: int64

In [15]:
print('Top cloud skills:')
skill_counts(df, 'skills_cloud').head(5)

Top cloud skills:


skills_cloud
AWS             359
Azure           214
GCP             205
Google Cloud     35
SageMaker        25
Name: count, dtype: int64

In [16]:
print('Top ops skills:')
skill_counts(df, 'skills_ops').head(10)

Top ops skills:


skills_ops
Docker            277
CI/CD             262
Kubernetes        260
MLOps             107
Terraform         104
MLflow             66
monitoring         57
observability      42
GitHub Actions     37
Prometheus         33
Name: count, dtype: int64

In [17]:
langs = skill_counts(df, 'skills_languages')
print('Top languages:')
pd.DataFrame({'jobs': langs.head(10), '%': (langs.head(10) / n * 100).round(1)})

Top languages:


Unnamed: 0_level_0,jobs,%
skills_languages,Unnamed: 1_level_1,Unnamed: 2_level_1
Python,738,82.5
TypeScript,209,23.4
Java,133,14.9
Go,101,11.3
SQL,88,9.8
JavaScript,79,8.8
C++,71,7.9
C#,56,6.3
Scala,41,4.6
Rust,38,4.2


## GenAI Framework Ecosystem

In [18]:
frameworks = ['LangChain', 'LangGraph', 'LlamaIndex', 'CrewAI', 'AutoGen']
genai_all = skill_counts(df, 'skills_genai')
fw = genai_all[genai_all.index.isin(frameworks)].reindex(frameworks).dropna().astype(int)
pd.DataFrame({'jobs': fw, '%': (fw / n * 100).round(1)})

Unnamed: 0_level_0,jobs,%
skills_genai,Unnamed: 1_level_1,Unnamed: 2_level_1
LangChain,168,18.8
LangGraph,72,8.0
LlamaIndex,52,5.8
CrewAI,28,3.1
AutoGen,17,1.9


## Supporting Roles: What AI-Support Engineers Do

In [21]:
def categorize_support_role(row):
    title = row['title'].lower()
    resp = ' '.join(row['responsibilities']).lower()
    categories = {
        'Platform/Infrastructure': ['platform', 'infrastructure', 'infra', 'mlops', 'kubernetes', 'k8s', 'deployment'],
        'Data/Pipelines': ['data engineer', 'data pipeline', 'etl', 'data platform'],
        'Sales/Solutions': ['sales', 'solutions', 'presales', 'customer success'],
        'Backend/General SWE': ['backend', 'api', 'microservices', 'internal tools'],
        'Frontend/UI': ['frontend', 'ui', 'ux', 'full-stack'],
    }
    for cat, keywords in categories.items():
        if any(kw in title or kw in resp for kw in keywords):
            return cat
    return 'Other'

support = ai_support.copy()
support['category'] = support.apply(categorize_support_role, axis=1)

print(f'{len(ai_support)} jobs ({len(ai_support)/len(df)*100:.1f}%) classified as AI-Support\n')
support['category'].value_counts()

255 jobs (28.5%) classified as AI-Support



category
Platform/Infrastructure    174
Backend/General SWE         22
Sales/Solutions             19
Frontend/UI                 15
Other                       15
Data/Pipelines              10
Name: count, dtype: int64

In [22]:
# Do AI-Support roles need GenAI knowledge?
has_genai = ai_support['skills_genai'].apply(len) > 0
print(f'AI-Support roles with GenAI skills: {has_genai.sum()}/{len(ai_support)} ({has_genai.mean()*100:.1f}%)')
print(f'AI-Support roles without GenAI skills: {(~has_genai).sum()}/{len(ai_support)} ({(~has_genai).mean()*100:.1f}%)')

print('\nGenAI skills in AI-Support roles:')
support_genai = skill_counts(ai_support, 'skills_genai')
pd.DataFrame({'jobs': support_genai.head(10), '%': (support_genai.head(10) / len(ai_support) * 100).round(1)})

AI-Support roles with GenAI skills: 146/255 (57.3%)
AI-Support roles without GenAI skills: 109/255 (42.7%)

GenAI skills in AI-Support roles:


Unnamed: 0_level_0,jobs,%
skills_genai,Unnamed: 1_level_1,Unnamed: 2_level_1
LLMs,39,15.3
RAG,34,13.3
prompt engineering,19,7.5
LangChain,17,6.7
OpenAI API,14,5.5
Anthropic API,10,3.9
Claude Code,7,2.7
Cursor,7,2.7
AI agents,7,2.7
agentic workflows,7,2.7


### Skill Comparison: AI-First vs AI-Support

In [23]:
compare_skills = ['RAG', 'prompt engineering', 'agents', 'LangChain', 'Docker', 'Kubernetes', 'AWS', 'React']

rows = []
for skill in compare_skills:
    af = jobs_with_skill(ai_first, skill)
    asp = jobs_with_skill(ai_support, skill)
    rows.append({
        'skill': skill,
        'AI-First': f'{af/len(ai_first)*100:.1f}%',
        'AI-Support': f'{asp/len(ai_support)*100:.1f}%',
    })

pd.DataFrame(rows).set_index('skill')

Unnamed: 0_level_0,AI-First,AI-Support
skill,Unnamed: 1_level_1,Unnamed: 2_level_1
RAG,50.2%,17.3%
prompt engineering,42.4%,9.0%
agents,33.3%,8.2%
LangChain,24.3%,6.7%
Docker,31.2%,30.6%
Kubernetes,26.4%,36.1%
AWS,43.3%,40.8%
React,14.2%,20.8%


## Research vs Applied Roles

In [24]:
def is_research_role(row):
    title = row['title'].lower()
    resp = ' '.join(row['responsibilities']).lower()
    use_cases = ' '.join(row['use_cases']).lower()

    research_indicators = [
        'research', 'scientist', 'publication', 'paper', 'novel',
        'algorithm', 'architecture development', 'model architecture',
        'training methods', 'safety research', 'rl research',
        'reinforcement learning', 'world model', 'control theory',
        'experimental', 'push sota', 'state of the art'
    ]
    non_research_indicators = [
        'production', 'deploy', 'shipping', 'product',
        'customer', 'enterprise', 'api integration',
        'fine-tuning existing', 'apply', 'implement'
    ]

    if any(kw in title for kw in ['research engineer', 'scientist', 'research scientist']):
        return True

    all_text = f'{resp} {use_cases}'
    r_score = sum(1 for kw in research_indicators if kw in all_text)
    nr_score = sum(1 for kw in non_research_indicators if kw in all_text)
    return r_score > nr_score and r_score >= 2

df['is_research'] = df.apply(is_research_role, axis=1)
research_count = df['is_research'].sum()

pd.DataFrame([
    {'Role Type': 'Research', 'Jobs': research_count, '%': f'{research_count/len(df)*100:.1f}%'},
    {'Role Type': 'Applied/Production', 'Jobs': len(df) - research_count, '%': f'{(len(df)-research_count)/len(df)*100:.1f}%'},
]).set_index('Role Type')

Unnamed: 0_level_0,Jobs,%
Role Type,Unnamed: 1_level_1,Unnamed: 2_level_1
Research,42,4.7%
Applied/Production,853,95.3%


In [25]:
print('Sample research titles:')
df[df['is_research']]['title'].drop_duplicates().head(15).tolist()

Sample research titles:


['Staff AI Engineer',
 'Principal AI Research Engineer - RL',
 'Research Engineer, AI',
 'Entrepreneurial AI Research Engineer',
 'AI Engineer (Applied Scientist) III',
 'Machine Learning Engineer (LLMs / AI)',
 'AI Engineer',
 'Research Scientist/Research Engineer, AI for Secure Code',
 'Sr. AI/ML Engineer III (6273)',
 'AI Product & Research Engineer',
 'AI Research Engineer (Fully Remote; US Based)',
 'AI Research Engineer',
 'Principal AI Research Engineer - World Models',
 'Senior ML Engineer (AI Research)',
 'Staff Level Research Engineer, AI']

## What Other Titles Do "AI Engineers" Go Under?

In [26]:
def normalize_title(title):
    t = title.lower()
    for kw in ['senior', 'staff', 'principal', 'lead', 'junior', 'sr.', 'sr', 'iii', 'ii']:
        t = t.replace(kw, '').strip()
    return ' '.join(t.split())

df['norm_title'] = df['title'].apply(normalize_title)

# Group by normalized title and ai_type
title_groups = df.groupby('norm_title')['ai_type'].value_counts().unstack(fill_value=0)
title_groups['total'] = title_groups.sum(axis=1)
title_groups = title_groups[title_groups['total'] >= 3]

# Strongly AI-First titles (75%+)
if 'ai-first' in title_groups.columns:
    title_groups['ai_first_pct'] = (title_groups['ai-first'] / title_groups['total'] * 100).round(0)
    strongly_ai_first = title_groups[title_groups['ai_first_pct'] >= 75].sort_values('total', ascending=False)
    print('Strongly AI-First titles (75%+ AI-First):')
    print(strongly_ai_first[['total', 'ai_first_pct']].head(10).to_string())

# Strongly AI-Support titles (75%+)
if 'ai-support' in title_groups.columns:
    title_groups['ai_support_pct'] = (title_groups['ai-support'] / title_groups['total'] * 100).round(0)
    strongly_support = title_groups[title_groups['ai_support_pct'] >= 75].sort_values('total', ascending=False)
    print('\nStrongly AI-Support titles (75%+ AI-Support):')
    print(strongly_support[['total', 'ai_support_pct']].head(10).to_string())

Strongly AI-First titles (75%+ AI-First):
ai_type                            total  ai_first_pct
norm_title                                            
ai engineer                          119          97.0
applied ai engineer                   25          88.0
ai/ml engineer                        19          95.0
software engineer, ai                 11          91.0
ai product engineer                    8         100.0
ai solutions engineer                  7          86.0
ai software engineer                   6          83.0
machine learning engineer, gen ai      5         100.0
software engineer, applied ai          5          80.0
ai research engineer                   5         100.0

Strongly AI-Support titles (75%+ AI-Support):
ai_type                     total  ai_support_pct
norm_title                                       
ai platform engineer            5            80.0
ai data engineer                4            75.0
ai infrastructure engineer      3           100.0
a

## How Much ML Do AI Engineers Need to Know?

In [27]:
ml_skills_list = [
    'PyTorch', 'TensorFlow', 'Keras', 'JAX', 'scikit-learn', 'XGBoost',
    'LightGBM', 'fine-tuning', 'model training', 'model evaluation',
    'embeddings', 'deep learning', 'machine learning', 'neural networks',
    'optimization', 'CUDA', 'transformers', 'huggingface'
]

def has_any_ml_skill(row):
    skills = all_skills_lower(row)
    return any(ml.lower() in s for s in skills for ml in ml_skills_list)

ai_first_ml = ai_first.apply(has_any_ml_skill, axis=1)
print(f'{ai_first_ml.mean()*100:.1f}% of AI-First roles require some ML knowledge')

# Most common ML skills in AI-First roles
def count_ml_skill(skill_name):
    skill_lower = skill_name.lower()
    return ai_first.apply(
        lambda row: any(skill_lower in s for s in all_skills_lower(row)), axis=1
    ).sum()

ml_counts = {s: count_ml_skill(s) for s in ml_skills_list}
ml_df = pd.Series(ml_counts).sort_values(ascending=False)
ml_df = ml_df[ml_df > 0]
pd.DataFrame({'jobs': ml_df, '%': (ml_df / len(ai_first) * 100).round(1)}).head(10)

64.3% of AI-First roles require some ML knowledge


Unnamed: 0,jobs,%
PyTorch,165,26.6
fine-tuning,159,25.6
TensorFlow,93,15.0
embeddings,81,13.0
model training,80,12.9
model evaluation,69,11.1
optimization,57,9.2
machine learning,54,8.7
scikit-learn,39,6.3
deep learning,36,5.8


## What Else (Besides GenAI) Do AI Engineers Need?

In [28]:
n_af = len(ai_first)

has_genai_col = ai_first['skills_genai'].apply(len) > 0
has_ml = ai_first['skills_ml'].apply(len) > 0
has_web = ai_first['skills_web'].apply(len) > 0
has_ops = ai_first['skills_ops'].apply(len) > 0
has_cloud = ai_first['skills_cloud'].apply(len) > 0
has_data = ai_first['skills_data'].apply(len) > 0
has_db = ai_first['skills_databases'].apply(len) > 0
has_any_other = has_ml | has_web | has_ops | has_cloud | has_data | has_db

combos = {
    'GenAI + Ops (Docker, K8s, CI/CD)': (has_genai_col & has_ops).sum(),
    'GenAI + ML skills': (has_genai_col & has_ml).sum(),
    'GenAI + Web skills': (has_genai_col & has_web).sum(),
    'GenAI + ANY other tech': (has_genai_col & has_any_other).sum(),
    'Pure GenAI (nothing else)': (has_genai_col & ~has_any_other).sum(),
}

combo_df = pd.Series(combos)
pd.DataFrame({'jobs': combo_df, '%': (combo_df / n_af * 100).round(1)})

Unnamed: 0,jobs,%
"GenAI + Ops (Docker, K8s, CI/CD)",447,72.0
GenAI + ML skills,357,57.5
GenAI + Web skills,305,49.1
GenAI + ANY other tech,578,93.1
Pure GenAI (nothing else),9,1.4


In [29]:
# Non-GenAI skills by category for AI-First roles
for cat in ['web', 'cloud', 'ops', 'languages', 'databases', 'data']:
    col = f'skills_{cat}'
    counts = skill_counts(ai_first, col)
    if len(counts) > 0:
        top = counts.head(6)
        pcts = (top / n_af * 100).round(1)
        print(f'\n{cat.upper()}:')
        for skill, count in top.items():
            print(f'  {skill}: {count} ({pcts[skill]}%)')


WEB:
  FastAPI: 80 (12.9%)
  React: 80 (12.9%)
  APIs: 45 (7.2%)
  REST APIs: 42 (6.8%)
  REST: 36 (5.8%)
  microservices: 24 (3.9%)

CLOUD:
  AWS: 259 (41.7%)
  Azure: 154 (24.8%)
  GCP: 138 (22.2%)
  Google Cloud: 28 (4.5%)
  AWS Bedrock: 20 (3.2%)
  SageMaker: 17 (2.7%)

OPS:
  Docker: 195 (31.4%)
  CI/CD: 172 (27.7%)
  Kubernetes: 165 (26.6%)
  MLOps: 75 (12.1%)
  Terraform: 58 (9.3%)
  monitoring: 48 (7.7%)

LANGUAGES:
  Python: 550 (88.6%)
  TypeScript: 145 (23.3%)
  Java: 94 (15.1%)
  Go: 66 (10.6%)
  SQL: 62 (10.0%)
  C++: 54 (8.7%)

DATABASES:
  vector databases: 86 (13.8%)
  PostgreSQL: 60 (9.7%)
  Pinecone: 47 (7.6%)
  Weaviate: 35 (5.6%)
  Postgres: 31 (5.0%)
  Redis: 28 (4.5%)

DATA:
  data pipelines: 61 (9.8%)
  Airflow: 23 (3.7%)
  Spark: 23 (3.7%)
  Databricks: 22 (3.5%)
  Kafka: 21 (3.4%)
  ETL: 19 (3.1%)


In [30]:
# Full-stack expectations for AI-First roles
def has_frontend_skills(row):
    skills = all_skills_lower(row)
    return any(kw in s for s in skills for kw in ['react', 'vue', 'next.js', 'frontend', 'typescript', 'javascript'])

def has_backend_skills(row):
    skills = all_skills_lower(row)
    return any(kw in s for s in skills for kw in ['fastapi', 'flask', 'django', 'api', 'graphql', 'rest'])

fe = ai_first.apply(has_frontend_skills, axis=1)
be = ai_first.apply(has_backend_skills, axis=1)
fs = fe & be

print(f'Frontend skills: {fe.sum()}/{n_af} ({fe.mean()*100:.1f}%)')
print(f'Backend skills: {be.sum()}/{n_af} ({be.mean()*100:.1f}%)')
print(f'Full-stack (both): {fs.sum()}/{n_af} ({fs.mean()*100:.1f}%)')

Frontend skills: 195/621 (31.4%)
Backend skills: 308/621 (49.6%)
Full-stack (both): 134/621 (21.6%)


## Fine-Tuning Requirements

In [31]:
ft_keywords = ['fine-tun', 'finetun', 'fine tun', 'custom model', 'specialized model',
               'domain-specific', 'adaptation', 'lora', 'qlora', 'peft', 'instruction tuning']

def get_all_text(row):
    parts = [row['title'], ' '.join(row['responsibilities']), ' '.join(row['use_cases'])]
    for col in SKILL_COLS:
        if isinstance(row[col], list):
            parts.extend(row[col])
    return ' '.join(parts).lower()

ai_first_texts = ai_first.apply(get_all_text, axis=1)
has_ft = ai_first_texts.apply(lambda t: any(kw in t for kw in ft_keywords))

print(f'{has_ft.mean()*100:.1f}% of AI-First roles mention fine-tuning')

# Depth of fine-tuning
primary_ft_kw = ['lora', 'qlora', 'peft']

def ft_depth(text):
    if not any(kw in text for kw in ft_keywords):
        return 'No FT mentioned'
    if any(kw in text for kw in primary_ft_kw) or text.count('fine-tun') + text.count('finetun') >= 2:
        return 'Primary FT responsibility'
    return 'Secondary/occasional FT'

ft_levels = ai_first_texts.apply(ft_depth).value_counts()
pd.DataFrame({'jobs': ft_levels, '%': (ft_levels / len(ai_first) * 100).round(1)})

30.8% of AI-First roles mention fine-tuning


Unnamed: 0,jobs,%
No FT mentioned,430,69.2
Primary FT responsibility,117,18.8
Secondary/occasional FT,74,11.9


In [32]:
# Fine-tuning use cases
ft_use_case_categories = {
    'Instruction following': ['instruction', 'task', 'command', 'reasoning', 'agent'],
    'Domain knowledge': ['domain', 'industry', 'vertical', 'medical', 'legal', 'finance', 'healthcare', 'scientific'],
    'Style/Tone': ['style', 'tone', 'voice', 'brand', 'personality', 'format'],
    'Company data': ['company', 'internal', 'proprietary', 'organization'],
    'Performance': ['faster', 'smaller', 'efficiency', 'latency', 'cost', 'optimize'],
    'Language': ['language', 'translation', 'multilingual', 'non-english'],
    'Privacy': ['privacy', 'on-premise', 'local', 'offline', 'secure'],
}

ft_jobs = ai_first[has_ft.values]
all_ucs = ft_jobs['use_cases'].explode().dropna()

uc_cats = Counter()
for uc in all_ucs:
    uc_lower = uc.lower()
    for cat, kws in ft_use_case_categories.items():
        if any(kw in uc_lower for kw in kws):
            uc_cats[cat] += 1
            break

print('Fine-tuning use cases:')
for cat, count in sorted(uc_cats.items(), key=lambda x: -x[1]):
    print(f'  {cat}: {count}')

Fine-tuning use cases:
  Instruction following: 170
  Domain knowledge: 90
  Style/Tone: 52
  Performance: 33
  Company data: 27
  Language: 23
  Privacy: 10


## Evaluation and Production: How Important Are They?

In [33]:
# How often do responsibilities mention evaluation/quality vs production/deployment?
all_resp = df['responsibilities'].explode().dropna()
print(f'Total responsibilities: {len(all_resp)}')

eval_keywords = ['evaluat', 'quality', 'test', 'monitor', 'observ', 'metric', 'hallucinat', 'guardrail', 'bias', 'safety']
prod_keywords = ['deploy', 'production', 'ship', 'release', 'scale', 'reliab', 'latency', 'infra', 'ci/cd', 'docker', 'kubernetes']

def count_resp_keyword(responsibilities, keywords):
    return sum(1 for r in responsibilities if any(kw in r.lower() for kw in keywords))

eval_count = count_resp_keyword(all_resp, eval_keywords)
prod_count = count_resp_keyword(all_resp, prod_keywords)

print(f'\nResponsibilities mentioning evaluation/quality: {eval_count} ({eval_count/len(all_resp)*100:.1f}%)')
print(f'Responsibilities mentioning production/deployment: {prod_count} ({prod_count/len(all_resp)*100:.1f}%)')

# How many JOBS mention evaluation in responsibilities?
def job_mentions(df, keywords):
    return df['responsibilities'].apply(
        lambda resps: any(any(kw in r.lower() for kw in keywords) for r in resps)
    ).sum()

eval_jobs = job_mentions(df, eval_keywords)
prod_jobs = job_mentions(df, prod_keywords)

print(f'\nJobs with evaluation/quality responsibilities: {eval_jobs}/{len(df)} ({eval_jobs/len(df)*100:.1f}%)')
print(f'Jobs with production/deployment responsibilities: {prod_jobs}/{len(df)} ({prod_jobs/len(df)*100:.1f}%)')

Total responsibilities: 5694

Responsibilities mentioning evaluation/quality: 1200 (21.1%)
Responsibilities mentioning production/deployment: 1805 (31.7%)

Jobs with evaluation/quality responsibilities: 680/895 (76.0%)
Jobs with production/deployment responsibilities: 780/895 (87.2%)


In [34]:
# Most common action words in responsibilities
import re

action_words = [
    'build', 'design', 'implement', 'develop', 'deploy', 'maintain',
    'collaborate', 'monitor', 'optimize', 'evaluate', 'test', 'scale',
    'integrate', 'architect', 'manage', 'create', 'lead', 'research',
    'support', 'deliver', 'ship', 'automate', 'improve', 'ensure'
]

word_counts = Counter()
for resp in all_resp:
    resp_lower = resp.lower()
    for word in action_words:
        if re.search(r'\b' + word + r'\w*\b', resp_lower):
            word_counts[word] += 1

action_df = pd.Series(dict(word_counts.most_common()))
pd.DataFrame({
    'mentions': action_df,
    '% of responsibilities': (action_df / len(all_resp) * 100).round(1)
})

Unnamed: 0,mentions,% of responsibilities
design,930,16.3
develop,903,15.9
build,862,15.1
implement,587,10.3
deploy,565,9.9
architect,437,7.7
collaborate,403,7.1
maintain,366,6.4
lead,335,5.9
test,320,5.6


In [35]:
# Evaluation and production/ops as explicit skill requirements
eval_skill_keywords = ['evaluation', 'eval', 'testing', 'quality', 'monitoring', 'observability']
prod_skill_keywords = ['docker', 'kubernetes', 'ci/cd', 'mlops', 'terraform']

def count_jobs_with_skills(df_subset, keywords):
    return df_subset.apply(
        lambda row: any(
            any(kw in s.lower() for kw in keywords)
            for col in SKILL_COLS
            for s in (row[col] if isinstance(row[col], list) else [])
        ), axis=1
    ).sum()

eval_skill_jobs = count_jobs_with_skills(df, eval_skill_keywords)
prod_skill_jobs = count_jobs_with_skills(df, prod_skill_keywords)

print(f'Jobs with evaluation-related skills: {eval_skill_jobs}/{len(df)} ({eval_skill_jobs/len(df)*100:.1f}%)')
print(f'Jobs with production/ops skills: {prod_skill_jobs}/{len(df)} ({prod_skill_jobs/len(df)*100:.1f}%)')

# For AI-First specifically
eval_af = count_jobs_with_skills(ai_first, eval_skill_keywords)
prod_af = count_jobs_with_skills(ai_first, prod_skill_keywords)

print(f'\nAI-First jobs with evaluation skills: {eval_af}/{len(ai_first)} ({eval_af/len(ai_first)*100:.1f}%)')
print(f'AI-First jobs with production/ops skills: {prod_af}/{len(ai_first)} ({prod_af/len(ai_first)*100:.1f}%)')

Jobs with evaluation-related skills: 304/895 (34.0%)
Jobs with production/ops skills: 463/895 (51.7%)

AI-First jobs with evaluation skills: 246/621 (39.6%)
AI-First jobs with production/ops skills: 312/621 (50.2%)
