# AI Engineering Jobs Analysis

Analysis of 895 job descriptions extracted from builtin.com.

The search covered jobs containing the "AI Engineer" keyword in LA, NY, London, Amsterdam, and Berlin, posted within the last 4 weeks. It was run at the beginning of February 2026, so the dataset mostly contains jobs published in January 2026.

In [None]:
import yaml
import pandas as pd
from pathlib import Path
from collections import Counter

# Notebook display settings: row limit and maximum column width for rendered frames.
pd.options.display.max_rows = 50
pd.options.display.max_colwidth = 80

In [None]:
# Load all structured YAML files into a flat DataFrame
STRUCTURED_DIR = Path('data_structured')

# Skill categories read from position.skills; each becomes a `skills_<category>` column.
SKILL_CATEGORIES = ['genai', 'ml', 'web', 'databases', 'data', 'cloud', 'ops',
                    'languages', 'domains', 'other']


def _section(mapping, key):
    """Return ``mapping[key]`` when it is a dict, otherwise an empty dict.

    A YAML key with no value parses to ``None``; ``dict.get(key, {})`` would
    hand that ``None`` back and the next ``.get`` would raise, dropping the job.
    """
    value = mapping.get(key)
    return value if isinstance(value, dict) else {}


def _as_str_list(value):
    """Return ``value`` as a list of strings, or ``[]`` when it is not a list.

    Null items are dropped and other items are stringified so that later
    ``.lower()`` / ``' '.join(...)`` calls on these columns cannot fail.
    """
    if not isinstance(value, list):
        return []
    return [str(item) for item in value if item is not None]


def _job_to_record(job):
    """Flatten one parsed job mapping into a single row dict."""
    pos = _section(job, 'position')
    comp = _section(job, 'company')
    skills = _section(pos, 'skills')
    record = {
        'job_id': _section(job, 'meta').get('job_id', ''),
        # `or ''` also maps explicit nulls to the empty-string default.
        'company': comp.get('name') or '',
        'stage': comp.get('stage') or '',
        'focus': comp.get('focus') or '',
        'title': pos.get('title') or '',
        'ai_type': _section(pos, 'ai_type').get('type') or 'unknown',
        'is_customer_facing': bool(pos.get('is_customer_facing', False)),
        'is_management': bool(pos.get('is_management', False)),
        'responsibilities': pos.get('responsibilities', []),
        # Company-level use cases win; fall back to the position when absent or empty.
        'use_cases': comp.get('use_cases') or pos.get('use_cases') or [],
    }
    for category in SKILL_CATEGORIES:
        record[f'skills_{category}'] = skills.get(category, [])
    return record


records = []
# sorted() makes the row order reproducible; glob order is filesystem-dependent.
for file in sorted(STRUCTURED_DIR.glob('*.yaml')):
    try:
        with open(file, 'r', encoding='utf-8') as f:
            job = yaml.safe_load(f)
        if not isinstance(job, dict):
            raise ValueError('top-level YAML value is not a mapping')
        records.append(_job_to_record(job))
    except Exception as e:
        # Best-effort load: report the bad file and keep going.
        print(f'Error loading {file}: {e}')

df = pd.DataFrame(records)

# Ensure list columns are actually lists (of strings)
list_cols = [c for c in df.columns if c.startswith('skills_')] + ['responsibilities', 'use_cases']
for col in list_cols:
    df[col] = df[col].apply(_as_str_list)

print(f'Loaded {len(df)} jobs')
df.head(3)

In [None]:
# Helper: explode a skill list column into counts
def skill_counts(df_subset, col):
    """Return how often each individual skill occurs in list column ``col``.

    Each list is expanded to one row per element; rows that become missing
    values (for example from empty lists) are dropped before counting.
    """
    one_skill_per_row = df_subset[col].explode()
    present = one_skill_per_row.dropna()
    return present.value_counts()

# Helper: count jobs that have a specific skill (substring match) across all skill columns
def jobs_with_skill(df_subset, skill_name):
    """Count the rows of ``df_subset`` that list ``skill_name`` in any skill column.

    Matching is a case-insensitive substring test against every entry of the
    columns named in the module-level ``SKILL_COLS``. Because it is a plain
    substring test, short names can also hit longer words (``'rag'`` is
    contained in ``'storage'``).

    Returns a plain ``int``, which is 0 for an empty subset. The previous
    row-wise ``DataFrame.apply`` returned a frame for empty input, so
    ``.sum()`` produced a Series instead of a count.
    """
    needle = skill_name.lower()
    columns = [df_subset[col] for col in SKILL_COLS]
    return sum(
        any(
            # str() tolerates non-string entries such as numbers parsed from YAML.
            needle in str(skill).lower()
            for cell in cells
            if isinstance(cell, list)
            for skill in cell
        )
        # zip(*columns) walks the skill columns row by row.
        for cells in zip(*columns)
    )

# Names of all `skills_*` columns of df, in column order.
SKILL_COLS = df.filter(regex=r'^skills_').columns.tolist()

def all_skills_lower(row):
    """Collect every skill listed in the ``SKILL_COLS`` cells of ``row``, lowercased.

    Cells that are not lists are skipped.
    """
    return [
        skill.lower()
        for col in SKILL_COLS
        if isinstance(row[col], list)
        for skill in row[col]
    ]

# Subsets: one frame per `ai_type` label
ai_first = df.loc[df['ai_type'].eq('ai-first')]
ai_support = df.loc[df['ai_type'].eq('ai-support')]
ml_first = df.loc[df['ai_type'].eq('ml-first')]

print(f'AI-First: {ai_first.shape[0]}, AI-Support: {ai_support.shape[0]}, ML: {ml_first.shape[0]}')

## Job Type Distribution

In [None]:
# Jobs per ai_type, with each type's share of all loaded jobs.
type_counts = df['ai_type'].value_counts()
type_pct = type_counts.div(len(df)).mul(100).round(1)
pd.concat([type_counts.rename('jobs'), type_pct.rename('%')], axis=1)

## Dataset Statistics

In [None]:
# Distinct company names and the 20 names with the most postings.
unique_companies = df['company'].nunique()
print(f'Unique companies: {unique_companies}')
print('\nTop 20 companies by job count:')
df['company'].value_counts().iloc[:20]

In [None]:
# Count jobs per company stage, ignoring blank stages; percentages are
# relative to ALL jobs, so they need not add up to 100.
known_stage = df.loc[df['stage'].ne(''), 'stage']
stage_counts = known_stage.value_counts()
stage_pct = stage_counts.div(len(df)).mul(100).round(1)
pd.DataFrame({'jobs': stage_counts, '%': stage_pct})

In [None]:
# Share of jobs carrying each boolean role flag, then the most frequent raw titles.
for label, flag_col in (('Customer-facing', 'is_customer_facing'), ('Management', 'is_management')):
    flagged = df[flag_col]
    print(f'{label} roles: {flagged.sum()} ({flagged.mean()*100:.1f}%)')
print('\nMost common job titles:')
df['title'].value_counts().iloc[:10]

## Skills Analysis

In [None]:
# Top 10 GenAI skills with their share of all jobs (n is reused by later cells).
n = len(df)
genai = skill_counts(df, 'skills_genai')
print('Top GenAI skills:')
top_genai = genai.head(10)
pd.DataFrame({'jobs': top_genai, '%': top_genai.div(n).mul(100).round(1)})

In [None]:
# Ten most frequent entries of the ML skill column.
print('Top ML skills:')
skill_counts(df, 'skills_ml').iloc[:10]

In [None]:
# Ten most frequent entries of the web skill column.
print('Top web skills:')
skill_counts(df, 'skills_web').iloc[:10]

In [None]:
# Ten most frequent entries of the database skill column.
print('Top database skills:')
skill_counts(df, 'skills_databases').iloc[:10]

In [None]:
# Five most frequent entries of the cloud skill column (this cell shows 5, the others 10).
print('Top cloud skills:')
skill_counts(df, 'skills_cloud').iloc[:5]

In [None]:
# Ten most frequent entries of the ops skill column.
print('Top ops skills:')
skill_counts(df, 'skills_ops').iloc[:10]

In [None]:
# Top 10 programming languages with their share of all jobs.
langs = skill_counts(df, 'skills_languages')
print('Top languages:')
top_langs = langs.head(10)
pd.DataFrame({'jobs': top_langs, '%': top_langs.div(n).mul(100).round(1)})

## GenAI Framework Ecosystem

In [None]:
frameworks = ['LangChain', 'LangGraph', 'LlamaIndex', 'CrewAI', 'AutoGen']
genai_all = skill_counts(df, 'skills_genai')
# reindex looks each framework up by its exact (case-sensitive) label and keeps
# the list order; frameworks that never occur come back as NaN and are dropped.
fw = genai_all.reindex(frameworks).dropna().astype(int)
pd.DataFrame({'jobs': fw, '%': fw.div(n).mul(100).round(1)})

## Supporting Roles: What AI-Support Engineers Do

In [None]:
import re

# Keywords per category, in priority order: the first category with a hit wins.
_SUPPORT_CATEGORY_KEYWORDS = {
    'Platform/Infrastructure': ['platform', 'infrastructure', 'infra', 'mlops', 'kubernetes', 'k8s', 'deployment'],
    'Data/Pipelines': ['data engineer', 'data pipeline', 'etl', 'data platform'],
    'Sales/Solutions': ['sales', 'solutions', 'presales', 'customer success'],
    'Backend/General SWE': ['backend', 'api', 'microservices', 'internal tools'],
    'Frontend/UI': ['frontend', 'ui', 'ux', 'full-stack'],
}

# One compiled pattern per category. Keywords must match as whole words (an
# optional plural "s" is allowed): with plain substring tests 'ui' matched
# "build", 'api' matched "rapid" and 'sales' matched "salesforce", so almost
# every role landed in some category and 'Other' was practically unreachable.
_SUPPORT_CATEGORY_PATTERNS = {
    cat: re.compile(r'\b(?:' + '|'.join(re.escape(kw) for kw in keywords) + r')s?\b')
    for cat, keywords in _SUPPORT_CATEGORY_KEYWORDS.items()
}


def categorize_support_role(row):
    """Assign a support-role category based on the title and responsibilities.

    Args:
        row: mapping/Series with a string ``'title'`` and a list of strings
            under ``'responsibilities'``.

    Returns:
        The first category (in the order of ``_SUPPORT_CATEGORY_KEYWORDS``)
        whose keyword appears as a whole word in the lowercased title or in
        the lowercased, space-joined responsibilities; ``'Other'`` if none do.
    """
    title = row['title'].lower()
    resp = ' '.join(row['responsibilities']).lower()
    for cat, pattern in _SUPPORT_CATEGORY_PATTERNS.items():
        # Title and responsibilities are searched separately so a keyword
        # can never match across the boundary between the two texts.
        if pattern.search(title) or pattern.search(resp):
            return cat
    return 'Other'

# Work on a new frame so ai_support itself does not gain the extra column.
support = ai_support.assign(category=ai_support.apply(categorize_support_role, axis=1))

support_share = len(ai_support) / len(df) * 100
print(f'{len(ai_support)} jobs ({support_share:.1f}%) classified as AI-Support\n')
support['category'].value_counts()

In [None]:
# Do AI-Support roles need GenAI knowledge?
# A role "has GenAI skills" when its skills_genai list is non-empty.
has_genai = ai_support['skills_genai'].map(len).gt(0)
lacks_genai = ~has_genai
print(f'AI-Support roles with GenAI skills: {has_genai.sum()}/{len(ai_support)} ({has_genai.mean()*100:.1f}%)')
print(f'AI-Support roles without GenAI skills: {lacks_genai.sum()}/{len(ai_support)} ({lacks_genai.mean()*100:.1f}%)')

print('\nGenAI skills in AI-Support roles:')
support_genai = skill_counts(ai_support, 'skills_genai')
top_support_genai = support_genai.head(10)
pd.DataFrame({'jobs': top_support_genai, '%': top_support_genai.div(len(ai_support)).mul(100).round(1)})

### Skill Comparison: AI-First vs AI-Support

In [None]:
compare_skills = ['RAG', 'prompt engineering', 'agents', 'LangChain', 'Docker', 'Kubernetes', 'AWS', 'React']


def _skill_share(subset, skill):
    """Format the share of ``subset`` rows mentioning ``skill`` as an 'x.y%' string."""
    return f'{jobs_with_skill(subset, skill)/len(subset)*100:.1f}%'


# One table row per skill: share in AI-First roles next to share in AI-Support roles.
rows = [
    {
        'skill': skill,
        'AI-First': _skill_share(ai_first, skill),
        'AI-Support': _skill_share(ai_support, skill),
    }
    for skill in compare_skills
]

pd.DataFrame(rows).set_index('skill')

## Research vs Applied Roles

In [None]:
def is_research_role(row):
    """Heuristically decide whether a job is a research role.

    A title containing 'research engineer' or 'scientist' is research outright.
    Otherwise the lowercased responsibilities and use cases are scanned for
    substring hits from two keyword lists; the job counts as research only
    with at least two research hits that also outnumber the applied hits.
    """
    research_indicators = (
        'research', 'scientist', 'publication', 'paper', 'novel',
        'algorithm', 'architecture development', 'model architecture',
        'training methods', 'safety research', 'rl research',
        'reinforcement learning', 'world model', 'control theory',
        'experimental', 'push sota', 'state of the art',
    )
    non_research_indicators = (
        'production', 'deploy', 'shipping', 'product',
        'customer', 'enterprise', 'api integration',
        'fine-tuning existing', 'apply', 'implement',
    )
    title_markers = ('research engineer', 'scientist', 'research scientist')

    lowered_title = row['title'].lower()
    responsibilities_text = ' '.join(row['responsibilities']).lower()
    use_cases_text = ' '.join(row['use_cases']).lower()

    for marker in title_markers:
        if marker in lowered_title:
            return True

    body = responsibilities_text + ' ' + use_cases_text
    research_hits = sum(kw in body for kw in research_indicators)
    applied_hits = sum(kw in body for kw in non_research_indicators)
    return research_hits >= 2 and research_hits > applied_hits

# Flag every job, then summarise research vs. everything else.
df['is_research'] = df.apply(is_research_role, axis=1)
research_count = df['is_research'].sum()
applied_count = len(df) - research_count

pd.DataFrame(
    {
        'Jobs': [research_count, applied_count],
        '%': [f'{research_count/len(df)*100:.1f}%', f'{applied_count/len(df)*100:.1f}%'],
    },
    index=pd.Index(['Research', 'Applied/Production'], name='Role Type'),
)

In [None]:
# First 15 distinct titles among the jobs flagged as research.
print('Sample research titles:')
df.loc[df['is_research'], 'title'].drop_duplicates().head(15).tolist()

## What Other Titles Do "AI Engineers" Go Under?

In [None]:
import re

# Seniority/level markers, matched as whole words only (plus an optional
# trailing dot so "sr." disappears completely).
_SENIORITY_RE = re.compile(r'\b(?:senior|staff|principal|lead|junior|sr|iii|ii)\b\.?')


def normalize_title(title):
    """Lowercase ``title`` and remove seniority/level markers.

    Markers are removed only as whole words. The earlier ``str.replace``
    approach also cut them out of longer words ("leadership" -> "ership",
    "staffing" -> "ing"), corrupting the normalized titles used for grouping.
    Whitespace is collapsed to single spaces.
    """
    stripped = _SENIORITY_RE.sub(' ', title.lower())
    return ' '.join(stripped.split())

# Add a seniority-stripped title column used as the grouping key below.
df['norm_title'] = df['title'].apply(normalize_title)

# Group by normalized title and ai_type
# Result: one row per normalized title, one column per ai_type, cell = job count.
title_groups = df.groupby('norm_title')['ai_type'].value_counts().unstack(fill_value=0)
# 'total' must be computed before any percentage column is added, otherwise
# the row sum would include those percentages.
title_groups['total'] = title_groups.sum(axis=1)
# Keep only titles that occur at least 3 times.
title_groups = title_groups[title_groups['total'] >= 3]

# Strongly AI-First titles (75%+)
# Guarded because the column only exists if at least one job has that ai_type.
if 'ai-first' in title_groups.columns:
    title_groups['ai_first_pct'] = (title_groups['ai-first'] / title_groups['total'] * 100).round(0)
    strongly_ai_first = title_groups[title_groups['ai_first_pct'] >= 75].sort_values('total', ascending=False)
    print('Strongly AI-First titles (75%+ AI-First):')
    print(strongly_ai_first[['total', 'ai_first_pct']].head(10).to_string())

# Strongly AI-Support titles (75%+)
if 'ai-support' in title_groups.columns:
    title_groups['ai_support_pct'] = (title_groups['ai-support'] / title_groups['total'] * 100).round(0)
    strongly_support = title_groups[title_groups['ai_support_pct'] >= 75].sort_values('total', ascending=False)
    print('\nStrongly AI-Support titles (75%+ AI-Support):')
    print(strongly_support[['total', 'ai_support_pct']].head(10).to_string())

## How Much ML Do AI Engineers Need to Know?

In [None]:
ml_skills_list = [
    'PyTorch', 'TensorFlow', 'Keras', 'JAX', 'scikit-learn', 'XGBoost',
    'LightGBM', 'fine-tuning', 'model training', 'model evaluation',
    'embeddings', 'deep learning', 'machine learning', 'neural networks',
    'optimization', 'CUDA', 'transformers', 'huggingface'
]


def has_any_ml_skill(row):
    """Return True if any skill of ``row`` contains an entry of ``ml_skills_list``.

    The test is a case-insensitive substring match over all skill columns.
    """
    needles = [ml.lower() for ml in ml_skills_list]
    for skill in all_skills_lower(row):
        if any(needle in skill for needle in needles):
            return True
    return False


ai_first_ml = ai_first.apply(has_any_ml_skill, axis=1)
print(f'{ai_first_ml.mean()*100:.1f}% of AI-First roles require some ML knowledge')

# Most common ML skills in AI-First roles
def count_ml_skill(skill_name):
    """Count AI-First jobs with a skill containing ``skill_name`` (case-insensitive)."""
    needle = skill_name.lower()

    def mentions(row):
        return any(needle in skill for skill in all_skills_lower(row))

    return ai_first.apply(mentions, axis=1).sum()


ml_counts = dict(zip(ml_skills_list, map(count_ml_skill, ml_skills_list)))
ml_df = pd.Series(ml_counts).sort_values(ascending=False)
# Drop ML skills that no AI-First job mentions.
ml_df = ml_df.loc[ml_df > 0]
pd.DataFrame({'jobs': ml_df, '%': ml_df.div(len(ai_first)).mul(100).round(1)}).head(10)

## What Else (Besides GenAI) Do AI Engineers Need?

In [None]:
n_af = len(ai_first)


def _lists_any(col):
    """Boolean Series: True where the AI-First job has a non-empty list in ``col``."""
    return ai_first[col].map(len).gt(0)


has_genai_col = _lists_any('skills_genai')
has_ml = _lists_any('skills_ml')
has_web = _lists_any('skills_web')
has_ops = _lists_any('skills_ops')
has_cloud = _lists_any('skills_cloud')
has_data = _lists_any('skills_data')
has_db = _lists_any('skills_databases')
# "Any other tech" covers ml/web/ops/cloud/data/databases only; languages,
# domains and 'other' are not part of this flag.
has_any_other = pd.concat(
    [has_ml, has_web, has_ops, has_cloud, has_data, has_db], axis=1
).any(axis=1)

genai_and_other = has_genai_col & has_any_other
genai_only = has_genai_col & ~has_any_other
combos = {
    'GenAI + Ops (Docker, K8s, CI/CD)': (has_genai_col & has_ops).sum(),
    'GenAI + ML skills': (has_genai_col & has_ml).sum(),
    'GenAI + Web skills': (has_genai_col & has_web).sum(),
    'GenAI + ANY other tech': genai_and_other.sum(),
    'Pure GenAI (nothing else)': genai_only.sum(),
}

combo_df = pd.Series(combos)
pd.DataFrame({'jobs': combo_df, '%': combo_df.div(n_af).mul(100).round(1)})

In [None]:
# Non-GenAI skills by category for AI-First roles
# For each category print its six most frequent skills with their share of AI-First jobs.
for cat in ('web', 'cloud', 'ops', 'languages', 'databases', 'data'):
    counts = skill_counts(ai_first, f'skills_{cat}')
    if counts.empty:
        continue
    top = counts.head(6)
    pcts = top.div(n_af).mul(100).round(1)
    print(f'\n{cat.upper()}:')
    for skill, count in top.items():
        print(f'  {skill}: {count} ({pcts.loc[skill]}%)')

In [None]:
# Full-stack expectations for AI-First roles
def has_frontend_skills(row):
    """Return True if any skill of ``row`` contains a frontend marker (substring match)."""
    markers = ('react', 'vue', 'next.js', 'frontend', 'typescript', 'javascript')
    for skill in all_skills_lower(row):
        if any(marker in skill for marker in markers):
            return True
    return False

import re

# Backend markers, matched as whole words. As plain substrings, 'rest' also
# matched "random forest" and 'api' matched "rapid prototyping", inflating the
# backend (and therefore full-stack) share. "apis" and "restful" still count.
_BACKEND_SKILL_RE = re.compile(r'\b(?:fastapi|flask|django|graphql|apis?|rest(?:ful)?)\b')


def has_backend_skills(row):
    """Return True if any skill of ``row`` names a backend framework or API style.

    Skills come from ``all_skills_lower(row)``, i.e. every skill column,
    already lowercased.
    """
    return any(_BACKEND_SKILL_RE.search(skill) for skill in all_skills_lower(row))

fe = ai_first.apply(has_frontend_skills, axis=1)
be = ai_first.apply(has_backend_skills, axis=1)
# Full-stack means the same job shows both frontend and backend skills.
fs = fe & be

for label, mask in (('Frontend skills', fe), ('Backend skills', be), ('Full-stack (both)', fs)):
    print(f'{label}: {mask.sum()}/{n_af} ({mask.mean()*100:.1f}%)')

## Fine-Tuning Requirements

In [None]:
import re

ft_keywords = ['fine-tun', 'finetun', 'fine tun', 'custom model', 'specialized model',
               'domain-specific', 'adaptation', 'lora', 'qlora', 'peft', 'instruction tuning']

# Depth of fine-tuning: naming one of these methods marks FT as a primary responsibility.
primary_ft_kw = ['lora', 'qlora', 'peft']


def _ft_regex(keywords):
    """Compile ``keywords`` into one alternation pattern.

    The short method names in ``primary_ft_kw`` must match as whole words:
    as a plain substring 'lora' is found inside "exploration" and
    "exploratory", which wrongly flagged such jobs as fine-tuning roles.
    All other keywords keep substring semantics, so stems such as
    'fine-tun' still cover "fine-tune", "fine-tuned" and "fine-tuning".
    """
    alternatives = [
        rf'\b{re.escape(kw)}\b' if kw in primary_ft_kw else re.escape(kw)
        for kw in keywords
    ]
    return re.compile('|'.join(alternatives))


ft_pattern = _ft_regex(ft_keywords)
primary_ft_pattern = _ft_regex(primary_ft_kw)
# Every spelling of the stem accepted by ft_keywords: fine-tun / finetun / fine tun.
ft_stem_pattern = re.compile(r'fine[- ]?tun')


def get_all_text(row):
    """Join title, responsibilities, use cases and all skills into one lowercase string."""
    parts = [
        str(row['title'] or ''),
        ' '.join(map(str, row['responsibilities'])),
        ' '.join(map(str, row['use_cases'])),
    ]
    for col in SKILL_COLS:
        if isinstance(row[col], list):
            # str() keeps the final join from failing on non-string entries.
            parts.extend(str(skill) for skill in row[col])
    return ' '.join(parts).lower()


ai_first_texts = ai_first.apply(get_all_text, axis=1)
has_ft = ai_first_texts.apply(lambda t: ft_pattern.search(t) is not None)

print(f'{has_ft.mean()*100:.1f}% of AI-First roles mention fine-tuning')


def ft_depth(text):
    """Classify how central fine-tuning is in the lowercased job text ``text``.

    Returns:
        'No FT mentioned' when no fine-tuning keyword occurs;
        'Primary FT responsibility' when a method from ``primary_ft_kw`` is
        named or the fine-tune stem occurs at least twice (in any of its
        three spellings; 'fine tun' used to be left out of this count);
        'Secondary/occasional FT' otherwise.
    """
    if not ft_pattern.search(text):
        return 'No FT mentioned'
    if primary_ft_pattern.search(text) or len(ft_stem_pattern.findall(text)) >= 2:
        return 'Primary FT responsibility'
    return 'Secondary/occasional FT'


ft_levels = ai_first_texts.apply(ft_depth).value_counts()
pd.DataFrame({'jobs': ft_levels, '%': (ft_levels / len(ai_first) * 100).round(1)})

In [None]:
# Fine-tuning use cases
# Categories are tried in this order; each use case is counted for the first
# category with a substring hit and is left uncounted when nothing matches.
ft_use_case_categories = {
    'Instruction following': ['instruction', 'task', 'command', 'reasoning', 'agent'],
    'Domain knowledge': ['domain', 'industry', 'vertical', 'medical', 'legal', 'finance', 'healthcare', 'scientific'],
    'Style/Tone': ['style', 'tone', 'voice', 'brand', 'personality', 'format'],
    'Company data': ['company', 'internal', 'proprietary', 'organization'],
    'Performance': ['faster', 'smaller', 'efficiency', 'latency', 'cost', 'optimize'],
    'Language': ['language', 'translation', 'multilingual', 'non-english'],
    'Privacy': ['privacy', 'on-premise', 'local', 'offline', 'secure'],
}

# All use cases of AI-First jobs that mention fine-tuning, one entry per use case.
ft_jobs = ai_first[has_ft.values]
all_ucs = ft_jobs['use_cases'].explode().dropna()

uc_cats = Counter()
for use_case in all_ucs:
    lowered = use_case.lower()
    first_match = next(
        (
            category
            for category, keywords in ft_use_case_categories.items()
            if any(keyword in lowered for keyword in keywords)
        ),
        None,
    )
    if first_match is not None:
        uc_cats[first_match] += 1

print('Fine-tuning use cases:')
for category, count in uc_cats.most_common():
    print(f'  {category}: {count}')