# OpenAI Jobs Interactive Filter

This notebook provides an interactive UI to filter and explore OpenAI job postings.

In [1]:
# Install required packages if needed (%pip installs into this kernel's environment)
# %pip install ipywidgets pandas

In [9]:
import html
import json
import re
from collections import Counter
from typing import List, Dict, Set, Optional

import ipywidgets as widgets
import pandas as pd
from IPython.display import display, HTML, clear_output

## 1. Load Job Data

In [10]:
# Read the extracted postings from disk; the payload keeps them under the 'jobs' key
with open('all_jobs_extracted.json', mode='r', encoding='utf-8') as jobs_file:
    raw_data = json.loads(jobs_file.read())

jobs = raw_data['jobs']
n_loaded = len(jobs)
print(f"Loaded {n_loaded} jobs")

Loaded 147 jobs


## 2. Extract Skills and Experience from Job Descriptions

In [11]:
def extract_years_experience(text: str) -> Optional[int]:
    """
    Extract the years-of-experience requirement from free text.

    Recognised phrasings (case-insensitive):
      * "5+ years of experience", "3 yrs professional experience"
      * "at least 4 years", "minimum 5 years", "minimum of 5 years"
      * "3-5 years of experience" (the lower bound is used)
      * "typically 6+ years"

    Args:
        text: Job description text; may be empty or None.

    Returns:
        The smallest number of years found by any pattern, or None when
        the text is empty or no pattern matches.
    """
    if not text:
        return None

    patterns = [
        r'(\d+)\+?\s*(?:years?|yrs?)\s+(?:of\s+)?(?:professional\s+)?(?:relevant\s+)?(?:software\s+)?(?:engineering\s+)?experience',
        # "of" is optional as a whole word. The previous form `minimum\s+of?`
        # demanded a literal "o", so "minimum 5 years" could never match.
        r'(?:at\s+least|minimum(?:\s+of)?)\s+(\d+)\+?\s*(?:years?|yrs?)',
        r'(\d+)[-–](\d+)\s*(?:years?|yrs?)\s+(?:of\s+)?experience',
        r'typically\s+(\d+)\+?\s*(?:years?|yrs?)',
    ]

    years_found = []
    for pattern in patterns:
        for match in re.findall(pattern, text, re.IGNORECASE):
            # findall yields a tuple for the two-group range pattern; its
            # first element is the lower bound of the range.
            lower_bound = match[0] if isinstance(match, tuple) else match
            years_found.append(int(lower_bound))

    return min(years_found) if years_found else None


# Canonical skill name -> compiled regex, matched against lowercased text.
# Compiled once at definition time instead of on every extract_skills() call.
# NOTE(review): several patterns are plain English words too ('go', 'node',
# 'react', 'spark', 'rust', 'security'), so counts for those skills include
# non-technical uses such as "go above and beyond".
_SKILL_PATTERNS = {
    skill_name: re.compile(pattern)
    for skill_name, pattern in {
        # Programming Languages
        'python': r'\bpython\b',
        'go/golang': r'\b(?:go|golang)\b',
        'rust': r'\brust\b',
        'typescript': r'\btypescript\b',
        'javascript': r'\bjavascript\b',
        # \b after "java" already rules out "javascript"
        'java': r'\bjava\b',
        # No trailing \b: '+' is not a word character, so `\+\+\b` only
        # matched when a letter/digit followed (e.g. "c++11") and missed
        # the common "c++ " / "c++," forms entirely.
        'c++': r'\bc\+\+(?!\+)',
        'sql': r'\bsql\b',
        'scala': r'\bscala\b',
        'ruby': r'\bruby\b',

        # ML/AI
        'machine learning': r'\bmachine\s+learning\b',
        'deep learning': r'\bdeep\s+learning\b',
        'llm': r'\bllm(?:s)?\b',
        'nlp': r'\bnlp\b|\bnatural\s+language\s+processing\b',
        'pytorch': r'\bpytorch\b',
        'tensorflow': r'\btensorflow\b',
        'reinforcement learning': r'\breinforcement\s+learning\b',
        'computer vision': r'\bcomputer\s+vision\b',

        # Infrastructure/DevOps
        'kubernetes': r'\bkubernetes\b|\bk8s\b',
        'docker': r'\bdocker\b',
        'aws': r'\baws\b|\bamazon\s+web\s+services\b',
        'gcp': r'\bgcp\b|\bgoogle\s+cloud\b',
        'azure': r'\bazure\b',
        'linux': r'\blinux\b',
        'ci/cd': r'\bci/?cd\b',
        'terraform': r'\bterraform\b',

        # Databases
        'postgresql': r'\bpostgres(?:ql)?\b',
        'mysql': r'\bmysql\b',
        'mongodb': r'\bmongodb\b',
        'redis': r'\bredis\b',
        'elasticsearch': r'\belasticsearch\b',

        # Data Engineering
        'spark': r'\bspark\b',
        'kafka': r'\bkafka\b',
        'airflow': r'\bairflow\b',
        'data pipelines': r'\bdata\s+pipeline(?:s)?\b',
        'etl': r'\betl\b',

        # Web/API
        'react': r'\breact(?:\.?js)?\b',
        'node.js': r'\bnode(?:\.?js)?\b',
        'fastapi': r'\bfastapi\b',
        'graphql': r'\bgraphql\b',
        'rest api': r'\brest(?:ful)?\s*api(?:s)?\b',

        # Architecture/Concepts
        'distributed systems': r'\bdistributed\s+systems?\b',
        'microservices': r'\bmicroservices?\b',
        'system design': r'\bsystem\s+design\b',
        'backend': r'\bbackend\b|\bback-end\b',
        'frontend': r'\bfrontend\b|\bfront-end\b',
        'full-stack': r'\bfull[-\s]?stack\b',

        # Soft Skills / Domains
        'a/b testing': r'\ba/?b\s+test(?:ing)?\b',
        'data analysis': r'\bdata\s+analysis\b',
        'security': r'\bsecurity\b',
        'networking': r'\bnetworking\b',
    }.items()
}


def extract_skills(text: str) -> Set[str]:
    """
    Extract skills and technologies mentioned in text.

    The text is lowercased and tested against every pattern in
    `_SKILL_PATTERNS`; a skill is reported once no matter how often it
    appears.

    Args:
        text: Job description text; may be empty or None.

    Returns:
        Set of canonical (lowercase) skill names; empty when `text` is
        empty/None or nothing matches.
    """
    if not text:
        return set()

    text_lower = text.lower()
    return {
        skill_name
        for skill_name, compiled in _SKILL_PATTERNS.items()
        if compiled.search(text_lower)
    }

In [12]:
# Process all jobs and extract structured data
processed_jobs = []
all_skills_counter = Counter()
all_locations = set()

for job in jobs:
    # `or {}` also covers keys that are present but null in the JSON
    extracted = job.get('extracted') or {}
    original = job.get('original_info') or {}

    # Get text fields
    about_role = extracted.get('about_the_role', '') or ''
    you_might_thrive = extracted.get('you_might_thrive', [])
    if isinstance(you_might_thrive, list):
        # Skip null/empty bullet entries so join() never sees a non-string
        thrive_text = '\n'.join(str(item) for item in you_might_thrive if item)
    else:
        thrive_text = you_might_thrive or ''
    combined_text = about_role + '\n' + thrive_text

    # Extract skills. Sorted so the stored list (and therefore the first few
    # skills shown in the results table) does not depend on set iteration
    # order, which differs between kernel sessions.
    skills = sorted(extract_skills(combined_text))
    all_skills_counter.update(skills)

    # Extract years of experience
    years_exp = extract_years_experience(combined_text)

    # Parse compensation: the first "$<n>K" figure in the text is kept
    comp = extracted.get('compensation', '')
    comp_min = None
    if comp:
        match = re.search(r'\$(\d+)K', comp)
        if match:
            comp_min = int(match.group(1))

    # Get locations (new field from updated extraction); normalise to a list
    # whether the source value is a list, a single string, empty or null
    locations = extracted.get('location') or []
    if isinstance(locations, str):
        locations = [locations]
    all_locations.update(locations)

    # Get team from extracted (more accurate) or original
    team = extracted.get('team') or original.get('team', 'Unknown')

    processed_jobs.append({
        # `or` chain so a null extracted title falls back instead of storing None
        'title': extracted.get('title') or original.get('title') or 'Unknown',
        'team': team,
        'url': original.get('url', ''),
        'location': locations,  # New field
        'compensation': comp,
        'compensation_min_k': comp_min,
        'years_experience': years_exp,
        'skills': skills,
        'about_the_role': about_role,
        'you_might_thrive': thrive_text,
        'about_the_team': extracted.get('about_the_team', ''),
    })

# Create DataFrame
df = pd.DataFrame(processed_jobs)

# Pre-compute search text columns for better performance
df['_search_title'] = df['title'].fillna('').str.lower()
df['_search_role'] = df['about_the_role'].fillna('').str.lower()
df['_search_thrive'] = df['you_might_thrive'].fillna('').str.lower()
df['_search_team'] = df['team'].fillna('').str.lower()
df['_search_all'] = df['_search_title'] + ' ' + df['_search_role'] + ' ' + df['_search_thrive'] + ' ' + df['_search_team']
df['_location_str'] = df['location'].apply(lambda x: ', '.join(x).lower() if x else '')

ALL_LOCATIONS = sorted(list(all_locations))

# Count only non-empty compensation strings; notna() alone would also count ''
has_compensation = df['compensation'].fillna('').astype(str).str.strip().ne('')

print(f"Processed {len(df)} jobs")
print(f"\nJobs with years experience data: {df['years_experience'].notna().sum()}")
print(f"Jobs with skills data: {(df['skills'].str.len() > 0).sum()}")
print(f"Jobs with compensation data: {has_compensation.sum()}")
print(f"Jobs with location data: {(df['location'].str.len() > 0).sum()}")
print(f"Unique locations: {len(ALL_LOCATIONS)}")

Processed 147 jobs

Jobs with years experience data: 57
Jobs with skills data: 123
Jobs with compensation data: 133
Jobs with location data: 146
Unique locations: 8


In [13]:
# Rank the extracted skills by the number of postings that mention them
skill_ranking = all_skills_counter.most_common()
banner = "=" * 50

print(banner)
print("ALL EXTRACTED SKILLS (sorted by frequency)")
print(banner)

# Skill names only, most frequent first; feeds the skills selector in the UI
ALL_SKILLS = [skill_name for skill_name, _n_jobs in skill_ranking]

for skill_name, n_jobs in skill_ranking:
    print(f"  {skill_name}: {n_jobs} jobs")

print(f"\nTotal unique skills: {len(ALL_SKILLS)}")

ALL EXTRACTED SKILLS (sorted by frequency)
  python: 48 jobs
  backend: 43 jobs
  distributed systems: 42 jobs
  security: 30 jobs
  go/golang: 26 jobs
  full-stack: 26 jobs
  machine learning: 25 jobs
  llm: 20 jobs
  kubernetes: 19 jobs
  data pipelines: 16 jobs
  rust: 13 jobs
  react: 13 jobs
  postgresql: 11 jobs
  javascript: 11 jobs
  networking: 11 jobs
  sql: 11 jobs
  terraform: 11 jobs
  typescript: 10 jobs
  ci/cd: 10 jobs
  linux: 9 jobs
  frontend: 9 jobs
  pytorch: 9 jobs
  azure: 8 jobs
  deep learning: 7 jobs
  reinforcement learning: 7 jobs
  mysql: 6 jobs
  kafka: 6 jobs
  data analysis: 5 jobs
  java: 5 jobs
  spark: 5 jobs
  fastapi: 4 jobs
  node.js: 4 jobs
  tensorflow: 4 jobs
  airflow: 2 jobs
  a/b testing: 2 jobs
  system design: 2 jobs
  docker: 2 jobs
  redis: 2 jobs
  etl: 1 jobs
  scala: 1 jobs
  graphql: 1 jobs
  gcp: 1 jobs
  aws: 1 jobs

Total unique skills: 43


## 3. Interactive Job Filter UI

In [14]:
class JobFilter:
    """
    Interactive ipywidgets front-end for filtering a DataFrame of job postings.

    The DataFrame must provide the columns this class reads: 'title', 'team',
    'url', 'location' (list of str), 'compensation', 'compensation_min_k',
    'years_experience', 'skills' (list of str) and the lowercase search
    columns '_search_title', '_search_role', '_search_thrive',
    '_search_team' and '_search_all'.

    Filters are evaluated only when "Apply Filters" is clicked; "Reset"
    restores every widget to its default and shows the unfiltered data.
    """

    MAX_DISPLAY_ROWS = 50  # Limit displayed rows for performance

    # Slider bounds. Kept in one place because widget creation, the
    # "is this filter active?" checks and reset must all agree on them.
    YEARS_RANGE = (0, 15)
    MAX_COMP_K = 600

    # "Search in" choices mapped to the pre-lowercased column each one searches.
    SECTION_COLUMNS = {
        'Title': '_search_title',
        'About Role': '_search_role',
        'You Might Thrive': '_search_thrive',
        'Team': '_search_team',
    }
    DEFAULT_SECTIONS = ('Title', 'About Role', 'You Might Thrive')

    # Static stylesheet for the results table (plain string, not an f-string).
    _TABLE_CSS = """
            <style>
                .job-table { border-collapse: collapse; width: 100%; font-size: 12px; }
                .job-table th, .job-table td { border: 1px solid #ddd; padding: 8px; text-align: left; }
                .job-table th { background-color: #4CAF50; color: white; }
                .job-table tr:nth-child(even) { background-color: #f2f2f2; }
                .job-table tr:hover { background-color: #ddd; }
                .job-table a { color: #0066cc; text-decoration: none; }
                .job-table a:hover { text-decoration: underline; }
            </style>"""

    def __init__(self, dataframe: pd.DataFrame, all_skills: List[str], all_locations: List[str]):
        """
        Args:
            dataframe: Processed jobs (see class docstring for required columns).
            all_skills: Skill names offered in the skills selector, in display order.
            all_locations: Location names offered in the location selector.
        """
        self.df = dataframe
        self.all_skills = all_skills
        self.all_locations = all_locations
        self.filtered_df = dataframe.copy()

        # Create widgets
        self._create_widgets()

    def _create_widgets(self):
        """Create all filter widgets and wire the two buttons to their handlers."""

        # Keyword include filter
        self.keyword_include = widgets.Text(
            value='',
            placeholder='e.g., distributed, ML, backend',
            description='Include:',
            layout=widgets.Layout(width='500px'),
            style={'description_width': '100px'}
        )

        # Keyword exclude filter
        self.keyword_exclude = widgets.Text(
            value='',
            placeholder='e.g., manager, senior, lead',
            description='Exclude:',
            layout=widgets.Layout(width='500px'),
            style={'description_width': '100px'}
        )

        # Search in section selector
        self.search_sections = widgets.SelectMultiple(
            options=list(self.SECTION_COLUMNS),
            value=list(self.DEFAULT_SECTIONS),
            description='Search in:',
            layout=widgets.Layout(width='300px', height='100px'),
            style={'description_width': '100px'}
        )

        # Skills multi-select
        self.skills_select = widgets.SelectMultiple(
            options=self.all_skills,
            value=[],
            description='Skills:',
            layout=widgets.Layout(width='300px', height='150px'),
            style={'description_width': '100px'}
        )

        # Skills match mode
        self.skills_mode = widgets.RadioButtons(
            options=['Any (OR)', 'All (AND)'],
            value='Any (OR)',
            description='Match:',
            layout=widgets.Layout(width='200px'),
            style={'description_width': '60px'}
        )

        # Location filter; 'All' is a sentinel that is ignored when filtering
        self.location_select = widgets.SelectMultiple(
            options=['All'] + self.all_locations,
            value=['All'],
            description='Location:',
            layout=widgets.Layout(width='250px', height='100px'),
            style={'description_width': '100px'}
        )

        # Years of experience slider
        self.years_exp = widgets.IntRangeSlider(
            value=list(self.YEARS_RANGE),
            min=self.YEARS_RANGE[0],
            max=self.YEARS_RANGE[1],
            step=1,
            description='Years Exp:',
            layout=widgets.Layout(width='400px'),
            style={'description_width': '100px'}
        )

        # Min compensation slider
        self.min_comp = widgets.IntSlider(
            value=0,
            min=0,
            max=self.MAX_COMP_K,
            step=10,
            description='Min Comp (K):',
            layout=widgets.Layout(width='400px'),
            style={'description_width': '100px'}
        )

        # Team filter
        teams = ['All'] + sorted(self.df['team'].dropna().unique().tolist())
        self.team_select = widgets.Dropdown(
            options=teams,
            value='All',
            description='Team:',
            layout=widgets.Layout(width='400px'),
            style={'description_width': '100px'}
        )

        # Apply button
        self.apply_btn = widgets.Button(
            description='Apply Filters',
            button_style='primary',
            layout=widgets.Layout(width='150px')
        )
        self.apply_btn.on_click(self._apply_filters)

        # Reset button
        self.reset_btn = widgets.Button(
            description='Reset',
            button_style='warning',
            layout=widgets.Layout(width='100px')
        )
        self.reset_btn.on_click(self._reset_filters)

        # Output area
        self.output = widgets.Output()

    @staticmethod
    def _split_keywords(raw: str) -> List[str]:
        """Split a comma-separated string into stripped, lowercased, non-empty keywords."""
        return [part.strip().lower() for part in raw.split(',') if part.strip()]

    def _apply_filters(self, btn=None):
        """
        Read every widget, build one boolean row mask, store the matching
        rows in `self.filtered_df` and refresh the results table.

        Rows with no years-of-experience / compensation value are kept by
        the corresponding range filters rather than excluded.

        Args:
            btn: The clicked button (passed by ipywidgets); unused.
        """
        mask = pd.Series(True, index=self.df.index, dtype=bool)

        # Get filter values
        include_keywords = self._split_keywords(self.keyword_include.value)
        exclude_keywords = self._split_keywords(self.keyword_exclude.value)
        sections = list(self.search_sections.value)
        selected_skills = list(self.skills_select.value)
        skills_mode = self.skills_mode.value
        years_range = tuple(self.years_exp.value)
        min_comp = self.min_comp.value
        team = self.team_select.value
        selected_locations = [loc for loc in self.location_select.value if loc != 'All']

        # Build the text to search from the selected sections; with nothing
        # selected, fall back to the combined '_search_all' column.
        search_cols = [col for name, col in self.SECTION_COLUMNS.items() if name in sections]
        if search_cols:
            search_text = self.df[search_cols[0]].fillna('')
            for col in search_cols[1:]:
                search_text = search_text + ' ' + self.df[col].fillna('')
        else:
            search_text = self.df['_search_all']

        # Include: keep rows containing ANY keyword (plain substring match)
        if include_keywords:
            include_mask = pd.Series(False, index=self.df.index, dtype=bool)
            for kw in include_keywords:
                include_mask = include_mask | search_text.str.contains(kw, na=False, regex=False)
            mask = mask & include_mask

        # Exclude: drop rows containing ANY keyword
        for kw in exclude_keywords:
            mask = mask & ~search_text.str.contains(kw, na=False, regex=False)

        # Apply skills filter
        if selected_skills:
            combine = any if skills_mode == 'Any (OR)' else all
            skills_mask = self.df['skills'].apply(
                lambda job_skills: combine(s in job_skills for s in selected_skills)
            )
            mask = mask & skills_mask

        # Apply location filter: a job matches if it lists any selected location
        if selected_locations:
            location_mask = self.df['location'].apply(
                lambda job_locs: any(loc in job_locs for loc in selected_locations)
            )
            mask = mask & location_mask

        # Apply years experience filter only when the slider left its full range
        if years_range != self.YEARS_RANGE:
            years = self.df['years_experience']
            years_mask = years.isna() | ((years >= years_range[0]) & (years <= years_range[1]))
            mask = mask & years_mask

        # Apply compensation filter
        if min_comp > 0:
            comp_min = self.df['compensation_min_k']
            mask = mask & (comp_min.isna() | (comp_min >= min_comp))

        # Apply team filter
        if team != 'All':
            mask = mask & (self.df['team'] == team)

        self.filtered_df = self.df[mask]
        self._display_results()

    def _reset_filters(self, btn=None):
        """Restore every widget to its default value and show all jobs again."""
        self.keyword_include.value = ''
        self.keyword_exclude.value = ''
        self.search_sections.value = list(self.DEFAULT_SECTIONS)
        self.skills_select.value = []
        self.skills_mode.value = 'Any (OR)'
        self.location_select.value = ['All']
        self.years_exp.value = self.YEARS_RANGE
        self.min_comp.value = 0
        self.team_select.value = 'All'
        self.filtered_df = self.df.copy()
        self._display_results()

    @staticmethod
    def _cell(value, fallback: str = 'N/A') -> str:
        """Return `value` as HTML-escaped text, or `fallback` when it is None, NaN or ''."""
        is_nan = isinstance(value, float) and value != value
        if value is None or is_nan or value == '':
            return html.escape(fallback)
        return html.escape(str(value))

    def _build_results_html(self, display_df: pd.DataFrame) -> str:
        """
        Render `display_df` as an HTML table string.

        Every value originates from scraped job data, so it is HTML-escaped
        before interpolation; otherwise a '<' or '&' in a title, team or URL
        would corrupt the table or inject markup into the notebook output.
        """
        rows = []
        for _, row in display_df.iterrows():
            job_skills = row['skills'] if isinstance(row['skills'], (list, tuple)) else []
            job_locations = row['location'] if isinstance(row['location'], (list, tuple)) else []

            # Show at most four skills, with an ellipsis when more exist
            skills_text = ', '.join(str(s) for s in job_skills[:4])
            if len(job_skills) > 4:
                skills_text += '...'

            years = row['years_experience']
            years_text = f"{int(years)}+ yrs" if pd.notna(years) else ''

            url = self._cell(row['url'], '#')
            title = self._cell(row['title'], 'Unknown')
            team = self._cell(row['team'])
            locations = self._cell(', '.join(str(loc) for loc in job_locations))
            comp = self._cell(row['compensation'])
            years_cell = self._cell(years_text)
            skills_cell = self._cell(skills_text)

            rows.append(f"""<tr>
                    <td><a href="{url}" target="_blank" rel="noopener noreferrer">{title}</a></td>
                    <td>{team}</td>
                    <td>{locations}</td>
                    <td>{comp}</td>
                    <td>{years_cell}</td>
                    <td>{skills_cell}</td>
                </tr>""")

        return f"""{self._TABLE_CSS}
            <table class="job-table">
                <tr>
                    <th>Title</th>
                    <th>Team</th>
                    <th>Location</th>
                    <th>Compensation</th>
                    <th>Years Exp</th>
                    <th>Key Skills</th>
                </tr>
                {''.join(rows)}
            </table>
            """

    def _display_results(self):
        """Show the result count and the first MAX_DISPLAY_ROWS rows in the output area."""
        with self.output:
            clear_output(wait=True)

            n_results = len(self.filtered_df)
            print(f"\n{'='*60}")
            print(f"RESULTS: {n_results} jobs found")
            if n_results > self.MAX_DISPLAY_ROWS:
                print(f"(Showing first {self.MAX_DISPLAY_ROWS} results)")
            print(f"{'='*60}\n")

            if n_results == 0:
                print("No jobs match your criteria. Try adjusting filters.")
                return

            # Limit display for performance
            display_df = self.filtered_df.head(self.MAX_DISPLAY_ROWS)
            display(HTML(self._build_results_html(display_df)))

    def display(self):
        """Lay out and display the complete filter UI, then show the initial results."""
        # Header
        header = widgets.HTML("<h2>OpenAI Jobs Filter</h2>")

        # Keyword section
        keyword_label = widgets.HTML("<b>Keyword Filters</b> (comma-separated):")
        keyword_box = widgets.VBox([
            keyword_label,
            self.keyword_include,
            self.keyword_exclude,
            self.search_sections
        ])

        # Skills section
        skills_label = widgets.HTML("<b>Skills Filter</b> (Ctrl/Cmd+Click for multiple):")
        skills_box = widgets.VBox([
            skills_label,
            widgets.HBox([self.skills_select, self.skills_mode])
        ])

        # Location section
        location_label = widgets.HTML("<b>Location Filter</b>:")
        location_box = widgets.VBox([
            location_label,
            self.location_select
        ])

        # Other filters
        other_label = widgets.HTML("<b>Other Filters</b>:")
        other_box = widgets.VBox([
            other_label,
            self.team_select,
            self.years_exp,
            self.min_comp,
        ])

        # Buttons
        buttons = widgets.HBox([self.apply_btn, self.reset_btn])

        # Combine all - reorganized layout
        top_row = widgets.HBox([keyword_box, skills_box, location_box], layout=widgets.Layout(gap='20px'))

        ui = widgets.VBox([
            header,
            top_row,
            other_box,
            buttons,
            self.output
        ], layout=widgets.Layout(padding='10px'))

        # Inside a method body the bare name resolves to IPython's display(),
        # not to this method.
        display(ui)

        # Show initial results
        self._display_results()

In [15]:
# Build the filter widget over the processed jobs and render it below this cell
job_filter = JobFilter(
    dataframe=df,
    all_skills=ALL_SKILLS,
    all_locations=ALL_LOCATIONS,
)
job_filter.display()

VBox(children=(HTML(value='<h2>OpenAI Jobs Filter</h2>'), HBox(children=(VBox(children=(HTML(value='<b>Keyword…

## 4. Quick Search Functions

Use these helper functions for quick filtering without the UI.

In [None]:
def search_jobs(
    keywords: Optional[List[str]] = None,
    exclude: Optional[List[str]] = None,
    skills: Optional[List[str]] = None,
    locations: Optional[List[str]] = None,
    min_years: Optional[int] = None,
    max_years: Optional[int] = None,
    min_comp_k: Optional[int] = None,
    team: Optional[str] = None,
    skills_match_all: bool = False,
    jobs_df: Optional[pd.DataFrame] = None,
) -> pd.DataFrame:
    """
    Search jobs with various filters. All supplied filters are AND-ed.

    Args:
        keywords: Keywords to match, case-insensitive substring; a job is
            kept if ANY keyword occurs in its '_search_all' text.
        exclude: Keywords to exclude (same matching as `keywords`).
        skills: Skill names to match against each job's 'skills' list.
        locations: Locations to filter (any match).
        min_years: Minimum years of experience.
        max_years: Maximum years of experience.
        min_comp_k: Minimum compensation in thousands.
        team: Team name; case-insensitive literal substring match.
        skills_match_all: If True, match ALL skills; else ANY.
        jobs_df: DataFrame to search. Defaults to the notebook-level `df`.

    Jobs with no years-of-experience or compensation value are kept by the
    corresponding filters rather than excluded.

    Returns:
        New DataFrame with the columns title, team, location, compensation,
        years_experience, skills and url for the matching jobs.
    """
    source = df if jobs_df is None else jobs_df
    mask = pd.Series(True, index=source.index, dtype=bool)

    # Use the pre-computed lowercase search column for performance
    haystack = source['_search_all']

    if keywords:
        any_hit = pd.Series(False, index=source.index, dtype=bool)
        for kw in keywords:
            any_hit = any_hit | haystack.str.contains(kw.lower(), na=False, regex=False)
        mask = mask & any_hit

    if exclude:
        for kw in exclude:
            mask = mask & ~haystack.str.contains(kw.lower(), na=False, regex=False)

    if skills:
        combine = all if skills_match_all else any
        skills_mask = source['skills'].apply(
            lambda job_skills: combine(s in job_skills for s in skills)
        )
        mask = mask & skills_mask.astype(bool)

    if locations:
        location_mask = source['location'].apply(
            lambda job_locs: any(loc in job_locs for loc in locations)
        )
        mask = mask & location_mask.astype(bool)

    years = source['years_experience']
    if min_years is not None:
        mask = mask & (years.isna() | (years >= min_years))

    if max_years is not None:
        mask = mask & (years.isna() | (years <= max_years))

    if min_comp_k is not None:
        comp_min = source['compensation_min_k']
        mask = mask & (comp_min.isna() | (comp_min >= min_comp_k))

    if team:
        # regex=False: team names are literal text. With the regex default a
        # name such as "Applied AI (Research)" is parsed as a pattern and
        # fails to match itself, and "C++" raises re.error.
        mask = mask & source['team'].str.contains(team, case=False, na=False, regex=False)

    result_columns = ['title', 'team', 'location', 'compensation', 'years_experience', 'skills', 'url']
    return source.loc[mask, result_columns].copy()

In [None]:
# Example: Find Python backend jobs requiring 3-5 years experience
# (search_jobs also keeps jobs where no years requirement was detected)
results = search_jobs(
    keywords=['backend'],
    skills=['python'],
    min_years=3,  # lower bound was missing, so 0-2 year roles were returned too
    max_years=5
)
print(f"Found {len(results)} jobs:")
results[['title', 'team', 'compensation', 'years_experience']]

In [None]:
# Example: ML/AI postings paying at least $300K whose text mentions neither
# "manager" nor "lead"
results = search_jobs(
    min_comp_k=300,
    exclude=['manager', 'lead'],
    skills=['machine learning', 'deep learning'],
)
print(f"Found {len(results)} jobs:")
results.loc[:, ['title', 'team', 'compensation', 'skills']]

In [None]:
# Example: postings tagged with distributed systems OR kubernetes
results = search_jobs(
    skills_match_all=False,  # ANY of the listed skills is enough
    skills=['distributed systems', 'kubernetes'],
)
print(f"Found {len(results)} jobs:")
results.loc[:, ['title', 'team', 'compensation', 'skills']]

## 5. View Job Details

In [None]:
def show_job_details(title_contains: str, jobs_df: Optional[pd.DataFrame] = None):
    """
    Print full details for every job whose title contains the given text.

    Args:
        title_contains: Text to look for in the title; matched as a
            case-insensitive literal substring (not a regular expression).
        jobs_df: DataFrame to search. Defaults to the notebook-level `df`.

    Prints a "No jobs found" message when nothing matches. The role and
    "you might thrive" sections are cut to 1000 characters plus "...".
    """
    source = df if jobs_df is None else jobs_df

    # regex=False so titles with "(", "+", etc. can be searched verbatim;
    # na=False so rows with a missing title do not break boolean indexing.
    title_hit = source['title'].str.contains(title_contains, case=False, regex=False, na=False)
    matches = source[title_hit]

    if len(matches) == 0:
        print(f"No jobs found matching '{title_contains}'")
        return

    def preview(text, limit: int = 1000) -> str:
        """Return `text` limited to `limit` characters, with '...' appended when cut."""
        text = text or ''
        return text[:limit] + "..." if len(text) > limit else text

    for _, job in matches.iterrows():
        job_locations = job.get('location')
        location_text = ', '.join(job_locations) if isinstance(job_locations, (list, tuple)) and job_locations else 'N/A'

        print("=" * 60)
        print(f"TITLE: {job['title']}")
        print(f"TEAM: {job['team']}")
        print(f"LOCATION: {location_text}")
        print(f"COMPENSATION: {job['compensation']}")
        print(f"YEARS EXPERIENCE: {job['years_experience']}")
        print(f"SKILLS: {', '.join(job['skills'])}")
        print(f"URL: {job['url']}")
        print("\n--- ABOUT THE ROLE ---")
        print(preview(job['about_the_role']))
        print("\n--- YOU MIGHT THRIVE ---")
        print(preview(job['you_might_thrive']))
        print("=" * 60 + "\n")

In [None]:
# Example: print the full write-up of every posting whose title contains this text
show_job_details(title_contains="Research Engineer, Codex")

## 6. Statistics and Analysis

In [None]:
# Compensation statistics, based on 'compensation_min_k': the first "$<n>K"
# figure parsed from each posting's compensation text
print("=== COMPENSATION STATISTICS ===")
comp_df = df[df['compensation_min_k'].notna()]
comp_min_k = comp_df['compensation_min_k']
print(f"Jobs with compensation data: {len(comp_df)}")
if comp_min_k.empty:
    # Avoid printing a block of "nan" values when nothing was parsed
    print("No compensation data available.")
else:
    # :g drops the trailing ".0" the float column would otherwise print
    # ("$255.0K"), keeping these lines consistent with the rounded mean.
    print(f"Min: ${comp_min_k.min():g}K")
    print(f"Max: ${comp_min_k.max():g}K")
    print(f"Median: ${comp_min_k.median():g}K")
    print(f"Mean: ${comp_min_k.mean():.0f}K")

In [None]:
# Posting count per team, largest first; only the 15 biggest teams are listed
print("=== JOBS BY TEAM ===")
team_counts = df['team'].value_counts()
for team_name, n_jobs in team_counts.iloc[:15].items():
    print(f"{team_name}: {n_jobs}")

In [None]:
# How many postings ask for each minimum number of years, in ascending order
print("=== YEARS EXPERIENCE DISTRIBUTION ===")
years_counts = df['years_experience'].value_counts().sort_index()
for min_years, n_jobs in zip(years_counts.index, years_counts.to_numpy()):
    if pd.isna(min_years):
        continue
    print(f"{int(min_years)}+ years: {n_jobs} jobs")