In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math

## Cargando dataset

In [37]:
# Lista de los valores únicos

df=pd.read_csv("../data/interim/modelado_FINAL.csv")

df.columns

Index(['id', 'title', 'location_area', 'company_display_name', 'contract_time',
       'salary_min', 'salary_max', 'salary_is_predicted', 'created',
       'latitude', 'longitude', 'redirect_url', 'description_full', 'remote',
       'permanent_contract', 'freelance', 'bonuses', 'career_development',
       'immediate_start', 'startup', 'multinational',
       'consulting_or_outsourcing', 'experience_gt_5', 'intern', 'junior',
       'senior', 'lead', 'principal_or_manager', 'higher_education',
       'certifications_required', 'software_development', 'data_science_ml',
       'cybersecurity', 'qa_testing', 'it_support_infrastructure',
       'project_product_management', 'ux_ui_design'],
      dtype='object')

### Eliminar información muy irrelevante

En este caso borramos la columna de si el salario es una predicción de la plataforma de la que obtenemos los salarios

In [38]:
columns_to_drop = ['salary_is_predicted']
df = df.drop(columns=columns_to_drop)

Borramos ofertas cuyo título desvirtúa el dataset

In [4]:
# Lista de patrones que indican que NO es una oferta real de empleo
exclusion_keywords = [
    "paid market research",
    "research study",
    "study for",
    "60min",
    "participate in",
    "compensated interview",
    "survey study",
    "market research session"
]

# Convertir el título a minúsculas para asegurar coincidencias
df["title_clean"] = df["title"].fillna("").str.lower()

# Crear una máscara para detectar si alguna de esas frases aparece en el título
pattern = '|'.join(exclusion_keywords)
mask_fake_offers = df["title_clean"].str.contains(pattern, regex=True)

# Eliminar esas filas
df = df[~mask_fake_offers].copy()

# Limpiar la columna auxiliar si no la necesitas
df.drop(columns=["title_clean"], inplace=True)


In [5]:
df.shape

(120109, 36)

## Obtención de seniorities del título del trabajo
Para complementar el modelo zero-shot aplicado y conseguir mejores resultados

In [6]:
# Paso 1: Definir patrones regex por nivel de seniority (con prioridad)
seniority_regex = [
    ("cto", r"\b(?:cto|chief technology officer)\b"),
    ("principal_or_manager", r"\b(?:principal|manager)\b"),
    ("lead", r"\blead\b"),
    ("senior", r"\b(?:senior|sr)\b"),
    ("junior", r"\b(?:junior|jr)\b"),
    ("intern", r"\b(?:intern|trainee|internship|apprentice)\b")
]

# Paso 2: Asegurar que 'title' esté en minúsculas y sin NaNs
df["title"] = df["title"].fillna("").str.lower()

# Paso 3: Crear columnas auxiliares *_title para detección con regex
for level, pattern in seniority_regex:
    df[level + "_title"] = df["title"].str.contains(pattern, regex=True, na=False).astype(int)

# Paso 4: Asegurar que las columnas reales existen (crear 'cto' si hace falta)
for level, _ in seniority_regex:
    if level not in df.columns:
        df[level] = 0

# Paso 5: Si *_title es 1, sustituir la columna real por 1
for level, _ in seniority_regex:
    df.loc[df[level + "_title"] == 1, level] = 1

# Paso 6: Eliminar las columnas *_title
df.drop(columns=[level + "_title" for level, _ in seniority_regex], inplace=True)

# Paso 7: Crear la columna 'seniority' según prioridad
df['seniority'] = 'unknown'
for level, _ in seniority_regex:
    df.loc[(df[level] == 1) & (df['seniority'] == 'unknown'), 'seniority'] = level



In [7]:
columns_to_drop = ["cto"]
df.drop(columns=columns_to_drop)

Unnamed: 0,id,title,location_area,company_display_name,contract_time,salary_min,salary_max,created,latitude,longitude,...,higher_education,certifications_required,software_development,data_science_ml,cybersecurity,qa_testing,it_support_infrastructure,project_product_management,ux_ui_design,seniority
0,5172667476,it infrastructure administrator,"US, Ohio, Franklin County, Grandview Heights",Experis,,92562.85,92562.85,2025-05-01T06:17:44Z,39.991073,-83.000202,...,0.032945,0.353574,0.513744,0.176900,0.197202,0.227965,0.310890,0.820510,0.404728,unknown
1,5043698107,it program manager,"US, Massachusetts, Middlesex County, Marlborough",BJ's Wholesale Club,,106284.47,106284.47,2025-02-09T06:17:30Z,42.346740,-71.550240,...,0.312562,0.504661,0.276124,0.329195,0.045312,0.022307,0.027231,0.378242,0.081963,principal_or_manager
2,5172667443,it operations window engineer,"US, New York, Orange County, Chester",Experis,full_time,107743.86,107743.86,2025-05-01T06:17:44Z,41.353013,-74.263700,...,0.059313,0.796602,0.633942,0.427687,0.490027,0.857570,0.436988,0.556164,0.473490,unknown
3,5172673565,senior it business systems analyst,"US, Wisconsin, Kenosha County, Trevor","Uline, Inc.",full_time,118673.77,118673.77,2025-05-01T06:19:53Z,42.516405,-88.132486,...,0.714006,0.659511,0.807941,0.362978,0.142299,0.338205,0.247681,0.854075,0.280981,senior
4,5172673330,senior it business systems analyst,"US, Wisconsin, Racine County, Burlington","Uline, Inc.",full_time,112896.08,112896.08,2025-05-01T06:19:49Z,42.662802,-88.276015,...,0.601669,0.767332,0.839628,0.171539,0.031274,0.057170,0.125325,0.546452,0.247142,senior
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120779,5150951611,associate software engineer,"US, Minnesota, Dakota County, Lilydale",About Healthcare Inc,full_time,88226.10,88226.10,2025-04-17T22:55:46Z,44.933100,-93.121400,...,0.127674,0.519539,0.859994,0.096875,0.094639,0.277828,0.214207,0.672515,0.088821,unknown
120780,5160964574,spotsylvania county treasurer’s office is hiri...,"US, Minnesota, Saint Louis County, Virginia",Spotsylvania County Treasurer’s Office,,73698.23,73698.23,2025-04-24T15:41:18Z,47.523260,-92.536571,...,0.163843,0.621199,0.351872,0.061071,0.314628,0.073963,0.500907,0.607822,0.263788,unknown
120781,5156358662,development strength & conditioning specialist,"US, Minnesota, Anoka County, Blaine",Minnesota United FC,full_time,81683.86,81683.86,2025-04-22T12:44:38Z,45.168807,-93.252464,...,0.966665,0.993632,0.975856,0.961056,0.987338,0.984671,0.987267,0.990510,0.989325,unknown
120782,5161049702,database administrator (27890),"US, Minnesota, Stearns County, Saint Cloud",Dahl Consulting,,130580.62,130580.62,2025-04-24T16:47:29Z,45.493912,-94.242586,...,0.446955,0.774430,0.030155,0.020205,0.479370,0.020484,0.327343,0.464971,0.002404,unknown


## Creando la columna "salary_avg"

Se eliminan los salarios con datos muy bajos que pueden distorsionar la media

In [8]:
df = df.drop(df[(df["salary_min"] <= 10000) | (df["salary_max"] <= 10000)].index)

In [9]:
# Crear la columna salary_avg como media de salary_min y salary_max
df["salary_avg"] = df[["salary_min", "salary_max"]].mean(axis=1)


## Limpiando seniorities y reduciendo columnas

### Obtención de media salarial de cada seniority

In [10]:
salary_by_seniority = (
    df[df['seniority'] != 'unknown']
    .groupby('seniority')['salary_avg']
    .median()
    .to_dict()
)
salary_by_seniority

{'cto': 121482.24,
 'intern': 55129.14,
 'junior': 73084.2,
 'lead': 117646.245,
 'principal_or_manager': 130000.0,
 'senior': 121123.735}

In [11]:
df

Unnamed: 0,id,title,location_area,company_display_name,contract_time,salary_min,salary_max,created,latitude,longitude,...,software_development,data_science_ml,cybersecurity,qa_testing,it_support_infrastructure,project_product_management,ux_ui_design,cto,seniority,salary_avg
0,5172667476,it infrastructure administrator,"US, Ohio, Franklin County, Grandview Heights",Experis,,92562.85,92562.85,2025-05-01T06:17:44Z,39.991073,-83.000202,...,0.513744,0.176900,0.197202,0.227965,0.310890,0.820510,0.404728,0,unknown,92562.85
1,5043698107,it program manager,"US, Massachusetts, Middlesex County, Marlborough",BJ's Wholesale Club,,106284.47,106284.47,2025-02-09T06:17:30Z,42.346740,-71.550240,...,0.276124,0.329195,0.045312,0.022307,0.027231,0.378242,0.081963,0,principal_or_manager,106284.47
2,5172667443,it operations window engineer,"US, New York, Orange County, Chester",Experis,full_time,107743.86,107743.86,2025-05-01T06:17:44Z,41.353013,-74.263700,...,0.633942,0.427687,0.490027,0.857570,0.436988,0.556164,0.473490,0,unknown,107743.86
3,5172673565,senior it business systems analyst,"US, Wisconsin, Kenosha County, Trevor","Uline, Inc.",full_time,118673.77,118673.77,2025-05-01T06:19:53Z,42.516405,-88.132486,...,0.807941,0.362978,0.142299,0.338205,0.247681,0.854075,0.280981,0,senior,118673.77
4,5172673330,senior it business systems analyst,"US, Wisconsin, Racine County, Burlington","Uline, Inc.",full_time,112896.08,112896.08,2025-05-01T06:19:49Z,42.662802,-88.276015,...,0.839628,0.171539,0.031274,0.057170,0.125325,0.546452,0.247142,0,senior,112896.08
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120779,5150951611,associate software engineer,"US, Minnesota, Dakota County, Lilydale",About Healthcare Inc,full_time,88226.10,88226.10,2025-04-17T22:55:46Z,44.933100,-93.121400,...,0.859994,0.096875,0.094639,0.277828,0.214207,0.672515,0.088821,0,unknown,88226.10
120780,5160964574,spotsylvania county treasurer’s office is hiri...,"US, Minnesota, Saint Louis County, Virginia",Spotsylvania County Treasurer’s Office,,73698.23,73698.23,2025-04-24T15:41:18Z,47.523260,-92.536571,...,0.351872,0.061071,0.314628,0.073963,0.500907,0.607822,0.263788,0,unknown,73698.23
120781,5156358662,development strength & conditioning specialist,"US, Minnesota, Anoka County, Blaine",Minnesota United FC,full_time,81683.86,81683.86,2025-04-22T12:44:38Z,45.168807,-93.252464,...,0.975856,0.961056,0.987338,0.984671,0.987267,0.990510,0.989325,0,unknown,81683.86
120782,5161049702,database administrator (27890),"US, Minnesota, Stearns County, Saint Cloud",Dahl Consulting,,130580.62,130580.62,2025-04-24T16:47:29Z,45.493912,-94.242586,...,0.030155,0.020205,0.479370,0.020484,0.327343,0.464971,0.002404,0,unknown,130580.62


In [12]:
seniority_cols = ['intern', 'junior','senior', 'lead', 'principal_or_manager']

# Filtrar filas con 'unknown' en 'seniority'
mask = df['seniority'] == 'unknown'
subset = df.loc[mask, seniority_cols].copy()

# Penalizar 'principal_or_manager' si no cumple la condición
subset['principal_or_manager'] = subset['principal_or_manager'].where(
    (subset['principal_or_manager'] >= 0.98),
    -np.inf
)

# Penalizar 'lead' si no cumple la condición
subset['lead'] = subset['lead'].where(
    (subset['lead'] >= 0.98),
    -np.inf
)

# Asignar el seniority con mayor score tras ajustes
df.loc[mask, 'seniority'] = subset.idxmax(axis=1)

In [13]:
salary_by_seniority = (
    df[df['seniority'] != 'unknown']
    .groupby('seniority')['salary_avg']
    .median()
    .to_dict()
)
salary_by_seniority

{'cto': 121482.24,
 'intern': 74784.39,
 'junior': 80768.875,
 'lead': 115004.185,
 'principal_or_manager': 129023.735,
 'senior': 104182.08499999999}

In [14]:
df.drop(columns=['intern', 'junior', 'lead', 'principal_or_manager', 'senior', 'cto'], inplace=True)

In [15]:
df.columns

Index(['id', 'title', 'location_area', 'company_display_name', 'contract_time',
       'salary_min', 'salary_max', 'created', 'latitude', 'longitude',
       'redirect_url', 'description_full', 'remote', 'permanent_contract',
       'freelance', 'bonuses', 'career_development', 'immediate_start',
       'startup', 'multinational', 'consulting_or_outsourcing',
       'experience_gt_5', 'higher_education', 'certifications_required',
       'software_development', 'data_science_ml', 'cybersecurity',
       'qa_testing', 'it_support_infrastructure', 'project_product_management',
       'ux_ui_design', 'seniority', 'salary_avg'],
      dtype='object')

## Dividiendo location_area

Dividimos el location area en `state`, `county` y `city`

In [16]:
df["location_area"]

0             US, Ohio, Franklin County, Grandview Heights
1         US, Massachusetts, Middlesex County, Marlborough
2                     US, New York, Orange County, Chester
3                    US, Wisconsin, Kenosha County, Trevor
4                 US, Wisconsin, Racine County, Burlington
                                ...                       
120779              US, Minnesota, Dakota County, Lilydale
120780         US, Minnesota, Saint Louis County, Virginia
120781                 US, Minnesota, Anoka County, Blaine
120782          US, Minnesota, Stearns County, Saint Cloud
120783         US, Minnesota, Saint Louis County, Virginia
Name: location_area, Length: 119750, dtype: object

In [17]:
# Eliminar 'US, ' de la columna location_area
df['location_area'] = df['location_area'].str.replace('US, ', '', regex=False)

# Dividir la columna location_area en partes, separadas por comas
split_location = df['location_area'].str.split(',', expand=True)

# Asegurarse de que las filas con menos de tres partes tengan valores NaN o None
split_location = split_location.rename(columns={0: 'state', 1: 'county', 2: 'city'})

# Limpiar los espacios en blanco en cada nueva columna
split_location['state'] = split_location['state'].str.strip()
split_location['county'] = split_location['county'].str.strip()
split_location['city'] = split_location['city'].str.strip()

# Reemplazar valores vacíos con None
split_location['state'] = split_location['state'].replace('', None)
split_location['county'] = split_location['county'].replace('', None)
split_location['city'] = split_location['city'].replace('', None)

# Unir las columnas resultantes de nuevo al dataframe original
df = df.join(split_location)



In [18]:
df = df.drop('location_area', axis=1)

In [19]:
df["state"] = df["state"].replace("US", "unknown")

In [20]:
df["state"].describe()

count         119750
unique            50
top       California
freq            8373
Name: state, dtype: object

## Refinamiento de scores
Buscamos palabras determinadas en las columnas `title`  y `description_full` para colocar un score de 1 en las columnas correspondientes

#### Palabras clave en las descripciones

In [21]:
keywords_desc = {
    'remote': ['remote', 'work from home', 'working remotely'],
    'permanent_contract': ['permanent', 'full-time', 'full time', 'ongoing contract'],
    'freelance': ['freelance', 'self-employed', 'independent consultant'],
    'bonuses': ['bonus', 'bonuses', 'performance bonus', 'benefits', 'incentives'],
    'career_development': ['career development', 'career growth', 'training program', 'development opportunities', 'learning path'],
    'immediate_start': ['immediate start', 'start immediately'],
    'startup': ['startup', 'start-up', 'early-stage company'],
    'multinational': ['multinational', 'global company', 'international company'],
    'consulting_or_outsourcing': ['consulting', 'consultancy', 'outsourcing', 'external projects'],
    'experience_gt_5': ['5+ years', '5 years of experience', 'more than 5 years', 'extensive experience'],
    'higher_education': ['bachelor', 'university degree', 'academic degree'],
    'certifications_required': ['certification required', 'certifications required'],
    'software_development': ['development role'],
}


#### Palabras clave en los títulos

In [22]:
keywords_title= {
    'software_development': ['software developer', 'developer', 'software engineer'],
    'data_science_ml': ['data science', 'data scientist', 'machine learning', 'ml engineer', 'artificial intelligence'],
    'cybersecurity': ['cybersecurity', 'security analyst', 'information security', 'network security', 'security'],
    'qa_testing': ['quality assurance', 'test engineer', "qa", "test", "testing"],
    'it_support_infrastructure': ['it support', 'technical support', 'helpdesk', 'infrastructure engineer', 'system admin', 'system administrator', 'infrastructure administrator'],
    'project_product_management': ['project manager', 'product manager', 'product owner', 'scrum master', 'product designer'],
    'ux_ui_design': ['ux designer', 'user experience designer', 'ui designer', 'user interface designer', 'ux/ui designer'],
}

In [23]:
# Asegurar texto limpio en minúsculas y sin NaNs
df["title"] = df["title"].fillna("").str.lower()
df["description_full"] = df["description_full"].fillna("").str.lower()

# Buscar en title
for col, patterns in keywords_title.items():
    pattern_regex = '|'.join([rf'\b{p}\b' for p in patterns])
    df.loc[df["title"].str.contains(pattern_regex, regex=True), col] = 1

# Buscar en description_full
for col, patterns in keywords_desc.items():
    pattern_regex = '|'.join([rf'\b{p}\b' for p in patterns])
    df.loc[df["description_full"].str.contains(pattern_regex, regex=True), col] = 1


In [24]:
exclusive_group = [
    'ux_ui_design', 'software_development', 'data_science_ml',
    'cybersecurity', 'qa_testing', 'it_support_infrastructure', 'project_product_management'
]

# Filas donde al menos una columna tiene un 1 exacto
mask = (df[exclusive_group] == 1).any(axis=1)

# Para esas filas, forzar a 0 las columnas que no son 1
for col in exclusive_group:
    df.loc[mask & (df[col] != 1), col] = 0


## Cogiendo el score máximo
Con el fin de mejorar después las predicciones, cogemos un solo score por grupo de columnas

### `Sector`
sector_cols = [
    'software_development',
    'data_science_ml',
    'cybersecurity',
    'qa_testing',
    'it_support_infrastructure',
    'project_product_management',
    'ux_ui_design'
]

In [25]:
sector_cols = [
    'software_development',
    'data_science_ml',
    'cybersecurity',
    'qa_testing',
    'it_support_infrastructure',
    'project_product_management',
    'ux_ui_design'
]


In [26]:
# Crear una copia temporal del DataFrame con solo las columnas de sector
sector_scores = df[sector_cols].copy()

# Penalizar 'project_product_management' si su valor es menor a 0.98
if 'project_product_management' in sector_scores.columns:
    sector_scores['project_product_management'] = sector_scores['project_product_management'].where(
        sector_scores['project_product_management'] >= 0.97,
        -np.inf
    )

# Penalizar 'it_support_infrastructure' si su valor es menor a 0.98
if 'it_support_infrastructure' in sector_scores.columns:
    sector_scores['it_support_infrastructure'] = sector_scores['it_support_infrastructure'].where(
        sector_scores['it_support_infrastructure'] >= 0.97,
        -np.inf
    )

# Asignar la columna con el valor máximo como el sector
df['sector'] = sector_scores.idxmax(axis=1)

# Si el máximo valor es -inf (o NaN), asignar 'unknown'
df['sector'] = df.apply(
    lambda row: row['sector'] if sector_scores.loc[row.name].max() > 0 else 'unknown',
    axis=1
)



In [27]:
df.drop(columns=sector_cols, inplace=True)

### `type_of_company`
Si una empresa es startup, multinacional o nacional

In [28]:
# Asegurarte de que las columnas existen
if 'startup' in df.columns and 'multinational' in df.columns:
    
    # Inicializar con 'unknown'
    df['type_of_company'] = 'unknown'

    # Asignar 'startup' si la columna startup vale 1
    df.loc[df['startup'] == 1, 'type_of_company'] = 'startup'

    # Asignar 'multinational' si la columna multinational vale 1
    # (y no es startup, si las dos son 1, se queda como startup)
    df.loc[(df['multinational'] == 1) & (df['startup'] != 1), 'type_of_company'] = 'multinational'


## Aplicando thresholds

In [29]:
thresholds = {
    'remote': 0.8,
    'permanent_contract': 0.72,
    'freelance': 0.9,
    'bonuses': 0.77,
    'career_development': 0.91,
    'immediate_start': 0.85,
    'startup': 0.85,
    'multinational': 0.8,
    'consulting_or_outsourcing': 0.58,
    'experience_gt_5': 0.8,
    'higher_education': 0.61,
    'certifications_required': 0.76,
}

In [30]:
for col, threshold in thresholds.items():
    if col in df.columns:
        df[col] = (df[col] >= threshold).astype(int)

In [31]:
# Asegurarte de que las columnas existen
if 'startup' in df.columns and 'multinational' in df.columns:
    
    # Asignar 'startup' si la columna startup vale 1
    df.loc[df['startup'] == 1, 'type_of_company'] = 'startup'

    # Asignar 'multinational' si la columna multinational vale 1
    # (y no es startup, si las dos son 1, se queda como startup)
    df.loc[(df['multinational'] == 1) & (df['startup'] != 1), 'type_of_company'] = 'multinational'

In [32]:
company_types = df[df['type_of_company'].isin(['startup', 'multinational'])] \
    .dropna(subset=['company_display_name']) \
    .groupby('company_display_name')['type_of_company'] \
    .first().to_dict()

# Asignar ese tipo a todas las filas que coincidan por 'company_display_name'
df['type_of_company'] = df['company_display_name'].map(company_types).fillna(df['type_of_company'])


## Conversión de la columna `created` 
Esta columna se refiere solo a la fecha de creación, la dejamos solo en el año de creación

In [33]:
# Convertir la columna 'created' a tipo datetime
df["created"] = pd.to_datetime(df["created"])

# Extraer solo el año
df["created"] = df["created"].dt.year


In [34]:
df.to_csv('../data/interim/dataset_binario_junto.csv', index=False)

In [35]:
df

Unnamed: 0,id,title,company_display_name,contract_time,salary_min,salary_max,created,latitude,longitude,redirect_url,...,higher_education,certifications_required,seniority,salary_avg,state,county,city,3,sector,type_of_company
0,5172667476,it infrastructure administrator,Experis,,92562.85,92562.85,2025,39.991073,-83.000202,https://www.adzuna.com/land/ad/5172667476?se=A...,...,0,0,senior,92562.85,Ohio,Franklin County,Grandview Heights,,it_support_infrastructure,startup
1,5043698107,it program manager,BJ's Wholesale Club,,106284.47,106284.47,2025,42.346740,-71.550240,https://www.adzuna.com/land/ad/5043698107?se=A...,...,0,0,principal_or_manager,106284.47,Massachusetts,Middlesex County,Marlborough,,data_science_ml,unknown
2,5172667443,it operations window engineer,Experis,full_time,107743.86,107743.86,2025,41.353013,-74.263700,https://www.adzuna.com/land/ad/5172667443?se=A...,...,0,1,senior,107743.86,New York,Orange County,Chester,,qa_testing,startup
3,5172673565,senior it business systems analyst,"Uline, Inc.",full_time,118673.77,118673.77,2025,42.516405,-88.132486,https://www.adzuna.com/land/ad/5172673565?se=A...,...,1,0,senior,118673.77,Wisconsin,Kenosha County,Trevor,,software_development,startup
4,5172673330,senior it business systems analyst,"Uline, Inc.",full_time,112896.08,112896.08,2025,42.662802,-88.276015,https://www.adzuna.com/land/ad/5172673330?se=A...,...,0,1,senior,112896.08,Wisconsin,Racine County,Burlington,,software_development,startup
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
120779,5150951611,associate software engineer,About Healthcare Inc,full_time,88226.10,88226.10,2025,44.933100,-93.121400,https://www.adzuna.com/details/5150951611?utm_...,...,0,0,senior,88226.10,Minnesota,Dakota County,Lilydale,,software_development,unknown
120780,5160964574,spotsylvania county treasurer’s office is hiri...,Spotsylvania County Treasurer’s Office,,73698.23,73698.23,2025,47.523260,-92.536571,https://www.adzuna.com/details/5160964574?utm_...,...,0,0,senior,73698.23,Minnesota,Saint Louis County,Virginia,,software_development,unknown
120781,5156358662,development strength & conditioning specialist,Minnesota United FC,full_time,81683.86,81683.86,2025,45.168807,-93.252464,https://www.adzuna.com/details/5156358662?utm_...,...,1,1,senior,81683.86,Minnesota,Anoka County,Blaine,,project_product_management,startup
120782,5161049702,database administrator (27890),Dahl Consulting,,130580.62,130580.62,2025,45.493912,-94.242586,https://www.adzuna.com/details/5161049702?utm_...,...,0,1,senior,130580.62,Minnesota,Stearns County,Saint Cloud,,cybersecurity,unknown


In [36]:
# df["created"].unique