Exploratory Data Analysis (EDA) of global data-related roles and salaries using Python.

In [None]:
# Import libraries: pandas for tabular data, matplotlib and seaborn for plotting
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Make 'viridis' the default seaborn color palette for the plots that follow
sns.set_palette('viridis')

In [None]:
# Load dataset: read the CSV into `df`, report its dimensions, then preview
# the first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer='data_jobs.csv')
n_rows, n_cols = df.shape
print('Shape:', (n_rows, n_cols))
df.head(n=5)

In [None]:
# Inspect data: column dtypes, summary statistics, and missing-value counts.
# A notebook cell only renders its LAST expression automatically, so the
# describe() table is passed to display() explicitly; as a bare intermediate
# expression its result would be computed and then silently discarded.
df.info()  # prints its report directly to stdout
display(df.describe())  # display() is provided by default in IPython/Jupyter kernels
df.isnull().sum()  # last expression: number of missing values per column

In [None]:
# Basic cleaning: rewrite every column label in lowercase with spaces turned
# into underscores, then preview up to ten distinct company_location values.
df.columns = list(map(lambda label: label.lower().replace(' ', '_'), df.columns))
print(df['company_location'].unique()[:10])

In [None]:
# EDA: the ten most frequent job titles as a horizontal bar chart.
# barh draws the first row at the bottom of the axes, so the y-axis is
# inverted to put the most frequent title at the top and make the ranking
# read top-down.
fig, ax = plt.subplots(figsize=(10, 5))
df['job_title'].value_counts().head(10).plot(kind='barh', ax=ax)
ax.invert_yaxis()
ax.set(
    title='Top 10 job titles in data roles',
    xlabel='Number of roles',
    ylabel='Job title',
)
plt.show()

In [None]:
# Salary analysis: the ten job titles with the highest mean salary_in_usd.
# NOTE(review): the mean is taken per title with no minimum row count, so a
# title backed by very few rows can dominate this ranking — confirm that is
# acceptable for the conclusions drawn from it.
fig, ax = plt.subplots(figsize=(10, 5))
mean_salary_by_title = df.groupby('job_title')['salary_in_usd'].mean()
mean_salary_by_title.sort_values(ascending=False).head(10).plot(kind='barh', ax=ax)
# barh draws the first row at the bottom; invert so the highest mean is on top.
ax.invert_yaxis()
ax.set(
    title='Average salary (USD) by role',
    xlabel='USD',
    ylabel='Job title',
)
plt.show()

In [None]:
# Remote work distribution: swap the numeric remote_ratio codes for readable
# labels and chart how many rows carry each label.
work_mode_labels = {0: 'On-site', 50: 'Hybrid', 100: 'Remote'}

plt.figure(figsize=(6, 4))
work_mode_counts = df['remote_ratio'].replace(work_mode_labels).value_counts()
work_mode_counts.plot.bar()
plt.title('Work mode distribution')
plt.ylabel('Count')
plt.show()

In [None]:
# Salary vs experience level: one box per experience_level value, drawn in
# the fixed EN, MI, SE, EX sequence instead of order of appearance.
level_order = ['EN', 'MI', 'SE', 'EX']

plt.figure(figsize=(8, 5))
sns.boxplot(
    x='experience_level',
    y='salary_in_usd',
    order=level_order,
    data=df,
)
plt.title('Salary distribution by experience level')
plt.show()

In [None]:
# Country salary comparison: mean salary_in_usd for the ten company_location
# values that have the most rows in the dataset.
top_countries = df['company_location'].value_counts().head(10).index
top_country_rows = df[df['company_location'].isin(top_countries)]

fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(
    data=top_country_rows,
    x='company_location',
    y='salary_in_usd',
    # Without an explicit order the bars follow first appearance in the data;
    # use the row-count ranking that selected these locations instead.
    order=list(top_countries),
    # The error bars come from bootstrap resampling; fixing the seed keeps
    # them identical across Restart & Run All.
    seed=42,
    ax=ax,
)
plt.title('Average salary by top company locations')
plt.xticks(rotation=45)
plt.show()

In [None]:
# Conclusion: print the written summary of the analysis (the text is in Spanish).
# NOTE(review): these statements are hard-coded text, not computed from df —
# confirm they still match the charts above whenever the dataset changes.
print('''
📈 Conclusión:
Este análisis exploratorio de datos resalta las tendencias globales en los roles relacionados con datos, mostrando una correlación entre la experiencia, la modalidad de trabajo y los salarios.
Los resultados evidencian la relevancia de las habilidades técnicas y analíticas en un mercado impulsado por los datos.
Es vital resaltar que el trabajo remoto predomina en las ofertas y que los trabajos relacionado a Data Engineer y Data Scientist son los que presentan mejores ingresos.''')
