In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the AI job dataset from the Kaggle input directory into a DataFrame.
csv_path = '/kaggle/input/ai-job-dataset/ai_job_dataset.csv'
df = pd.read_csv(csv_path)
# Last expression of the cell: shows the first rows as a quick preview.
df.head()

In [None]:
# Median salary (USD) per job title, sorted from highest to lowest,
# truncated to the ten best-paid titles.
top_jobs = (
    df.groupby('job_title')['salary_usd']
    .median()
    .sort_values(ascending=False)
    .head(10)
)
print(top_jobs)

In [None]:
# Horizontal bar chart of the ten job titles with the highest median salary.
fig, ax = plt.subplots(figsize=(12, 8))
sns.barplot(x=top_jobs.values, y=top_jobs.index, palette='magma', ax=ax)
ax.set_title('Top 10 AI Job Titles by Median Salary')
ax.set_xlabel('Median Salary (USD)')
ax.set_ylabel('Job Title')

# Zoom the x-axis so the differences between bars are visible.
# NOTE: the axis does not start at zero, so bar lengths are not proportional
# to the salaries. The limits are set once here rather than inside the label
# loop, because they do not depend on the individual bars.
ax.set_xlim(90000, 105000)

# Write the dollar amount just to the right of each bar (bar i sits at y == i).
for i, value in enumerate(top_jobs.values):
    ax.text(value + 1000, i, f"${int(value):,}", va='center')

fig.tight_layout()
plt.show()

In [None]:
# Only keep relevant columns and drop rows where either value is missing.
# The explicit .copy() makes df_remote an independent frame: columns are added
# to it in a later cell, and assigning to the result of a selection/filter can
# otherwise raise pandas' SettingWithCopyWarning.
df_remote = df[['remote_ratio', 'salary_usd']].dropna().copy()

# Check unique values in remote_ratio
print(df_remote['remote_ratio'].value_counts())


In [None]:
# Add a readable label column for the remote_ratio codes 0 / 50 / 100.
# Series.map leaves NaN for any code that is not a key of the dict.
df_remote['remote_status'] = df_remote['remote_ratio'].map(
    {0: 'No Remote', 50: 'Hybrid', 100: 'Fully Remote'}
)


In [None]:
# Box plot of the salary distribution for each remote-work category.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(
    x='remote_status',
    y='salary_usd',
    data=df_remote,
    palette='viridis',
    # Fix the category order (least to most remote) instead of relying on the
    # order in which the labels happen to appear in the data.
    order=['No Remote', 'Hybrid', 'Fully Remote'],
    ax=ax,
)

ax.set_title('Salary Distribution by Remote Work Type', fontsize=14)
ax.set_xlabel('Remote Work Status')
ax.set_ylabel('Salary (USD)')
# Switch to the log scale before tight_layout() so the layout is computed for
# the tick labels that are actually drawn.
ax.set_yscale('log')
fig.tight_layout()
plt.show()



In [None]:
# Median salary (USD) per company location, sorted from highest to lowest;
# the ten highest are kept for plotting.
country_salary = df.groupby('company_location')['salary_usd'].median().sort_values(ascending=False)
top_countries = country_salary.head(10)

# Plot: horizontal bar chart, one bar per company location.
fig, ax = plt.subplots(figsize=(12, 6))
sns.barplot(x=top_countries.values, y=top_countries.index, palette='viridis', ax=ax)
ax.set_title('Top 10 Countries by Median AI Salary (USD)')
ax.set_xlabel('Median Salary (USD)')
ax.set_ylabel('Country')

# Axis limits are set once, outside the label loop: they do not depend on the
# individual bars. NOTE: the axis starts at 35,000 rather than zero, so bar
# lengths are not proportional to the salaries.
ax.set_xlim(35000, 160000)

# Write the dollar amount just to the right of each bar (bar i sits at y == i).
for i, value in enumerate(top_countries.values):
    ax.text(value + 1000, i, f"${int(value):,}", va='center')

fig.tight_layout()
plt.show()


In [None]:
# Median salary (USD) per required education level, sorted from highest to lowest.
edu_salary = df.groupby('education_required')['salary_usd'].median().sort_values(ascending=False)

# Plot
# NOTE: the next three calls change global matplotlib/seaborn settings, so they
# also apply to any figure drawn after this cell.
plt.style.use('default')
sns.set_context('notebook', font_scale=1.2)
sns.set_style("whitegrid")

fig, ax = plt.subplots(figsize=(8, 6))
sns.barplot(x=edu_salary.values, y=edu_salary.index, palette='viridis', ax=ax)
ax.set_title('Median Salary by Education Level')
ax.set_xlabel('Median Salary (USD)')
ax.set_ylabel('Education Level')

# Axis limits are set once, outside the label loop: they do not depend on the
# individual bars. NOTE: the axis starts at 70,000 rather than zero, so bar
# lengths are not proportional to the salaries.
ax.set_xlim(70000, 105000)

# Write the rounded dollar amount just to the right of each bar.
for i, value in enumerate(edu_salary.values):
    ax.text(value + 1000, i, f"${value:,.0f}", va='center')

# Same layout pass as the other figure cells, so long labels are not clipped.
fig.tight_layout()
plt.show()