In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Set visualization styles

In [3]:
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_palette("viridis")
plt.rcParams['figure.figsize'] = (12, 7)

# Load the dataset

In [4]:
df = pd.read_csv("ai_job_dataset.csv")

# 1. Job Role Distribution - Horizontal Bar Chart


In [5]:
plt.figure(figsize=(12, 8))
role_counts = df['Job Role'].value_counts()
colors = sns.color_palette("viridis", len(role_counts))
ax = sns.barplot(x=role_counts.values, y=role_counts.index, palette=colors)
plt.title('Distribution of AI Job Roles', fontsize=16)
plt.xlabel('Number of Positions', fontsize=12)
for i, v in enumerate(role_counts.values):
    ax.text(v + 0.5, i, str(v), va='center')
plt.tight_layout()
plt.savefig('job_roles_distribution.png', dpi=300, bbox_inches='tight')
plt.close()

KeyError: 'Job Role'

<Figure size 1200x800 with 0 Axes>

# 2. Salary Distribution - Histogram with KDE


In [6]:
plt.figure(figsize=(12, 7))
sns.histplot(df['Salary (USD)'], kde=True, bins=30, color='purple', alpha=0.7)
plt.axvline(df['Salary (USD)'].mean(), color='red', linestyle='--', 
            label=f'Mean: ${df["Salary (USD)"].mean():,.0f}')
plt.axvline(df['Salary (USD)'].median(), color='green', linestyle='--', 
            label=f'Median: ${df["Salary (USD)"].median():,.0f}')
plt.title('Salary Distribution in AI Jobs', fontsize=16)
plt.xlabel('Annual Salary (USD)', fontsize=12)
plt.ylabel('Frequency', fontsize=12)
plt.legend()
plt.tight_layout()
plt.savefig('salary_distribution.png', dpi=300, bbox_inches='tight')
plt.close()

KeyError: 'Salary (USD)'

<Figure size 1200x700 with 0 Axes>

# 3. Top Locations - Colorful Bar Chart


In [7]:
plt.figure(figsize=(14, 8))
location_counts = df['Location'].value_counts().head(15)
colors = sns.color_palette("plasma", len(location_counts))
ax = sns.barplot(x=location_counts.index, y=location_counts.values, palette=colors)
plt.title('Top 15 Locations for AI Jobs', fontsize=16)
plt.xlabel('Location', fontsize=12)
plt.ylabel('Number of Jobs', fontsize=12)
plt.xticks(rotation=45, ha='right')
for i, v in enumerate(location_counts.values):
    ax.text(i, v + 1, str(v), ha='center')
plt.tight_layout()
plt.savefig('top_locations.png', dpi=300, bbox_inches='tight')
plt.close()

KeyError: 'Location'

<Figure size 1400x800 with 0 Axes>

# 4. Salary by Job Role - Box Plot


In [8]:
plt.figure(figsize=(14, 8))
order = df.groupby('Job Role')['Salary (USD)'].median().sort_values(ascending=False).index
sns.boxplot(x='Job Role', y='Salary (USD)', data=df, palette='Set3', order=order)
plt.title('Salary Distribution by Job Role', fontsize=16)
plt.xlabel('Job Role', fontsize=12)
plt.ylabel('Salary (USD)', fontsize=12)
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.savefig('salary_by_role.png', dpi=300, bbox_inches='tight')
plt.close()

KeyError: 'Job Role'

<Figure size 1400x800 with 0 Axes>

# 5. Experience vs Salary - Scatter Plot with Trend Line


In [11]:
plt.figure(figsize=(12, 8))
sns.scatterplot(x='Years of Experience', y='Salary (USD)', data=df, 
                hue='Job Role', size='Years of Experience', sizes=(50, 200), alpha=0.7)
sns.regplot(x='Years of Experience', y='Salary (USD)', data=df, 
            scatter=False, line_kws={"color": "red", "alpha": 0.7, "lw": 2})
plt.title('Experience vs Salary Correlation', fontsize=16)
plt.xlabel('Years of Experience', fontsize=12)
plt.ylabel('Salary (USD)', fontsize=12)
plt.legend(bbox_to_anchor=(1.02, 1), loc='upper left')
plt.tight_layout()
plt.savefig('experience_vs_salary.png', dpi=300, bbox_inches='tight')
plt.close()

ValueError: Could not interpret value `Years of Experience` for `x`. An entry with this name does not appear in `data`.

<Figure size 1200x800 with 0 Axes>

# 6. Company Size and Salary - Violin Plot


In [12]:
plt.figure(figsize=(14, 8))
sns.violinplot(x='Company Size', y='Salary (USD)', data=df, palette='Blues')
plt.title('Salary Distribution by Company Size', fontsize=16)
plt.xlabel('Company Size', fontsize=12)
plt.ylabel('Salary (USD)', fontsize=12)
plt.tight_layout()
plt.savefig('company_size_salary.png', dpi=300, bbox_inches='tight')
plt.close()

ValueError: Could not interpret value `Company Size` for `x`. An entry with this name does not appear in `data`.

<Figure size 1400x800 with 0 Axes>

# 7. Job Roles by Company Size - Stacked Bar Chart


In [13]:
plt.figure(figsize=(14, 8))
role_size = pd.crosstab(df['Company Size'], df['Job Role'])
role_size_percent = role_size.div(role_size.sum(axis=1), axis=0) * 100
role_size_percent.plot(kind='bar', stacked=True, colormap='viridis')
plt.title('Job Roles Distribution by Company Size', fontsize=16)
plt.xlabel('Company Size', fontsize=12)
plt.ylabel('Percentage', fontsize=12)
plt.legend(title='Job Role', bbox_to_anchor=(1.02, 1), loc='upper left')
plt.tight_layout()
plt.savefig('roles_by_company_size.png', dpi=300, bbox_inches='tight')
plt.close()

KeyError: 'Company Size'

<Figure size 1400x800 with 0 Axes>

# 8. Top Skills (if available) - Horizontal Bar Chart

In [14]:
if 'Required Skills' in df.columns:
    all_skills = []
    for skills in df['Required Skills'].dropna():
        all_skills.extend([skill.strip() for skill in skills.split(',')])
    
    skills_df = pd.DataFrame({'Skill': all_skills})
    skill_counts = skills_df['Skill'].value_counts().head(15)
    
    plt.figure(figsize=(12, 10))
    colors = sns.color_palette("magma", len(skill_counts))
    ax = sns.barplot(x=skill_counts.values, y=skill_counts.index, palette=colors)
    plt.title('Top 15 Required Skills in AI Jobs', fontsize=16)
    plt.xlabel('Count', fontsize=12)
    for i, v in enumerate(skill_counts.values):
        ax.text(v + 0.5, i, str(v), va='center')
    plt.tight_layout()
    plt.savefig('top_skills.png', dpi=300, bbox_inches='tight')
    plt.close()

# 9. Salary by Location - Beautiful Swarm Plot


In [15]:
plt.figure(figsize=(14, 10))
top_locations = df['Location'].value_counts().head(10).index
location_df = df[df['Location'].isin(top_locations)]
sns.swarmplot(x='Salary (USD)', y='Location', data=location_df, palette='plasma', size=8)
plt.title('Salary Distribution in Top 10 Locations', fontsize=16)
plt.xlabel('Salary (USD)', fontsize=12)
plt.ylabel('Location', fontsize=12)
plt.tight_layout()
plt.savefig('salary_by_location_swarm.png', dpi=300, bbox_inches='tight')
plt.close()

KeyError: 'Location'

<Figure size 1400x1000 with 0 Axes>

# 10. Experience by Job Role - Grouped Box Plot


In [16]:
plt.figure(figsize=(14, 8))
sns.boxplot(x='Job Role', y='Years of Experience', data=df, palette='Set2')
plt.title('Experience Distribution by Job Role', fontsize=16)
plt.xlabel('Job Role', fontsize=12)
plt.ylabel('Years of Experience', fontsize=12)
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.savefig('experience_by_role.png', dpi=300, bbox_inches='tight')
plt.close()

ValueError: Could not interpret value `Job Role` for `x`. An entry with this name does not appear in `data`.

<Figure size 1400x800 with 0 Axes>

In [17]:
print("Visualizations completed and saved successfully!")

Visualizations completed and saved successfully!
