In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the AI job-trends dataset and keep only the rows whose industry is IT.
df = pd.read_csv("/kaggle/input/ai-impact-on-job-market-20242030/ai_job_trends_dataset.csv")
it_df = df[df['Industry'] == 'IT'].copy()

# Keywords used to decide whether a job title is a genuinely IT-related role.
it_keywords = [
    'Engineer', 'Developer', 'Analyst', 'Data', 'Software', 'System',
    'Network', 'Security', 'Cloud', 'DevOps', 'Machine Learning', 'AI',
    'Architect', 'Administrator', 'Scientist'
]
# Match keywords as whole words (optionally plural). The search is
# case-insensitive, so a bare substring pattern would let 'AI' match inside
# unrelated words such as "Retail" or "Trainer". The group is non-capturing
# so that str.contains does not warn about match groups.
keyword_pattern = r'\b(?:' + '|'.join(it_keywords) + r')s?\b'

# Chain a non-inplace dropna: calling dropna(inplace=True) on the result of a
# boolean filter can raise SettingWithCopyWarning.
true_it_df = (
    it_df[it_df['Job Title'].str.contains(keyword_pattern, case=False, na=False)]
    .dropna(subset=['Job Title', 'Automation Risk (%)'])
)

# Mean automation risk per job title, highest first, limited to the top 20.
risk_by_true_it_job = (
    true_it_df.groupby('Job Title')['Automation Risk (%)']
    .mean()
    .sort_values(ascending=False)
    .head(20)
)

# Horizontal bar chart of the mean automation risk for the selected IT job
# titles, one bar per title, annotated with the percentage value.
mean_risks = risk_by_true_it_job.values
job_titles = risk_by_true_it_job.index

# One shade per bar, blended from a single base colour (direction reversed).
base_color = "#e67cb9"
custom_palette = sns.light_palette(base_color, n_colors=len(job_titles), reverse=True)

plt.style.use('seaborn-v0_8-whitegrid')
plt.figure(figsize=(12, 10))

# hue mirrors the y variable so each bar takes its own palette colour;
# the legend would only repeat the axis labels, so it is turned off.
ax = sns.barplot(
    x=mean_risks,
    y=job_titles,
    hue=job_titles,
    palette=custom_palette,
    legend=False,
)

ax.set_title('Top 20 Professioni IT con Maggior Rischio di Automazione', fontsize=18, pad=20)
ax.set_xlabel('Rischio Medio di Automazione (%)', fontsize=14)
ax.set_ylabel('Professione nel Settore IT', fontsize=14)

# Write each bar's value just past its right-hand end.
for bar_position, risk in enumerate(mean_risks):
    label = f'{risk:.1f}%'
    ax.text(risk + 0.5, bar_position, label, color='black', va='center')

# Leave 15% of headroom on the x axis so the value labels are not clipped.
x_upper = mean_risks.max() * 1.15
ax.set_xlim(0, x_upper)

plt.tight_layout()
plt.savefig("Top_20_Professioni_IT_Maggior_Rischio_Automazione.png")
plt.show()