<a href="https://colab.research.google.com/github/crystalclcm/JobPostings/blob/main/Untitled17.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [60]:

import pandas as pd

# Load the three Eurostat extracts and reshape them into a year x geo table.
def _load_eurostat(path, geo_aliases=None):
    """Read one Eurostat CSV and return a tidy three-column frame.

    Parameters
    ----------
    path : str
        Location of the CSV file.
    geo_aliases : dict, optional
        Mapping applied to the ``geo`` column with ``Series.replace`` (for
        example to turn a country name into its code). ``None`` leaves the
        ``geo`` values untouched.

    Returns
    -------
    pandas.DataFrame
        Columns ``TIME_PERIOD``, ``geo`` and ``OBS_VALUE`` only. Rows with a
        missing value in any of the three are dropped and ``TIME_PERIOD`` is
        cast to ``int``.
    """
    # Header cells may carry stray whitespace; strip it before selecting columns.
    frame = pd.read_csv(path).rename(columns=str.strip)
    if geo_aliases:
        frame = frame.assign(geo=frame['geo'].replace(geo_aliases))
    tidy = frame[['TIME_PERIOD', 'geo', 'OBS_VALUE']].dropna()
    return tidy.assign(TIME_PERIOD=tidy['TIME_PERIOD'].astype(int))


# Ireland 2015-2019: map the country name to its code 'IE'.
old = _load_eurostat('/content/IE_DS_2015_to_2019.csv', {'Ireland': 'IE'})
# 2021-2023 extract: geo values are used as they appear in the file.
new = _load_eurostat('/content/DS_2021_2023.csv')
# EU aggregate 2015-2019 (upload this file first): map both spellings to 'EU27_2020'.
eu_old = _load_eurostat(
    '/content/EU_DS_2015_to_2019.csv',
    {'European Union': 'EU27_2020', 'EU': 'EU27_2020'},
)

# Combine all datasets, then keep only Ireland and the EU aggregate.
combined = pd.concat([old, new, eu_old])
combined = combined[combined['geo'].isin(['IE', 'EU27_2020'])]

# Pivot for visualization: one row per year, one column per geo.
# NOTE(review): pivot_table aggregates with the mean by default, so several
# rows for the same (year, geo) would be averaged silently - confirm each
# extract holds a single indicator/breakdown per year and geo.
pivot = combined.pivot_table(
    index='TIME_PERIOD', columns='geo', values='OBS_VALUE'
).sort_index()



In [61]:

# --- Visualization: Eurostat Trend (Ireland vs EU) ---
import plotly.express as px

# Wide-form line chart: one trace per geo column of `pivot`, years on the x axis.
trend_frame = pivot.reset_index()
trend_options = dict(
    markers=True,
    title='Digital Skills Trend: Ireland vs EU (2015–2023)',
    labels={'value': '% Digital Skills', 'TIME_PERIOD': 'Year'},
    color_discrete_map={'IE': 'blue', 'EU27_2020': 'green'},
)

fig = px.line(trend_frame, x='TIME_PERIOD', y=pivot.columns, **trend_options)
fig.show()





In [62]:

# Area chart of the same pivoted series used for the line chart.
# NOTE(review): px.area stacks the series on top of each other by default, so
# the upper band shows IE + EU27_2020 rather than each percentage on its own -
# confirm that stacked view is what is intended here.
fig_area = px.area(
    pivot.reset_index(),
    x='TIME_PERIOD',
    y=pivot.columns,
    title='Growth in Digital Skills: Ireland vs EU (2015–2023)',
    labels={'value':'% Digital Skills','TIME_PERIOD':'Year'},
    color_discrete_map={'IE':'blue','EU27_2020':'green'}
)
# The cell previously ended with the assignment, so nothing was rendered;
# display the figure explicitly like the other chart cells do.
fig_area.show()


In [63]:

from plotly.subplots import make_subplots
import plotly.graph_objects as go

# --- Eurostat pivot table ---
# `pivot` is built in the data-loading cell: TIME_PERIOD is the index and the
# columns are the geo codes ('IE' and, when the EU file was loaded, 'EU27_2020').

# --- Kaggle job posting snapshot ---
skill_df = pd.DataFrame({
    'Skill': ['Cloud', 'SQL', 'Python', 'Statistics', 'AI', 'Java', 'Machine Learning', 'Data Science', 'Big Data'],
    'Percentage': [7.4, 4.2, 3.6, 3.1, 2.9, 2.2, 1.8, 1.3, 1.2]
})

# Ascending sort so the largest share ends up at the top of the horizontal bar chart.
sorted_skills = skill_df.sort_values('Percentage', ascending=True)

# Two-panel dashboard: Eurostat trend on the left, job-posting skill shares on the right.
fig_dashboard = make_subplots(
    rows=1, cols=2,
    subplot_titles=('Eurostat Digital Skills Trend (IE vs EU)', 'Job Posting Skill Demand (2023-2024)'),
    column_widths=[0.55, 0.45]
)

# Left: Eurostat trend. The Ireland series is required; the EU series is only
# drawn when that column is present in `pivot`.
fig_dashboard.add_trace(go.Scatter(x=pivot.index, y=pivot['IE'], mode='lines+markers', name='Ireland'), row=1, col=1)
if 'EU27_2020' in pivot.columns:
    fig_dashboard.add_trace(go.Scatter(x=pivot.index, y=pivot['EU27_2020'], mode='lines+markers', name='EU'), row=1, col=1)
fig_dashboard.update_xaxes(title_text='Year', row=1, col=1)
fig_dashboard.update_yaxes(title_text='% Digital Skills', row=1, col=1)

# Right: Kaggle snapshot
fig_dashboard.add_trace(go.Bar(x=sorted_skills['Percentage'], y=sorted_skills['Skill'], orientation='h', name='Skills'), row=1, col=2)
fig_dashboard.update_xaxes(title_text='Share of postings (%)', row=1, col=2)

fig_dashboard.update_layout(
    title_text='Digital Skills vs Industry Demand',
    height=600
)

# make_subplots stores the subplot titles as layout annotations. Passing
# `annotations=[...]` to update_layout merges into that existing list and
# overwrites the first subplot title, so the note is appended with
# add_annotation instead.
fig_dashboard.add_annotation(
    text='Note: Kaggle dataset represents 2023-2024 snapshot',
    x=0.75, y=-0.15, xref='paper', yref='paper', showarrow=False
)

fig_dashboard.show()


In [64]:

# Standalone horizontal bar chart of the job-posting skill shares.
# Ascending order puts the largest share at the top of the chart.
sorted_skills = skill_df.sort_values(by='Percentage')

skills_chart_options = dict(
    x='Percentage',
    y='Skill',
    orientation='h',
    title='Job Posting Skill Demand (Normalized %)',
    labels={'Percentage': 'Share of postings (%)'},
)
fig_skills = px.bar(sorted_skills, **skills_chart_options)
fig_skills.show()
