In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from dash import Dash, dcc, html, Input, Output
from statsmodels.tsa.arima.model import ARIMA

# Data loading
# The workbook is looked up relative to the current working directory. A bare
# file name is used instead of a ".\"-prefixed path because the backslash is a
# path separator on Windows only; elsewhere it would be part of the file name.
cybersec = pd.read_excel("Global_Cybersecurity_Threats.xlsx")

# Per-column count of missing values, printed for a quick data-quality check.
missing = cybersec.isnull().sum()
print(missing)

# Copy of the data with every row that has at least one missing value removed.
# NOTE(review): the rest of this file reads `cybersec`, not this cleaned
# frame -- confirm which one the dashboard is meant to use.
cybersec_cleaned = cybersec.dropna()

# App setup
app = Dash(__name__)
app.title = "Cybersecurity Dashboard"


# Dropdown options
def _dropdown_options(column):
    """Return one {'label', 'value'} entry per distinct non-null value of `column`.

    Values are taken from the `cybersec` frame and listed in sorted order;
    label and value are the same object for every entry.
    """
    distinct_values = cybersec[column].dropna().unique()
    return [{'label': item, 'value': item} for item in sorted(distinct_values)]


industry_options = _dropdown_options('Target Industry')
country_options = _dropdown_options('Country')
year_options = _dropdown_options('Year')

# ========= DASH LAYOUT =========
def _caption(text):
    """Build the bold blue label displayed above a dropdown."""
    return html.Label(text, style={'color': 'blue', 'fontWeight': 'bold'})


# 1. Attack Types: industry dropdown plus one chart.
_attack_types_tab = dcc.Tab(
    label='⚔️ Attack Types',
    children=[
        html.Br(),
        _caption("Select Target Industry:"),
        dcc.Dropdown(
            id='industry-select',
            options=industry_options,
            value=industry_options[0]['value'],  # first option is pre-selected
        ),
        dcc.Graph(id='attack-types-by-industry'),
    ],
)

# 2. Time Trends: two charts, no controls.
_time_trends_tab = dcc.Tab(
    label='⏱ Time Trends',
    children=[
        html.Br(),
        dcc.Graph(id='incidents-over-time'),
        dcc.Graph(id='attack-type-trend'),
    ],
)

# 3. By Industry: two charts, no controls.
_by_industry_tab = dcc.Tab(
    label='🏭 By Industry',
    children=[
        html.Br(),
        dcc.Graph(id='industry-financial-loss'),
        dcc.Graph(id='industry-defense'),
    ],
)

# 4. By Country: country and year dropdowns plus three charts.
_by_country_tab = dcc.Tab(
    label='🌍 By Country',
    children=[
        html.Br(),
        _caption("Select Country:"),
        dcc.Dropdown(
            id='country-select',
            options=country_options,
            value=country_options[0]['value'],
        ),
        html.Br(),
        _caption("Select Year:"),
        dcc.Dropdown(
            id='year-select',
            options=year_options,
            value=year_options[0]['value'],
        ),
        dcc.Graph(id='country-attack-types'),
        dcc.Graph(id='country-loss-by-industry'),
        dcc.Graph(id='country-affected-users'),
    ],
)

# 5. Forecast (ARIMA): three charts, no controls.
_forecast_tab = dcc.Tab(
    label='📈 Forecast',
    children=[
        html.Br(),
        dcc.Graph(id='forecast-financial-loss'),
        dcc.Graph(id='forecast-incidents'),
        dcc.Graph(id='forecast-top-industry'),
    ],
)

# Page = centred heading followed by the five tabs, in display order.
app.layout = html.Div([
    html.H1("🛡 Cybersecurity Threats Dashboard", style={'textAlign': 'center', 'color': 'blue'}),
    dcc.Tabs([
        _attack_types_tab,
        _time_trends_tab,
        _by_industry_tab,
        _by_country_tab,
        _forecast_tab,
    ]),
])

# ========= CALLBACKS =========

# Attack Types tab
@app.callback(
    Output('attack-types-by-industry', 'figure'),
    Input('industry-select', 'value')
)
def update_attack_by_industry(industry):
    """Draw a donut chart of attack types for the selected target industry.

    Args:
        industry: Current value of the 'industry-select' dropdown.

    Returns:
        A Plotly pie figure (hole=0.4) built from the rows of `cybersec`
        whose 'Target Industry' equals `industry`, sliced by 'Attack Type'.
    """
    in_industry = cybersec['Target Industry'] == industry
    return px.pie(
        cybersec[in_industry],
        names='Attack Type',
        hole=0.4,
        title=f'Attack Types in {industry}',
    )

# Time Trends tab
@app.callback(
    Output('incidents-over-time', 'figure'),
    Output('attack-type-trend', 'figure'),
    Input('industry-select', 'value')  # dummy
)
def update_time_trends(_):
    """Build the two charts of the Time Trends tab.

    Args:
        _: Value of the 'industry-select' dropdown; ignored. The input only
            serves to trigger the callback.

    Returns:
        A tuple of two figures: incidents per year with a fitted straight
        trend line, and incidents per year split by attack type.
    """
    # Figure 1: incidents per year plus a degree-1 least-squares trend line.
    yearly = cybersec.groupby('Year').size().reset_index(name='Incident Count')
    line_coeffs = np.polyfit(yearly['Year'], yearly['Incident Count'], 1)
    yearly['Trend'] = np.polyval(line_coeffs, yearly['Year'])

    incidents_fig = px.line(
        yearly,
        x='Year',
        y='Incident Count',
        markers=True,
        title='📈 Number of Incidents Over Time',
    )
    trend_trace = go.Scatter(
        x=yearly['Year'],
        y=yearly['Trend'],
        mode='lines',
        name='Trend',
        line=dict(dash='dot'),
    )
    incidents_fig.add_trace(trend_trace)

    # Figure 2: one line per attack type, counting incidents per year.
    per_type = cybersec.groupby(["Year", "Attack Type"]).size().reset_index(name="Count")
    attack_type_fig = px.line(
        per_type,
        x='Year',
        y='Count',
        color='Attack Type',
        title='📊 Attack Type Trends Over Time',
    )

    return incidents_fig, attack_type_fig

# By Industry tab
@app.callback(
    Output('industry-financial-loss', 'figure'),
    Output('industry-defense', 'figure'),
    Input('industry-select', 'value')  # dummy
)
def update_industry(_):
    """Build the two charts of the By Industry tab.

    Args:
        _: Value of the 'industry-select' dropdown; ignored. The input only
            serves to trigger the callback.

    Returns:
        A tuple of two figures: a horizontal bar chart of summed financial
        loss per target industry, and a bar chart of incident counts per
        industry coloured by defense mechanism.
    """
    loss_col = 'Financial Loss (in Million $)'

    # Figure 1: total financial loss per industry, horizontal bars.
    loss_by_industry = cybersec.groupby('Target Industry')[loss_col].sum().reset_index()
    loss_fig = px.bar(
        loss_by_industry,
        x=loss_col,
        y='Target Industry',
        orientation='h',
        title='🏭 Total Financial Loss by Industry',
        color=loss_col,
        color_continuous_scale='Blues',
    )

    # Figure 2: number of rows per (industry, defense mechanism) pair.
    defense_counts = (
        cybersec
        .groupby(["Target Industry", "Defense Mechanism Used"])
        .size()
        .reset_index(name="Count")
    )
    defense_fig = px.bar(
        defense_counts,
        x='Target Industry',
        y='Count',
        color='Defense Mechanism Used',
        title='🛡 Defense Mechanisms Used by Industry',
    )

    return loss_fig, defense_fig

# By Country tab
@app.callback(
    Output('country-attack-types', 'figure'),
    Output('country-loss-by-industry', 'figure'),
    Output('country-affected-users', 'figure'),
    Input('country-select', 'value'),
    Input('year-select', 'value')
)
def update_country_tab(country, year):
    """Build the three charts of the By Country tab for one country and year.

    Args:
        country: Current value of the 'country-select' dropdown.
        year: Current value of the 'year-select' dropdown.

    Returns:
        A tuple of three figures, all restricted to rows of `cybersec`
        matching both `country` and `year`: a donut chart of attack types,
        a bar chart of summed financial loss per industry, and a horizontal
        bar chart of summed affected users per industry.
    """
    loss_col = 'Financial Loss (in Million $)'
    users_col = 'Number of Affected Users'

    matches = (cybersec['Country'] == country) & (cybersec['Year'] == year)
    selection = cybersec[matches]

    attack_fig = px.pie(
        selection,
        names='Attack Type',
        hole=0.4,
        title=f'Attack Types in {country} ({year})',
    )

    loss_fig = px.bar(
        selection.groupby('Target Industry')[loss_col].sum().reset_index(),
        x='Target Industry',
        y=loss_col,
        title=f'Financial Loss by Industry in {country} ({year})',
        color=loss_col,
        color_continuous_scale='oranges',
    )

    users_fig = px.bar(
        selection.groupby('Target Industry')[users_col].sum().reset_index(),
        x=users_col,
        y='Target Industry',
        orientation='h',
        title=f'Users Affected in {country} ({year})',
        color=users_col,
        color_continuous_scale='Blues',
    )

    return attack_fig, loss_fig, users_fig

# Forecast tab (ARIMA)
def _arima_forecast(yearly_series, steps=5, order=(1, 1, 1)):
    """Fit an ARIMA model to a year-indexed series and forecast the next years.

    Args:
        yearly_series: pandas Series holding one value per year, indexed by year.
        steps: Number of future years to forecast.
        order: The (p, d, q) order handed to statsmodels' ARIMA.

    Returns:
        A pandas Series of forecast values indexed by the forecast years
        (last observed year + 1 through last observed year + `steps`).
    """
    history = yearly_series.sort_index()
    # Fit on a positional 0..n-1 index. An index made of raw year integers is
    # not one statsmodels can forecast from: it warns "An unsupported index
    # was provided" and announces that forecasting without a supported index
    # will raise in a later version.
    fitted = ARIMA(history.reset_index(drop=True), order=order).fit()
    predicted = np.asarray(fitted.forecast(steps=steps))

    # Forecast years simply continue after the last observed year.
    # NOTE(review): assumes the observed years are consecutive -- confirm.
    last_year = int(history.index.max())
    future_years = pd.Index(range(last_year + 1, last_year + 1 + steps),
                            name=history.index.name)
    return pd.Series(predicted, index=future_years)


def _history_forecast_line(history, forecast, value_label, title):
    """Plot observed then forecast values as one line, split by a red dotted marker.

    Args:
        history: Observed values, indexed by year.
        forecast: Forecast values, indexed by the years that follow.
        value_label: Column name used for the values (also the y-axis label).
        title: Figure title.

    Returns:
        A Plotly line figure with a vertical dotted red line at the last
        observed year.
    """
    combined = (
        pd.concat([history, forecast])
        .rename(value_label)
        .rename_axis('Year')
        .reset_index()
    )
    fig = px.line(combined, x='Year', y=value_label, markers=True, title=title)
    fig.add_vline(x=int(history.index.max()), line_dash='dot', line_color='red')
    return fig


@app.callback(
    Output('forecast-financial-loss', 'figure'),
    Output('forecast-incidents', 'figure'),
    Output('forecast-top-industry', 'figure'),
    Input('industry-select', 'value')  # dummy input
)
def update_forecast(_):
    """Build the three ARIMA(1, 1, 1) five-year forecast charts.

    Args:
        _: Value of the 'industry-select' dropdown; ignored. The input only
            serves to trigger the callback, so the models are refitted each
            time that dropdown changes.

    Returns:
        A tuple of three figures: total financial loss per year, number of
        incidents per year, and financial loss per year for the industry
        with the largest total loss -- each extended by a five-year forecast.
    """
    loss_col = 'Financial Loss (in Million $)'

    # 1. Total Financial Loss Forecast
    total_loss = cybersec.groupby('Year')[loss_col].sum()
    fig1 = _history_forecast_line(
        total_loss,
        _arima_forecast(total_loss),
        loss_col,
        '💸 ARIMA Forecast: Total Financial Loss',
    )

    # 2. Incidents Forecast
    incidents = cybersec.groupby('Year').size()
    fig2 = _history_forecast_line(
        incidents,
        _arima_forecast(incidents),
        'Incident Count',
        '📊 ARIMA Forecast: Cybersecurity Incidents',
    )

    # 3. Top Industry Loss Forecast
    # Only the summed loss per industry is needed to pick the top industry.
    top_industry = cybersec.groupby('Target Industry')[loss_col].sum().idxmax()
    industry_rows = cybersec[cybersec['Target Industry'] == top_industry]
    industry_loss = industry_rows.groupby('Year')[loss_col].sum()
    industry_forecast = _arima_forecast(industry_loss)

    fig3 = go.Figure()
    # The observed trace covers observed years only; the forecast gets its own
    # red trace so the two are distinguishable in the legend.
    fig3.add_trace(go.Scatter(x=industry_loss.index, y=industry_loss.values,
                              mode='lines+markers', name='Actual'))
    fig3.add_trace(go.Scatter(x=industry_forecast.index, y=industry_forecast.values,
                              mode='lines+markers', name='Forecast', line=dict(color='red')))
    fig3.update_layout(title=f"🔮 ARIMA Forecast: Financial Loss for {top_industry}",
                       xaxis_title='Year', yaxis_title='Loss (Million $)')

    return fig1, fig2, fig3

# ========= RUN APP =========
if __name__ == "__main__":
    # Start the app's server on port 8080 with debug mode switched on, but
    # only when this file is executed directly rather than imported.
    app.run(port=8080, debug=True)

Country                                  0
Year                                     0
Attack Type                             98
Target Industry                          0
Financial Loss (in Million $)          129
Number of Affected Users               102
Attack Source                           79
Security Vulnerability Type             77
Defense Mechanism Used                   0
Incident Resolution Time (in Hours)     76
dtype: int64



Non-invertible starting MA parameters found. Using zeros as starting parameters.


An unsupported index was provided. As a result, forecasts cannot be generated. To use the model for forecasting, use one of the supported classes of index.


An unsupported index was provided. As a result, forecasts cannot be generated. To use the model for forecasting, use one of the supported classes of index.


An unsupported index was provided. As a result, forecasts cannot be generated. To use the model for forecasting, use one of the supported classes of index.


No supported index is available. Prediction results will be given with an integer index beginning at `start`.


No supported index is available. In the next version, calling this method in a model without a supported index will result in an exception.


Non-invertible starting MA parameters found. Using zeros as starting parameters.


An unsupported index was provided. As a result, forecasts cannot be generated. To use the model for forec