# In [1]:
import dash
from dash import dcc, html
from dash.dependencies import Input, Output, State
import dash_bootstrap_components as dbc
import pandas as pd
import plotly.express as px

# Read the pre-cleaned sales table that every chart and KPI is built from
df = pd.read_csv('cleaned_file_Dashboard.csv')

# Normalise Year to integers; values that cannot be parsed become the sentinel 0
df['Year'] = (
    pd.to_numeric(df['Year'], errors='coerce')
    .fillna(0)
    .astype(int)
)

# Slider bounds: smallest and largest year, ignoring the sentinel (non-positive) rows
valid_years = df.loc[df['Year'] > 0, 'Year']
min_year, max_year = valid_years.min(), valid_years.max()

# Utility function to filter dataset
def filter_data(data, year_range, selected_genre, selected_region):
    """Return the rows of ``data`` within a year range and, optionally, one genre.

    Args:
        data: DataFrame with at least ``Year`` and ``Genre`` columns.
        year_range: Two-item sequence ``[first_year, last_year]``; both ends
            are inclusive.
        selected_genre: Genre to keep. A falsy value (``None``, ``""``) keeps
            every genre.
        selected_region: Accepted so all callers can pass the same arguments;
            it does not affect which rows are returned.

    Returns:
        The filtered DataFrame (original index preserved).
    """
    first_year, last_year = year_range[0], year_range[1]
    within_years = data['Year'].between(first_year, last_year)
    subset = data[within_years]

    # No genre chosen: the year filter is the only restriction
    if not selected_genre:
        return subset
    return subset[subset['Genre'] == selected_genre]

# Initialize Dash app with Bootstrap styling
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Layout for the dashboard
# Top to bottom: title row with the "About Dataset" button, the dataset modal,
# three KPI cards, the filter controls, and a 2x2 grid of charts.
# The component ids declared here are the ones the callbacks below bind to.
app.layout = dbc.Container([
    # Title and Dataset Information Button
    dbc.Row([
        dbc.Col(html.H1("Video Game Sales Dashboard", className="text-center text-primary mb-4"), width=10),
        dbc.Col(dbc.Button("About Dataset", id="open-modal", color="info", className="mb-3"), width=2),
    ]),

    # Dataset Information Modal
    # Its `is_open` property is flipped by toggle_about_modal when either the
    # "open-modal" or the "close-modal" button is clicked.
    dbc.Modal(
        [
            dbc.ModalHeader("Dataset Information"),
            dbc.ModalBody([
                html.P("This dataset contains a list of video games with sales greater than 100,000 copies."),
                html.P("It was scraped from vgchartz.com using Python's BeautifulSoup."),
                html.P("Key fields include:"),
                html.Ul([
                    html.Li("Rank: Ranking of overall sales"),
                    html.Li("Name: The game's name"),
                    html.Li("Platform: Platform of release (e.g., PC, PS4)"),
                    html.Li("Year: Year of release"),
                    html.Li("Genre: Genre of the game"),
                    html.Li("Publisher: Publisher of the game"),
                    html.Li("Regional Sales: Sales in North America, Europe, Japan, and other regions (in millions)"),
                    html.Li("Global Sales: Total worldwide sales (in millions)")
                ]),
                html.P("Note: The dataset includes 16,598 records, but two records were dropped due to incomplete information.")
            ]),
            dbc.ModalFooter(dbc.Button("Close", id="close-modal", color="secondary")),
        ],
        id="dataset-modal",
        size="lg",
    ),

    # KPIs Section
    # The three H2 elements start empty; update_kpis fills their `children`.
    dbc.Row([
        dbc.Col(dbc.Card([
            dbc.CardBody([
                html.H5("Total Sales", className="card-title"),
                html.H2(id="kpi-total-sales", className="card-text text-primary"),
                html.P("Filtered global sales (in millions)", className="text-muted"),
            ])
        ], color="light", outline=True), width=4),
        dbc.Col(dbc.Card([
            dbc.CardBody([
                html.H5("Top Genre", className="card-title"),
                html.H2(id="kpi-top-genre", className="card-text text-success"),
                html.P("Genre contributing the most sales", className="text-muted"),
            ])
        ], color="light", outline=True), width=4),
        dbc.Col(dbc.Card([
            dbc.CardBody([
                html.H5("Top Region", className="card-title"),
                html.H2(id="kpi-top-region", className="card-text text-danger"),
                html.P("Region with highest sales", className="text-muted"),
            ])
        ], color="light", outline=True), width=4),
    ], className="mb-4"),

    # Filters Section
    dbc.Row([
        dbc.Col([
            html.Label("Select Genre:", className="text-primary"),
            # One option per distinct value found in df['Genre']
            dcc.Dropdown(
                id='genre-filter',
                options=[{'label': g, 'value': g} for g in df['Genre'].unique()],
                value=None,  # Default: no selection
                placeholder="Select a genre"
            ),
        ], width=4),
        dbc.Col([
            html.Label("Select Region:", className="text-primary"),
            # Option values are DataFrame column names; the callbacks index
            # the filtered data with the selected value directly.
            dcc.Dropdown(
                id='region-filter',
                options=[
                    {'label': 'North America', 'value': 'NA_Sales'},
                    {'label': 'Europe', 'value': 'EU_Sales'},
                    {'label': 'Japan', 'value': 'JP_Sales'},
                    {'label': 'Other', 'value': 'Other_Sales'},
                    {'label': 'Global', 'value': 'Global_Sales'}
                ],
                value='Global_Sales',  # Default: global sales
                placeholder="Select a region"
            ),
        ], width=4),
        dbc.Col([
            html.Label("Year Range:", className="text-primary"),
            # Bounds come from the valid years in the dataset; a labelled mark
            # is placed every 5 years starting at min_year.
            dcc.RangeSlider(
                id='year-slider',
                min=min_year,
                max=max_year,
                value=[min_year, max_year],  # Default: full range
                marks={str(year): str(year) for year in range(min_year, max_year + 1, 5)},
                tooltip={"placement": "bottom", "always_visible": True},  # Dynamic tooltip
            ),
            # NOTE(review): no callback visible in this file writes to this Div
            html.Div(id='year-range-output', className="text-center mt-2"),
        ], width=4),
    ], className="mb-4"),

    # Charts Section
    # Each Graph's `figure` is produced by the callback targeting its id.
    dbc.Row([
        dbc.Col(dcc.Graph(id='sales-trend'), width=6),
        dbc.Col(dcc.Graph(id='market-share-genre'), width=6)
    ], className="mb-4"),
    dbc.Row([
        dbc.Col(dcc.Graph(id='regional-heatmap'), width=6),
        dbc.Col(dcc.Graph(id='top-games'), width=6)
    ]),
], fluid=True)

# Callback: Toggle About Dataset Modal
@app.callback(
    Output("dataset-modal", "is_open"),
    [Input("open-modal", "n_clicks"), Input("close-modal", "n_clicks")],
    [State("dataset-modal", "is_open")]
)
def toggle_about_modal(open_clicks, close_clicks, is_open):
    """Flip the "About Dataset" modal once either of its buttons has been clicked.

    Args:
        open_clicks: Click count of the "open-modal" button (falsy before the
            first click).
        close_clicks: Click count of the "close-modal" button (falsy before the
            first click).
        is_open: Current ``is_open`` state of the modal.

    Returns:
        The negated state if any click has been registered, otherwise the
        state unchanged.
    """
    any_click = open_clicks or close_clicks
    if not any_click:
        # Nothing has been clicked yet: keep the modal as it is
        return is_open
    return not is_open

@app.callback(
    [Output('kpi-total-sales', 'children'),
     Output('kpi-top-genre', 'children'),
     Output('kpi-top-region', 'children')],
    [Input('genre-filter', 'value'),
     Input('region-filter', 'value'),
     Input('year-slider', 'value')]
)
def update_kpis(selected_genre, selected_region, year_range):
    """Build the text shown in the three KPI cards for the current filters.

    Args:
        selected_genre: Genre chosen in the genre dropdown, or ``None``.
        selected_region: Sales column chosen in the region dropdown
            (e.g. ``'NA_Sales'``), or ``None`` when the dropdown is cleared.
        year_range: ``[first_year, last_year]`` from the year slider.

    Returns:
        A 3-tuple of strings: total sales, top genre with its sales, and top
        region with its sales. When no rows match the filters the tuple is
        ``("0.00M", "N/A", "N/A")``.
    """
    filtered_df = filter_data(df, year_range, selected_genre, selected_region)

    # Nothing matches the filters: show neutral placeholders
    if filtered_df.empty:
        return "0.00M", "N/A", "N/A"

    # The region dropdown can be cleared, which yields None; fall back to the
    # global column instead of indexing the frame with None (KeyError).
    region_column = selected_region or 'Global_Sales'

    total_sales = filtered_df[region_column].sum()

    # Top genre is measured in the selected region's sales column
    genre_sales = filtered_df.groupby('Genre')[region_column].sum()
    if genre_sales.empty:
        top_genre, top_genre_sales = "N/A", 0
    else:
        top_genre, top_genre_sales = genre_sales.idxmax(), genre_sales.max()

    # Top region always compares the four regional columns, regardless of the
    # region dropdown; the summed Series always has exactly four entries.
    regional_sales = filtered_df[['NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales']].sum()
    top_region = regional_sales.idxmax()
    top_region_sales = regional_sales.max()

    return (f"{total_sales:.2f}M",
            f"{top_genre} ({top_genre_sales:.2f}M)",
            f"{top_region} ({top_region_sales:.2f}M)")

# Sales Trend Callback
@app.callback(
    Output('sales-trend', 'figure'),
    [Input('year-slider', 'value'),
     Input('region-filter', 'value'),
     Input('genre-filter', 'value')]
)
def update_sales_trend(year_range, selected_region, selected_genre):
    """Return a line chart of yearly sales for the current filters.

    Args:
        year_range: ``[first_year, last_year]`` from the year slider.
        selected_region: Sales column chosen in the region dropdown, or
            ``None`` when the dropdown is cleared.
        selected_genre: Genre chosen in the genre dropdown, or ``None``.

    Returns:
        A Plotly figure; an empty line chart titled "No Data Available" when
        no rows match the filters.
    """
    filtered_df = filter_data(df, year_range, selected_genre, selected_region)
    if filtered_df.empty:
        return px.line(title="No Data Available")

    # A cleared region dropdown yields None; plot global sales in that case
    # rather than failing on a None column lookup.
    region_column = selected_region or 'Global_Sales'
    sales_by_year = filtered_df.groupby('Year')[region_column].sum().reset_index()

    fig = px.line(
        sales_by_year,
        x='Year',
        y=region_column,
        title=f"Sales Trend Over Time ({year_range[0]} - {year_range[1]})",
        labels={"Year": "Year", region_column: "Sales (Millions)"},
        markers=True
    )
    return fig

# Market Share by Genre Callback
@app.callback(
    Output('market-share-genre', 'figure'),
    [Input('year-slider', 'value'),
     Input('region-filter', 'value')]
)
def update_genre_market_share(year_range, selected_region):
    """Return a pie chart of each genre's share of sales in the chosen region.

    The genre dropdown is deliberately not an input here: the chart always
    compares all genres within the selected years.

    Args:
        year_range: ``[first_year, last_year]`` from the year slider.
        selected_region: Sales column chosen in the region dropdown, or
            ``None`` when the dropdown is cleared.

    Returns:
        A Plotly figure; an empty pie chart titled "No Data Available" when
        no rows fall in the year range.
    """
    filtered_df = filter_data(df, year_range, None, selected_region)
    if filtered_df.empty:
        return px.pie(title="No Data Available")

    # A cleared region dropdown yields None; use global sales in that case
    # rather than failing on a None column lookup.
    region_column = selected_region or 'Global_Sales'
    genre_sales = filtered_df.groupby('Genre')[region_column].sum().reset_index()

    fig = px.pie(
        genre_sales,
        names='Genre',
        values=region_column,
        title=f"Genre Market Share ({year_range[0]} - {year_range[1]})"
    )
    fig.update_traces(textinfo='label+percent', textposition='inside')
    return fig

# Regional Heatmap Callback
@app.callback(
    Output('regional-heatmap', 'figure'),
    [Input('year-slider', 'value'),
     Input('genre-filter', 'value')]
)
def update_regional_heatmap(year_range, selected_genre):
    """Return a genre-by-region heatmap of summed sales for the current filters.

    Args:
        year_range: ``[first_year, last_year]`` from the year slider.
        selected_genre: Genre chosen in the genre dropdown, or ``None``.

    Returns:
        A Plotly figure; an empty placeholder figure titled
        "No Data Available" when no rows match the filters.
    """
    filtered_df = filter_data(df, year_range, selected_genre, None)
    if filtered_df.empty:
        # px.imshow requires an image/matrix argument, so calling it with only
        # a title raises TypeError; use a data-less figure as the placeholder.
        return px.scatter(title="No Data Available")

    # Rows are genres, columns are the four regional sales totals
    heatmap_data = filtered_df.pivot_table(
        index='Genre',
        values=['NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales'],
        aggfunc='sum'
    )

    fig = px.imshow(
        heatmap_data,
        title=f"Regional Popularity by Genre ({year_range[0]} - {year_range[1]})",
        labels={'color': 'Sales (Millions)'},
        color_continuous_scale='Blues'
    )
    return fig

@app.callback(
    Output('top-games', 'figure'),
    [Input('year-slider', 'value'),
     Input('genre-filter', 'value')]
)
def update_top_games(year_range, selected_genre):
    """Return a bar chart of the ten rows with the highest global sales.

    Args:
        year_range: ``[first_year, last_year]`` from the year slider.
        selected_genre: Genre chosen in the genre dropdown, or ``None``.

    Returns:
        A Plotly figure; an empty bar chart titled "No Data Available" when
        no rows match the filters.
    """
    filtered_df = filter_data(df, year_range, selected_genre, None)
    if filtered_df.empty:
        return px.bar(title="No Data Available")

    # Pick the top rows first and work on an explicit copy: assigning a column
    # on the filtered slice would trigger pandas' chained-assignment warning.
    top_games = filtered_df.nlargest(10, 'Global_Sales').copy()

    # Clean game names to avoid visual splitting
    top_games['Name'] = top_games['Name'].str.replace(r'[:/]', '-', regex=True)

    fig = px.bar(
        top_games,
        x='Name',
        y='Global_Sales',
        title=f"Top 10 Best-Selling Games ({year_range[0]} - {year_range[1]})",
        labels={"Name": "Game Name", "Global_Sales": "Sales (Millions)"},
        text='Global_Sales'
    )
    fig.update_traces(texttemplate='%{text:.2f}', textposition='outside')
    fig.update_layout(xaxis_tickangle=45)  # Rotate x-axis labels for better readability
    return fig

# Run the app
if __name__ == '__main__':
    # Start the Dash server in debug mode on port 112 when executed directly
    app.run_server(
        port=112,
        debug=True,
    )