In [77]:
import re

import dash_bootstrap_components as dbc
import pandas as pd
import plotly.express as px
from dash import Dash, dcc, html, Input, Output
from sqlalchemy import create_engine


In [78]:
# Open the local SQLite database and read the entire `general_info` table into a DataFrame.
engine = create_engine('sqlite:///Steam_Games_db.sqlite')
df = pd.read_sql(sql="SELECT * FROM general_info", con=engine)


In [79]:
# Show the column labels of the loaded table.
df.columns


Index(['Unnamed: 0', 'AppID', 'Name', 'Release date', 'Estimated owners',
       'Peak CCU', 'Required age', 'Price', 'Metacritic score', 'User score',
       'Average playtime forever', 'Average playtime two weeks',
       'Median playtime forever', 'Median playtime two weeks', 'Developers',
       'Publishers', 'Categories', 'Genres', 'Tags'],
      dtype='object')

In [80]:
# Preview the raw frame as loaded.
df

Unnamed: 0.1,Unnamed: 0,AppID,Name,Release date,Estimated owners,Peak CCU,Required age,Price,Metacritic score,User score,Average playtime forever,Average playtime two weeks,Median playtime forever,Median playtime two weeks,Developers,Publishers,Categories,Genres,Tags
0,0,20200,Galactic Bowling,"Oct 21, 2008",0 - 20000,0,0,19.99,0,0,0,0,0,0,Perpetual FX Creative,Perpetual FX Creative,"Single-player,Multi-player,Steam Achievements,...","Casual,Indie,Sports","Indie,Casual,Sports,Bowling"
1,1,655370,Train Bandit,"Oct 12, 2017",0 - 20000,0,0,0.99,0,0,0,0,0,0,Rusty Moyher,Wild Rooster,"Single-player,Steam Achievements,Full controll...","Action,Indie","Indie,Action,Pixel Graphics,2D,Retro,Arcade,Sc..."
2,2,1732930,Jolt Project,"Nov 17, 2021",0 - 20000,0,0,4.99,0,0,0,0,0,0,Campião Games,Campião Games,Single-player,"Action,Adventure,Indie,Strategy",No Tag
3,3,1355720,Henosis™,"Jul 23, 2020",0 - 20000,0,0,5.99,0,0,0,0,0,0,Odd Critter Games,Odd Critter Games,"Single-player,Full controller support","Adventure,Casual,Indie","2D Platformer,Atmospheric,Surreal,Mystery,Puzz..."
4,4,1139950,Two Weeks in Painland,"Feb 3, 2020",0 - 20000,0,0,0.00,0,0,0,0,0,0,Unusual Games,Unusual Games,"Single-player,Steam Achievements","Adventure,Indie","Indie,Adventure,Nudity,Violent,Sexual Content,..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97405,97405,3080940,Femdom Game World: Mom,"Aug 22, 2024",0 - 20000,0,0,4.19,0,0,0,0,0,0,Femdom Game World,Femdom Game World,"Single-player,Family Sharing",Casual,No Tag
97406,97406,2593970,Blocky Farm,"Aug 30, 2024",0 - 0,0,0,8.99,0,0,0,0,0,0,Forever Entertainment S. A.,Forever Entertainment S. A.,"Single-player,Steam Achievements,Partial Contr...","Casual,Simulation,Strategy",No Tag
97407,97407,3137150,Infiltrate & Extract,"Aug 30, 2024",0 - 0,0,0,0.00,0,0,0,0,0,0,Carlos Garrido,DigiPen Intsitute of Technology,Single-player,"Strategy,Free To Play",No Tag
97408,97408,3124670,Escape The Garage,"Aug 29, 2024",0 - 0,0,0,4.99,0,0,0,0,0,0,CryneX,"CryneX,CryTechGames","Single-player,Steam Achievements,Family Sharing","Adventure,Casual,Indie",No Tag


In [81]:
# Summarise each column's dtype and non-null count.
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 97410 entries, 0 to 97409
Data columns (total 19 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Unnamed: 0                  97410 non-null  int64  
 1   AppID                       97410 non-null  int64  
 2   Name                        97410 non-null  object 
 3   Release date                97410 non-null  object 
 4   Estimated owners            97410 non-null  object 
 5   Peak CCU                    97410 non-null  int64  
 6   Required age                97410 non-null  int64  
 7   Price                       97410 non-null  float64
 8   Metacritic score            97410 non-null  int64  
 9   User score                  97410 non-null  int64  
 10  Average playtime forever    97410 non-null  int64  
 11  Average playtime two weeks  97410 non-null  int64  
 12  Median playtime forever     97410 non-null  int64  
 13  Median playtime two weeks   974

In [82]:
# Drop the redundant 'Unnamed: 0' column if the table has one.
df = df.drop(columns=['Unnamed: 0'], errors='ignore')

# Parse release dates; values that cannot be parsed become NaT.
df['Release date'] = pd.to_datetime(df['Release date'], errors='coerce')

# Coerce the score and price columns to numeric; unparseable values become NaN.
for numeric_col in ('User score', 'Metacritic score', 'Price'):
    df[numeric_col] = pd.to_numeric(df[numeric_col], errors='coerce')

# Display the first rows of the converted columns as a sanity check.
df[['Release date', 'User score', 'Metacritic score', 'Price']].head()


Unnamed: 0,Release date,User score,Metacritic score,Price
0,2008-10-21,0,0,19.99
1,2017-10-12,0,0,0.99
2,2021-11-17,0,0,4.99
3,2020-07-23,0,0,5.99
4,2020-02-03,0,0,0.0


In [83]:
# Columns that hold playtime figures.
playtime_cols = [
    'Average playtime forever',
    'Average playtime two weeks',
    'Median playtime forever',
    'Median playtime two weeks',
]

# Coerce every playtime column to numeric in one pass; unparseable values become NaN.
df[playtime_cols] = df[playtime_cols].apply(pd.to_numeric, errors='coerce')

In [84]:
# Derive a 'Year' column from the release date.
# 'Release date' was already parsed in the cleaning cell; re-applying
# pd.to_datetime to datetime data leaves it unchanged and keeps this cell
# safe to run on its own. Invalid values are set to NaT.
df['Release date'] = pd.to_datetime(df['Release date'], errors='coerce')

# 'Int64' (capital I) is pandas' nullable integer dtype, so rows whose date
# could not be parsed get <NA> instead of turning the whole column into floats.
df['Year'] = df['Release date'].dt.year.astype('Int64')

# View the result
print(df[['Release date', 'Year']].head())


  Release date  Year
0   2008-10-21  2008
1   2017-10-12  2017
2   2021-11-17  2021
3   2020-07-23  2020
4   2020-02-03  2020


In [85]:
# Inspect the frame again after the cleaning cells above.
df

Unnamed: 0,AppID,Name,Release date,Estimated owners,Peak CCU,Required age,Price,Metacritic score,User score,Average playtime forever,Average playtime two weeks,Median playtime forever,Median playtime two weeks,Developers,Publishers,Categories,Genres,Tags,Year
0,20200,Galactic Bowling,2008-10-21,0 - 20000,0,0,19.99,0,0,0,0,0,0,Perpetual FX Creative,Perpetual FX Creative,"Single-player,Multi-player,Steam Achievements,...","Casual,Indie,Sports","Indie,Casual,Sports,Bowling",2008
1,655370,Train Bandit,2017-10-12,0 - 20000,0,0,0.99,0,0,0,0,0,0,Rusty Moyher,Wild Rooster,"Single-player,Steam Achievements,Full controll...","Action,Indie","Indie,Action,Pixel Graphics,2D,Retro,Arcade,Sc...",2017
2,1732930,Jolt Project,2021-11-17,0 - 20000,0,0,4.99,0,0,0,0,0,0,Campião Games,Campião Games,Single-player,"Action,Adventure,Indie,Strategy",No Tag,2021
3,1355720,Henosis™,2020-07-23,0 - 20000,0,0,5.99,0,0,0,0,0,0,Odd Critter Games,Odd Critter Games,"Single-player,Full controller support","Adventure,Casual,Indie","2D Platformer,Atmospheric,Surreal,Mystery,Puzz...",2020
4,1139950,Two Weeks in Painland,2020-02-03,0 - 20000,0,0,0.00,0,0,0,0,0,0,Unusual Games,Unusual Games,"Single-player,Steam Achievements","Adventure,Indie","Indie,Adventure,Nudity,Violent,Sexual Content,...",2020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97405,3080940,Femdom Game World: Mom,2024-08-22,0 - 20000,0,0,4.19,0,0,0,0,0,0,Femdom Game World,Femdom Game World,"Single-player,Family Sharing",Casual,No Tag,2024
97406,2593970,Blocky Farm,2024-08-30,0 - 0,0,0,8.99,0,0,0,0,0,0,Forever Entertainment S. A.,Forever Entertainment S. A.,"Single-player,Steam Achievements,Partial Contr...","Casual,Simulation,Strategy",No Tag,2024
97407,3137150,Infiltrate & Extract,2024-08-30,0 - 0,0,0,0.00,0,0,0,0,0,0,Carlos Garrido,DigiPen Intsitute of Technology,Single-player,"Strategy,Free To Play",No Tag,2024
97408,3124670,Escape The Garage,2024-08-29,0 - 0,0,0,4.99,0,0,0,0,0,0,CryneX,"CryneX,CryTechGames","Single-player,Steam Achievements,Family Sharing","Adventure,Casual,Indie",No Tag,2024


In [86]:

# Keep only games whose lifetime average playtime is known and strictly positive,
# then take the ten largest values.
avg_playtime = df['Average playtime forever']
df_playtime = df[avg_playtime.notna() & (avg_playtime > 0)]
top10_played = df_playtime.sort_values(
    by='Average playtime forever',
    ascending=False,
).iloc[:10]

# Horizontal bar chart, one bar per game.
fig1 = px.bar(
    top10_played,
    y='Name',
    x='Average playtime forever',
    orientation='h',
    labels={'Average playtime forever': 'Avg Playtime (minutes)', 'Name': 'Game'},
    title='Top 10 Most-Played Games',
)
# Order the bars by value so the longest bar ends up at the top.
fig1.update_layout(yaxis=dict(categoryorder='total ascending'))
fig1.show()


In [87]:
# Tally how many games fall in each release year (missing years are not counted),
# then put the tally in chronological order.
releases_per_year = df['Year'].value_counts()
year_count = releases_per_year.reset_index()
year_count.columns = ['Year', 'Count']
year_count = year_count.sort_values(by='Year')

# Line chart of the yearly tally.
fig2 = px.line(
    year_count,
    y='Count',
    x='Year',
    labels={'Count': 'Number of Games', 'Year': 'Release Year'},
    title=' Games Released by Year',
)
# These axis titles override the ones derived from `labels` above.
fig2.update_layout(yaxis_title="Games", xaxis_title="Year")
fig2.show()


In [88]:
# Mean user score for each distinct value of the 'Publishers' column.
# NOTE(review): values such as "CryneX,CryTechGames" are grouped as one publisher,
# and a score of 0 is averaged like any other score — confirm that is intended.
scores = df.groupby('Publishers', as_index=False)['User score'].mean()

# The ten publishers with the highest mean score.
top_scores = scores.sort_values(by='User score', ascending=False).iloc[:10]

# Vertical bar chart, one bar per publisher.
fig3 = px.bar(
    top_scores,
    y='User score',
    x='Publishers',
    labels={'Publishers': 'Publisher', 'User score': 'Avg User Score'},
    title='Top Publishers by Avg User Score',
)

# Axis titles, with tilted tick labels so long publisher names stay readable.
fig3.update_layout(xaxis_title="Publisher", yaxis_title="Score")
fig3.update_layout(xaxis_tickangle=-45)

fig3.show()


In [89]:
# Initialize Dash app with Bootstrap theme
# ----------------------------------------
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])
app.title = "Steam Game Visualizations"


# Distinct genre names offered in the dropdown.
# Each 'Genres' value is a comma-separated list; null values are skipped (calling
# .split on them would raise) and blank tokens are dropped so the dropdown never
# shows an empty option.
genre_names = sorted({
    name.strip()
    for genres in df['Genres'].dropna()
    for name in str(genres).split(',')
    if name.strip()
})


# APP LAYOUT: Organize page elements

app.layout = dbc.Container([
    # Dashboard title
    html.H1("Steam Games Dashboard", className="text-center my-4"),

    # Row with genre dropdown on left and graphs on right
    dbc.Row([
        # Dropdown filter for selecting game genre
        dbc.Col([
            html.Label("Filter by Genre:"),
            dcc.Dropdown(
                id='genre-dropdown',
                options=[{'label': name, 'value': name} for name in genre_names],
                placeholder="Select genre"
            )
        ], width=3),  # Sidebar column width

        # Column with interactive graphs
        dbc.Col([
            dcc.Graph(id='playtime-bar', style={'height': '500px'}),  # Graph 1: Playtime
            dcc.Graph(id='year-line', style={'height': '400px'}),     # Graph 2: Release year trend
            dcc.Graph(figure=fig3, id='rating-bar', style={'height': '450px'})  # Graph 3: Static publisher chart
        ], width=9)
    ])
], fluid=True)  # Fluid=True allows responsive resizing


def _filter_by_genre(games, genre):
    """Return the rows of `games` whose 'Genres' list contains `genre` as a whole entry.

    Parameters
    ----------
    games : pandas.DataFrame
        Frame with a 'Genres' column of comma-separated genre names.
    genre : str or None
        Genre to keep. A falsy value (None or '') disables filtering.

    Returns
    -------
    pandas.DataFrame
        `games` itself when no genre is given, otherwise the matching rows.
    """
    if not genre:
        return games
    # Match the genre only as a complete comma-delimited token. The name is escaped
    # so it is never interpreted as a regular expression, and a genre that is a
    # substring of another genre cannot match by accident.
    pattern = rf'(?:^|,)\s*{re.escape(genre)}\s*(?:,|$)'
    # na=False treats rows with a missing 'Genres' value as non-matching.
    return games[games['Genres'].str.contains(pattern, regex=True, na=False)]


# CALLBACK: Update two graphs based on selected genre

@app.callback(
    [Output('playtime-bar', 'figure'),  # Graph 1
     Output('year-line', 'figure')],    # Graph 2
    [Input('genre-dropdown', 'value')]  # Triggered when genre is selected
)
def update_graphs(selected_genre):
    """Rebuild the playtime bar chart and the releases-per-year line chart.

    Parameters
    ----------
    selected_genre : str or None
        Current value of the genre dropdown; None (nothing selected) means
        all games are used.

    Returns
    -------
    tuple
        (playtime bar figure, releases-per-year line figure), in the order of
        the callback's outputs.
    """
    # Restrict the dataset to the selected genre, if any
    filtered = _filter_by_genre(df, selected_genre)

    # 1. Top 10 Most-Played Games
    playtime = filtered[['Name', 'Average playtime forever']].dropna()
    playtime = playtime[playtime['Average playtime forever'] > 0]
    top10 = playtime.sort_values('Average playtime forever', ascending=False).head(10)

    playtime_fig = px.bar(
        top10,
        x='Average playtime forever',
        y='Name',
        orientation='h',
        title="Top 10 Most-Played Games"
    )
    # Order the bars by value so the longest bar ends up at the top.
    playtime_fig.update_layout(xaxis_title="Minutes", yaxis_title="Game",
                               yaxis={'categoryorder': 'total ascending'})

    # 2. Games Released per Year
    year_count = filtered['Year'].value_counts().reset_index()
    year_count.columns = ['Year', 'Count']
    year_count = year_count.sort_values('Year')

    year_fig = px.line(
        year_count,
        x='Year',
        y='Count',
        title='Games Released by Year'
    )
    year_fig.update_layout(xaxis_title="Year", yaxis_title="Games")

    return playtime_fig, year_fig

# Run the app in Jupyter Notebook (use "external" to see it in a browser or "inline" to run it within the notebook)

app.run(jupyter_mode="inline")
