In [55]:
import pandas as pd
from dash import Dash, dcc, html, Input, Output
import plotly.express as px
from sqlalchemy import create_engine
import dash_bootstrap_components as dbc


In [56]:
# Load and clean data
engine = create_engine('sqlite:///Steam_Games_db.sqlite')
df = pd.read_sql("SELECT * FROM general_info", engine)


In [57]:
df.columns


Index(['Unnamed: 0', 'AppID', 'Name', 'Release date', 'Estimated owners',
       'Peak CCU', 'Required age', 'Price', 'Metacritic score', 'User score',
       'Average playtime forever', 'Average playtime two weeks',
       'Median playtime forever', 'Median playtime two weeks', 'Developers',
       'Publishers', 'Categories', 'Genres', 'Tags'],
      dtype='object')

In [58]:
df

Unnamed: 0.1,Unnamed: 0,AppID,Name,Release date,Estimated owners,Peak CCU,Required age,Price,Metacritic score,User score,Average playtime forever,Average playtime two weeks,Median playtime forever,Median playtime two weeks,Developers,Publishers,Categories,Genres,Tags
0,0,20200,Galactic Bowling,"Oct 21, 2008",0 - 20000,0,0,19.99,0,0,0,0,0,0,Perpetual FX Creative,Perpetual FX Creative,"Single-player,Multi-player,Steam Achievements,...","Casual,Indie,Sports","Indie,Casual,Sports,Bowling"
1,1,655370,Train Bandit,"Oct 12, 2017",0 - 20000,0,0,0.99,0,0,0,0,0,0,Rusty Moyher,Wild Rooster,"Single-player,Steam Achievements,Full controll...","Action,Indie","Indie,Action,Pixel Graphics,2D,Retro,Arcade,Sc..."
2,2,1732930,Jolt Project,"Nov 17, 2021",0 - 20000,0,0,4.99,0,0,0,0,0,0,Campião Games,Campião Games,Single-player,"Action,Adventure,Indie,Strategy",No Tag
3,3,1355720,Henosis™,"Jul 23, 2020",0 - 20000,0,0,5.99,0,0,0,0,0,0,Odd Critter Games,Odd Critter Games,"Single-player,Full controller support","Adventure,Casual,Indie","2D Platformer,Atmospheric,Surreal,Mystery,Puzz..."
4,4,1139950,Two Weeks in Painland,"Feb 3, 2020",0 - 20000,0,0,0.00,0,0,0,0,0,0,Unusual Games,Unusual Games,"Single-player,Steam Achievements","Adventure,Indie","Indie,Adventure,Nudity,Violent,Sexual Content,..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97405,97405,3080940,Femdom Game World: Mom,"Aug 22, 2024",0 - 20000,0,0,4.19,0,0,0,0,0,0,Femdom Game World,Femdom Game World,"Single-player,Family Sharing",Casual,No Tag
97406,97406,2593970,Blocky Farm,"Aug 30, 2024",0 - 0,0,0,8.99,0,0,0,0,0,0,Forever Entertainment S. A.,Forever Entertainment S. A.,"Single-player,Steam Achievements,Partial Contr...","Casual,Simulation,Strategy",No Tag
97407,97407,3137150,Infiltrate & Extract,"Aug 30, 2024",0 - 0,0,0,0.00,0,0,0,0,0,0,Carlos Garrido,DigiPen Intsitute of Technology,Single-player,"Strategy,Free To Play",No Tag
97408,97408,3124670,Escape The Garage,"Aug 29, 2024",0 - 0,0,0,4.99,0,0,0,0,0,0,CryneX,"CryneX,CryTechGames","Single-player,Steam Achievements,Family Sharing","Adventure,Casual,Indie",No Tag


In [59]:
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 97410 entries, 0 to 97409
Data columns (total 19 columns):
 #   Column                      Non-Null Count  Dtype  
---  ------                      --------------  -----  
 0   Unnamed: 0                  97410 non-null  int64  
 1   AppID                       97410 non-null  int64  
 2   Name                        97410 non-null  object 
 3   Release date                97410 non-null  object 
 4   Estimated owners            97410 non-null  object 
 5   Peak CCU                    97410 non-null  int64  
 6   Required age                97410 non-null  int64  
 7   Price                       97410 non-null  float64
 8   Metacritic score            97410 non-null  int64  
 9   User score                  97410 non-null  int64  
 10  Average playtime forever    97410 non-null  int64  
 11  Average playtime two weeks  97410 non-null  int64  
 12  Median playtime forever     97410 non-null  int64  
 13  Median playtime two weeks   974

In [60]:
# Drop redundant 'Unnamed: 0' column if it exists
if 'Unnamed: 0' in df.columns:
    df = df.drop(columns=['Unnamed: 0'])

# Convert 'Release date' to datetime
df['Release date'] = pd.to_datetime(df['Release date'], errors='coerce')

# Convert score and price columns to numeric
df['User score'] = pd.to_numeric(df['User score'], errors='coerce')
df['Metacritic score'] = pd.to_numeric(df['Metacritic score'], errors='coerce')
df['Price'] = pd.to_numeric(df['Price'], errors='coerce')

# Check conversion results
df[['Release date', 'User score', 'Metacritic score', 'Price']].head()


Unnamed: 0,Release date,User score,Metacritic score,Price
0,2008-10-21,0,0,19.99
1,2017-10-12,0,0,0.99
2,2021-11-17,0,0,4.99
3,2020-07-23,0,0,5.99
4,2020-02-03,0,0,0.0


In [61]:

# Convert 'Release date' to datetime, invalid parsing will be set as NaT
df['Release date'] = pd.to_datetime(df['Release date'], errors='coerce')

# Extract the year from the 'Release date'
df['Year'] = df['Release date'].dt.year

# Convert year to integer 
df['Year'] = df['Year'].astype('Int64')  

# View the result
print(df[['Release date', 'Year']].head())


  Release date  Year
0   2008-10-21  2008
1   2017-10-12  2017
2   2021-11-17  2021
3   2020-07-23  2020
4   2020-02-03  2020


In [62]:
df

Unnamed: 0,AppID,Name,Release date,Estimated owners,Peak CCU,Required age,Price,Metacritic score,User score,Average playtime forever,Average playtime two weeks,Median playtime forever,Median playtime two weeks,Developers,Publishers,Categories,Genres,Tags,Year
0,20200,Galactic Bowling,2008-10-21,0 - 20000,0,0,19.99,0,0,0,0,0,0,Perpetual FX Creative,Perpetual FX Creative,"Single-player,Multi-player,Steam Achievements,...","Casual,Indie,Sports","Indie,Casual,Sports,Bowling",2008
1,655370,Train Bandit,2017-10-12,0 - 20000,0,0,0.99,0,0,0,0,0,0,Rusty Moyher,Wild Rooster,"Single-player,Steam Achievements,Full controll...","Action,Indie","Indie,Action,Pixel Graphics,2D,Retro,Arcade,Sc...",2017
2,1732930,Jolt Project,2021-11-17,0 - 20000,0,0,4.99,0,0,0,0,0,0,Campião Games,Campião Games,Single-player,"Action,Adventure,Indie,Strategy",No Tag,2021
3,1355720,Henosis™,2020-07-23,0 - 20000,0,0,5.99,0,0,0,0,0,0,Odd Critter Games,Odd Critter Games,"Single-player,Full controller support","Adventure,Casual,Indie","2D Platformer,Atmospheric,Surreal,Mystery,Puzz...",2020
4,1139950,Two Weeks in Painland,2020-02-03,0 - 20000,0,0,0.00,0,0,0,0,0,0,Unusual Games,Unusual Games,"Single-player,Steam Achievements","Adventure,Indie","Indie,Adventure,Nudity,Violent,Sexual Content,...",2020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
97405,3080940,Femdom Game World: Mom,2024-08-22,0 - 20000,0,0,4.19,0,0,0,0,0,0,Femdom Game World,Femdom Game World,"Single-player,Family Sharing",Casual,No Tag,2024
97406,2593970,Blocky Farm,2024-08-30,0 - 0,0,0,8.99,0,0,0,0,0,0,Forever Entertainment S. A.,Forever Entertainment S. A.,"Single-player,Steam Achievements,Partial Contr...","Casual,Simulation,Strategy",No Tag,2024
97407,3137150,Infiltrate & Extract,2024-08-30,0 - 0,0,0,0.00,0,0,0,0,0,0,Carlos Garrido,DigiPen Intsitute of Technology,Single-player,"Strategy,Free To Play",No Tag,2024
97408,3124670,Escape The Garage,2024-08-29,0 - 0,0,0,4.99,0,0,0,0,0,0,CryneX,"CryneX,CryTechGames","Single-player,Steam Achievements,Family Sharing","Adventure,Casual,Indie",No Tag,2024


In [63]:
# Filter and sort playtime data
df_playtime = df[df['Average playtime forever'].notna()]
df_playtime = df_playtime[df_playtime['Average playtime forever'] > 0]
top10_played = df_playtime.sort_values('Average playtime forever', ascending=False).head(10)
top10_played

Unnamed: 0,AppID,Name,Release date,Estimated owners,Peak CCU,Required age,Price,Metacritic score,User score,Average playtime forever,Average playtime two weeks,Median playtime forever,Median playtime two weeks,Developers,Publishers,Categories,Genres,Tags,Year
10228,1071100,Boom 3D,2019-06-19,20000 - 50000,199,0,16.99,0,0,145727,21,145727,21,Global Delight Technologies Pvt. Ltd.,Global Delight Technologies Pvt. Ltd.,No Categories,"Audio Production,Utilities","Utilities,Audio Production,VR,3D",2019
19303,1055010,Energy Engine PC Live Wallpaper,2019-05-10,50000 - 100000,67,0,3.99,0,0,104238,1430,208473,1430,3dm_live_wallpapers,3dm_live_wallpapers,No Categories,"Design & Illustration,Utilities","Design & Illustration,Utilities,Software,Simul...",2019
36483,1195830,副作用之瞳-Tlicolity Eyes-,2021-06-29,50000 - 100000,13,0,0.0,0,0,90351,0,90351,0,"IDEA FACTORY,Frontier Works,JoyMoe Interactive...",JoyMoe Interactive Entertainment Limited,Single-player,"Adventure,Casual,Free to Play,RPG","Interactive Fiction,Visual Novel,Word Game,Dat...",2021
23309,707590,Defense Clicker,2017-10-31,20000 - 50000,3,0,0.0,0,0,76068,0,76068,0,Albatros Softworks,Albatros Softworks,"Single-player,Steam Achievements,Steam Cloud,S...","Casual,Strategy,Early Access","Casual,Strategy,Early Access,Tower Defense,Cli...",2017
39591,404580,Relive,2015-10-15,50000 - 100000,0,0,0.0,0,0,68357,0,136629,0,Studio Evil,Studio Evil,"Single-player,Steam Achievements","Adventure,Free to Play,Indie","Free to Play,Indie,Adventure",2015
57947,517910,Sisyphus Reborn,2016-09-05,50000 - 100000,0,0,0.0,0,0,68159,0,136291,0,"Edwin Montgomery,Myshkin Entertainment",Myshkin Entertainment,"Single-player,Steam Achievements",Adventure,"Adventure,Free to Play,Atmospheric,Philosophic...",2016
46203,1283970,YoloMouse,2020-05-01,200000 - 500000,4420,0,3.99,0,0,64973,0,114016,0,Dragonrise Games,Dragonrise Games,No Categories,"Animation & Modeling,Design & Illustration,Edu...","Utilities,Design & Illustration,Animation & Mo...",2020
27911,610190,WARRIORS ALL-STARS,2017-08-29,20000 - 50000,10,0,59.99,0,0,51388,0,102435,0,"KOEI TECMO GAMES CO., LTD.","KOEI TECMO GAMES CO., LTD.","Single-player,Steam Achievements,Steam Trading...",Action,"Action,Musou,Hack and Slash,Anime,Singleplayer",2017
7529,554640,拯救大魔王2 Rescue the Great Demon 2,2016-11-18,100000 - 200000,0,0,0.0,0,0,49555,0,99108,0,吃了就睡工作室,indienova,"Single-player,Steam Trading Cards","Adventure,Free to Play,Indie,RPG","Free to Play,RPG,Indie,Adventure,RPGMaker",2016
60423,1369370,Combat Mission Shock Force 2,2020-08-31,0 - 20000,25,0,59.99,0,0,47336,0,47336,0,Battlefront,Slitherine Ltd.,"Single-player,Multi-player,PvP,Online PvP,LAN ...","Simulation,Strategy","Simulation,Strategy,RTS,Wargame,Turn-Based Str...",2020


In [64]:

# Plot
fig1 = px.bar(
    top10_played,
    x='Average playtime forever',
    y='Name',
    orientation='h',
    title='Top 10 Most-Played Games by Average Playtime Forever',
    labels={'Average playtime forever': 'Avg Playtime (minutes)', 'Name': 'Game'}
)
fig1.update_layout(yaxis={'categoryorder': 'total ascending'})
fig1.show()


In [65]:
# Count games per year
year_count = df['Year'].value_counts().reset_index()
year_count.columns = ['Year', 'Count']
year_count = year_count.sort_values('Year')
year_count

Unnamed: 0,Year,Count
24,1997,2
28,1998,1
23,1999,3
25,2000,2
21,2001,4
27,2002,1
22,2003,3
20,2004,7
19,2005,7
18,2006,69


In [66]:

# Plot
fig2 = px.line(
    year_count,
    x='Year',
    y='Count',
    title=' Games Released by Year',
    labels={'Count': 'Number of Games', 'Year': 'Release Year'}
)
fig2.update_layout(xaxis_title="Year", yaxis_title="Games")
fig2.show()


In [67]:
# Group by publisher and calculate average user score
scores = df.groupby('Publishers')['User score'].mean().reset_index()

# Sort and take top 10
top_scores = scores.sort_values('User score', ascending=False).head(10)
top_scores

Unnamed: 0,Publishers,User score
25198,Maya Games,100.0
33266,"Remtairy,Kagura Games",97.0
29442,"Outlawed Games,Ecchi Empire",92.0
32151,R I MAD,88.0
5604,Boobs Dev,77.0
163,1bit,55.0
591,"7DOTS,Phoenix_co",41.0
27369,Neko Climax Studios,38.6
41019,Totem Entertainment,36.5
8881,Curious Studio,32.0


In [68]:

# Plot
fig3 = px.bar(
    top_scores,
    x='Publishers',
    y='User score',
    title='Top Publishers by Avg User Score',
    labels={'Publishers': 'Publisher', 'User score': 'Avg User Score'}
)

fig3.update_layout(
    xaxis_title="Publisher",
    yaxis_title="Score",
    xaxis_tickangle=-45
)

fig3.show()


In [69]:
# Step 1: Split genres
# - Fill missing values with empty strings
# - Split each genre string by commas into a list
df['Genres'] = df['Genres'].fillna('').str.split(',')

# Step 2: Clean genres and extract unique ones
# - Define a function to clean each genre in a list
# - Removes brackets, quotes, and extra spaces
# - Capitalizes the genre (e.g., 'action' -> 'Action')
def clean_genres(genres):
    return [g.strip("[]\"' ").title() for g in genres if g.strip("[]\"' ")]

# Apply the cleaning function to each row in the 'Genres' column
df['Genres'] = df['Genres'].apply(clean_genres)

# Flatten the list of genres and use a set to get unique genres
# - Then sort the result alphabetically
unique_genres = sorted({genre for genres in df['Genres'] for genre in genres})

# Output the final list of cleaned, unique genres
print(unique_genres)


['360 Video', 'Accounting', 'Action', 'Adventure', 'Animation & Modeling', 'Audio Production', 'Casual', 'Design & Illustration', 'Documentary', 'Early Access', 'Education', 'Episodic', 'Free To Play', 'Game Development', 'Gore', 'Indie', 'Massively Multiplayer', 'Movie', 'No Genres', 'Nudity', 'Photo Editing', 'Racing', 'Rpg', 'Sexual Content', 'Short', 'Simulation', 'Software Training', 'Sports', 'Strategy', 'Tutorial', 'Utilities', 'Video Production', 'Violent', 'Web Publishing']


In [2]:
import sqlite3
import pandas as pd
import plotly.express as px

# Connect to the SQLite database
conn = sqlite3.connect("Steam_Games_db.sqlite")  # Update path if needed

# Load relevant data from the table
df = pd.read_sql("SELECT `Genres`, `Average playtime forever` FROM general_info;", conn)

# Remove rows with zero or missing playtime or genres
df = df[df["Average playtime forever"] > 0]
df = df.dropna(subset=["Genres"])

# Split comma-separated genres and explode them into separate rows
df["Genres"] = df["Genres"].str.split(",")
df_exploded = df.explode("Genres")

# List of non-game genres to remove
non_game_genres = {
    "Audio Production", "Utilities", "Game Development", "Software Training",
    "Design & Illustration", "Animation & Modeling", "Video Production",
    "Photo Editing", "Accounting", "Education", "Web Publishing"
}

# Filter out non-game genres
df_filtered = df_exploded[~df_exploded["Genres"].isin(non_game_genres)]

# Group by genre and compute average playtime
genre_avg_playtime = df_filtered.groupby("Genres")["Average playtime forever"].mean()

# Get top 10 genres by average playtime
top_10_final_genres = genre_avg_playtime.sort_values(ascending=False).head(10).reset_index()
top_10_final_genres.columns = ["Genre", "Average Playtime (minutes)"]

# Create interactive pie chart
fig = px.pie(
    top_10_final_genres,
    names="Genre",
    values="Average Playtime (minutes)",
    title="Top 10 Game Genres by Average Playtime (Plotly)",
  
)

fig.show()


In [70]:
# Initialize Dash app with Bootstrap theme for styling
app = Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Set the browser tab title for the app
app.title = "Steam Game Visualizations"

# Define the layout of the app using Bootstrap containers and rows
app.layout = dbc.Container([
    
    # App title displayed at the top center of the dashboard
    html.H1("Steam Games Dashboard", className="text-center my-4"),

    # Create a row with two columns: one for dropdown, one for graphs
    dbc.Row([
        
        # Left column: dropdown filter (3 units wide)
        dbc.Col([
            html.Label("Filter by Genre:"),  # Label for the dropdown
            dcc.Dropdown(
                id='genre-dropdown',  # ID to link it with the callback
                options=[{'label': g, 'value': g} for g in unique_genres],  # Populate dropdown with genres
                placeholder="Select genre"  # Default placeholder text
            )
        ], width=3),  # Left sidebar column (3/12 of the row width)

        # Right column: contains three graphs stacked vertically (9 units wide)
        dbc.Col([
            dcc.Graph(id='playtime-bar', style={'height': '500px'}),  # Graph 1: Top 10 games by playtime
            dcc.Graph(id='year-line', style={'height': '400px'}),     # Graph 2: Games released over time
            dcc.Graph(figure=fig3, id='rating-bar', style={'height': '450px'})  # Graph 3: Static rating chart
        ], width=9)  # Main graph area (9/12 of the row width)
    ])
], fluid=True)  # Enable responsive resizing for all screen sizes

# Define callback to update two graphs when a genre is selected
@app.callback(
    [Output('playtime-bar', 'figure'),  # Update bar chart
     Output('year-line', 'figure')],    # Update line chart
    [Input('genre-dropdown', 'value')]  # Triggered by dropdown selection
)
def update_graphs(selected_genre):
    # If a genre is selected, filter dataframe rows where genre is in the list
    if selected_genre:
        filtered = df[df['Genres'].apply(lambda genres: selected_genre in genres)]
    else:
        filtered = df  # If no genre selected, show full data

    # Create data for Top 10 most-played games (by average playtime)
    playtime = filtered[['Name', 'Average playtime forever']].dropna()
    playtime = playtime[playtime['Average playtime forever'] > 0]  # Remove zero-play games
    top10 = playtime.sort_values('Average playtime forever', ascending=False).head(10)

    # Create horizontal bar chart using Plotly Express
    fig1 = px.bar(
        top10,
        x='Average playtime forever',
        y='Name',
        orientation='h',
        title="Top 10 Most-Played Games by Average Playtime Forever"
    )
    fig1.update_layout(
        xaxis_title="Playtime in minutes",
        yaxis_title="Game",
        yaxis={'categoryorder': 'total ascending'}  # this ensures bars are ordered by playtime
    )

    # Count number of games released per year
    year_count = filtered['Year'].value_counts().reset_index()
    year_count.columns = ['Year', 'Count']  # Rename columns
    year_count = year_count.sort_values('Year')  # Sort by year

    # Create line chart showing game release trend over years
    fig2 = px.line(
        year_count,
        x='Year',
        y='Count',
        title='Games Released by Year'
    )
    fig2.update_layout(
        xaxis_title="Year",
        yaxis_title="Games"
    )

    # Return both figures to update their respective graphs
    return fig1, fig2

# # Run the app in Jupyter Notebook (use "external" to see it in a  browser or "inline" to run it within the notbook)
app.run(jupyter_mode="external")


Dash app running on http://127.0.0.1:8050/
