In [None]:
import os
import pandas as pd
from dotenv import load_dotenv
from steam_web_api import Steam
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load environment variables and build the Steam API client used by every
# download cell below.
load_dotenv()
KEY = os.getenv("STEAM_API_KEY")
if not KEY:
    # os.getenv returns None when the variable is unset; stop here with a clear
    # message instead of building a client without a key.
    raise RuntimeError(
        "STEAM_API_KEY is not set; define it in the environment or in a .env file"
    )
steam = Steam(KEY)

In [None]:
# Read normies_random.txt: one user ID per line, surrounding whitespace removed
# and blank lines skipped. These IDs drive the normie dataset.
with open("normies_random.txt", "r") as id_file:
    stripped_lines = (raw_line.strip() for raw_line in id_file)
    Normie_ids = [entry for entry in stripped_lines if entry]

In [None]:
# Query Steam's get_owned_games for every normie ID and collect one DataFrame
# per user in normie_data: one row per owned game, with the account-level
# values (game count, total hours, most played game) repeated on every row.
# Users with no returned games get a single placeholder row instead.
normie_columns = [
    'steam_id', 'game_count', 'total_hours_account (hrs)', 'most_played_game',
    'name', 'playtime_forever (mins)', 'playtime_2weeks (mins)',
]
normie_data = []
for steam_id in Normie_ids:
    try:
        response = steam.users.get_owned_games(steam_id)
        if 'games' in response and response['games']:
            games_df = pd.DataFrame(response['games'])
            game_count = games_df.shape[0]
            # adding all the minutes played and converting to hours
            total_hours_played = games_df['playtime_forever'].sum() / 60
            max_playtime = games_df['playtime_forever'].max()
            # some players have not played within the 2 week span, so the
            # column may be absent
            if 'playtime_2weeks' in games_df.columns:
                # NOTE(review): this writes the user's summed 2-week playtime
                # onto every row, replacing the per-game values. Confirm an
                # account-level total is what is wanted here.
                games_df['playtime_2weeks'] = games_df['playtime_2weeks'].sum()
            else:
                games_df['playtime_2weeks'] = 0  # if no 'playtime_2weeks'

            # every game tied for the highest playtime_forever, joined into a
            # single string so ties are kept
            is_most_played = games_df['playtime_forever'] == max_playtime
            most_played_games = games_df.loc[is_most_played, 'name'].tolist()
            most_played_res = ', '.join(most_played_games)

            # account-level values, repeated on each of this user's rows
            games_df['steam_id'] = steam_id
            games_df['game_count'] = game_count
            games_df['total_hours_account'] = round(total_hours_played, 2)
            games_df['most_played_game'] = most_played_res

            # Use consistent column names (units in the header)
            games_df = games_df.rename(columns={
                'total_hours_account': 'total_hours_account (hrs)',
                'playtime_2weeks': 'playtime_2weeks (mins)',
                'playtime_forever': 'playtime_forever (mins)',
            })

            # Select the renamed 'playtime_forever (mins)' column: asking for
            # the pre-rename name raised a KeyError for every user with games,
            # which the except below turned into a printed message.
            normie_data.append(games_df[normie_columns])
        else:
            # Placeholder row so users without games still appear in the
            # output. most_played_game is None rather than a list so the
            # column stays string/missing like in the branch above.
            normie_data.append(pd.DataFrame([{
                'steam_id': steam_id,
                'game_count': 0,
                'total_hours_account (hrs)': 0,
                'most_played_game': None,
                'name': None,
                'playtime_forever (mins)': 0.0,
                'playtime_2weeks (mins)': 0.0
            }]))
            print(f"No games found for Steam ID {steam_id}")
    except Exception as e:
        # best effort: report the failing ID and carry on with the next one
        print(f"Error processing Steam ID {steam_id}: {e}")

In [None]:
# Stack the per-user frames collected in normie_data into one DataFrame,
# rename the per-game title column 'name' to 'game_name', and dump the result
# as a CSV file.
# (The original passed an undefined name `data` to pd.concat.)
normie_df = (
    pd.concat(normie_data, ignore_index=True)
    .rename(columns={'name': 'game_name'})
)
normie_df.to_csv("steam_user_games_data_normies.csv", index=False)

In [None]:
# graph out the top 10 most played games in normies
# Per-game playtime is stored in minutes under 'playtime_forever (mins)'.
# Sum it per game title and divide by 60 so the values match the "hours"
# wording of the chart title and axis label.
normie_minutes_per_game = normie_df.groupby('game_name')['playtime_forever (mins)'].sum()
normie_most_played = (normie_minutes_per_game / 60).nlargest(10)

# Plotting the top 10 games
plt.figure(figsize=(12, 8))
sns.barplot(x=normie_most_played.values, y=normie_most_played.index, palette="viridis")
plt.title("Top 10 Most Played Games by Total Hours (Normie Gamers)")
plt.xlabel("Total Hours Played")
plt.ylabel("Game Title")
# plain tick labels instead of scientific notation on the hours axis
plt.ticklabel_format(style='plain', axis='x')

plt.show()

In [None]:
# Read casuals_random.txt: one user ID per line, surrounding whitespace removed
# and blank lines skipped. These IDs drive the casual dataset.
with open("casuals_random.txt", "r") as id_file:
    stripped_lines = (raw_line.strip() for raw_line in id_file)
    Casual_ids = [entry for entry in stripped_lines if entry]

In [None]:
# Query Steam's get_owned_games for every casual ID and collect one DataFrame
# per user in casual_data: one row per owned game, with the account-level
# values (game count, total hours, most played game) repeated on every row.
# Users with no returned games get a single placeholder row instead.
casual_columns = [
    'steam_id', 'game_count', 'total_hours_account (hrs)', 'most_played_game',
    'name', 'playtime_forever (mins)', 'playtime_2weeks (mins)',
]
casual_data = []
for steam_id in Casual_ids:
    try:
        response = steam.users.get_owned_games(steam_id)
        if 'games' in response and response['games']:
            games_df = pd.DataFrame(response['games'])
            game_count = games_df.shape[0]
            # adding all the minutes played and converting to hours
            total_hours_played = games_df['playtime_forever'].sum() / 60
            max_playtime = games_df['playtime_forever'].max()
            # some players have not played within the 2 week span, so the
            # column may be absent
            if 'playtime_2weeks' in games_df.columns:
                # NOTE(review): this writes the user's summed 2-week playtime
                # onto every row, replacing the per-game values. Confirm an
                # account-level total is what is wanted here.
                games_df['playtime_2weeks'] = games_df['playtime_2weeks'].sum()
            else:
                games_df['playtime_2weeks'] = 0  # if no 'playtime_2weeks'

            # every game tied for the highest playtime_forever, joined into a
            # single string so ties are kept
            is_most_played = games_df['playtime_forever'] == max_playtime
            most_played_games = games_df.loc[is_most_played, 'name'].tolist()
            most_played_res = ', '.join(most_played_games)

            # account-level values, repeated on each of this user's rows
            games_df['steam_id'] = steam_id
            games_df['game_count'] = game_count
            games_df['total_hours_account'] = round(total_hours_played, 2)
            games_df['most_played_game'] = most_played_res

            # Use consistent column names (units in the header)
            games_df = games_df.rename(columns={
                'total_hours_account': 'total_hours_account (hrs)',
                'playtime_2weeks': 'playtime_2weeks (mins)',
                'playtime_forever': 'playtime_forever (mins)',
            })

            # Select the renamed 'playtime_forever (mins)' column: asking for
            # the pre-rename name raised a KeyError for every user with games,
            # which the except below turned into a printed message.
            casual_data.append(games_df[casual_columns])
        else:
            # Placeholder row so users without games still appear in the
            # output. most_played_game is None rather than a list so the
            # column stays string/missing like in the branch above.
            casual_data.append(pd.DataFrame([{
                'steam_id': steam_id,
                'game_count': 0,
                'total_hours_account (hrs)': 0,
                'most_played_game': None,
                'name': None,
                'playtime_forever (mins)': 0.0,
                'playtime_2weeks (mins)': 0.0
            }]))
            print(f"No games found for Steam ID {steam_id}")
    except Exception as e:
        # best effort: report the failing ID and carry on with the next one
        print(f"Error processing Steam ID {steam_id}: {e}")

In [None]:
# output the casuals data to a csv
casual_df = pd.concat(casual_data, ignore_index=True)
casual_df.rename(columns={'name': 'most_played_game'}, inplace=True)
casual_df.to_csv("steam_user_games_data_casuals.csv", index=False)

In [None]:
# graph out the top 10 most played gmes in casuals
casual_most_played_games = casual_df.groupby('most_played_game')['playtime_hours'].sum().nlargest(10)

# Top 10
plt.figure(figsize=(12, 8))
sns.barplot(x=casual_most_played_games.values, y=casual_most_played_games.index, palette="viridis")
plt.title("Top 10 Most Played Games by Total Hours (Casual Gamers)")
plt.xlabel("Total Hours Played")
plt.ylabel("Game Title")
plt.show()

In [None]:
# Read gamers_random.txt: one user ID per line, surrounding whitespace removed
# and blank lines skipped. These IDs drive the gamer dataset.
with open("gamers_random.txt", "r") as id_file:
    stripped_lines = (raw_line.strip() for raw_line in id_file)
    Gamer_ids = [entry for entry in stripped_lines if entry]

In [None]:
# Query Steam's get_owned_games for every gamer ID and collect one DataFrame
# per user in gamer_data: one row per owned game, with the account-level
# values (game count, total hours, most played game) repeated on every row.
# Users with no returned games get a single placeholder row instead.
gamer_columns = [
    'steam_id', 'game_count', 'total_hours_account (hrs)', 'most_played_game',
    'name', 'playtime_forever (mins)', 'playtime_2weeks (mins)',
]
gamer_data = []
for steam_id in Gamer_ids:
    try:
        response = steam.users.get_owned_games(steam_id)
        if 'games' in response and response['games']:
            games_df = pd.DataFrame(response['games'])
            game_count = games_df.shape[0]
            # adding all the minutes played and converting to hours
            total_hours_played = games_df['playtime_forever'].sum() / 60
            max_playtime = games_df['playtime_forever'].max()
            # some players have not played within the 2 week span, so the
            # column may be absent
            if 'playtime_2weeks' in games_df.columns:
                # NOTE(review): this writes the user's summed 2-week playtime
                # onto every row, replacing the per-game values. Confirm an
                # account-level total is what is wanted here.
                games_df['playtime_2weeks'] = games_df['playtime_2weeks'].sum()
            else:
                games_df['playtime_2weeks'] = 0  # if no 'playtime_2weeks'

            # every game tied for the highest playtime_forever, joined into a
            # single string so ties are kept
            is_most_played = games_df['playtime_forever'] == max_playtime
            most_played_games = games_df.loc[is_most_played, 'name'].tolist()
            most_played_res = ', '.join(most_played_games)

            # account-level values, repeated on each of this user's rows
            games_df['steam_id'] = steam_id
            games_df['game_count'] = game_count
            games_df['total_hours_account'] = round(total_hours_played, 2)
            games_df['most_played_game'] = most_played_res

            # Use consistent column names (units in the header)
            games_df = games_df.rename(columns={
                'total_hours_account': 'total_hours_account (hrs)',
                'playtime_2weeks': 'playtime_2weeks (mins)',
                'playtime_forever': 'playtime_forever (mins)',
            })

            # Select the renamed 'playtime_forever (mins)' column: asking for
            # the pre-rename name raised a KeyError for every user with games,
            # which the except below turned into a printed message.
            gamer_data.append(games_df[gamer_columns])
        else:
            # Placeholder row so users without games still appear in the
            # output. most_played_game is None rather than a list so the
            # column stays string/missing like in the branch above.
            gamer_data.append(pd.DataFrame([{
                'steam_id': steam_id,
                'game_count': 0,
                'total_hours_account (hrs)': 0,
                'most_played_game': None,
                'name': None,
                'playtime_forever (mins)': 0.0,
                'playtime_2weeks (mins)': 0.0
            }]))
            print(f"No games found for Steam ID {steam_id}")
    except Exception as e:
        # best effort: report the failing ID and carry on with the next one
        print(f"Error processing Steam ID {steam_id}: {e}")

In [None]:
# output the gamers data to a csv
gamer_df = pd.concat(gamer_data, ignore_index=True)
gamer_df.rename(columns={'name': 'most_played_game'}, inplace=True)
gamer_df.to_csv("steam_user_games_data_gamers.csv", index=False)

In [None]:
# graph out the top 10 most played gmes in gamers
gamer_most_played_games = gamer_df.groupby('most_played_game')['playtime_hours'].sum().nlargest(10)

# Top 10
plt.figure(figsize=(12, 8))
sns.barplot(x=gamer_most_played_games.values, y=gamer_most_played_games.index, palette="viridis")
plt.title("Top 10 Most Played Games by Total Hours (Casual Gamers)")
plt.xlabel("Total Hours Played")
plt.ylabel("Game Title")
plt.show()