In [15]:
from google.colab import files
import csv

uploaded = files.upload()

class SteamDataAnalyzer:
    def __init__(self, file_path):
        self.file_path = file_path
        self.games = []
        self.load_data()

    def load_data(self):
        with open(self.file_path, mode='r', encoding='utf-8') as file:
            reader = csv.DictReader(file)
            self.games = list(reader)

    def get_free_vs_paid_percentage(self):
        total_games = len(self.games)
        free_games = sum(1 for game in self.games if game['Price'] == '0')
        paid_games = total_games - free_games
        return {
            'free': (free_games / total_games) * 100,
            'paid': (paid_games / total_games) * 100
        }

    def get_year_with_most_games(self):
        year_count = {}
        for game in self.games:
            year = game['Release date'].split('-')[0]
            if year in year_count:
                year_count[year] += 1
            else:
                year_count[year] = 1

        max_count = max(year_count.values())
        years = [year for year, count in year_count.items() if count == max_count]
        return years

    def get_most_prolific_developer(self):
        developer_count = {}
        for game in self.games:
            developers = game['Developers'].split(';')
            for dev in developers:
                dev = dev.strip()
                if dev in developer_count:
                    developer_count[dev] += 1
                else:
                    developer_count[dev] = 1

        max_games = max(developer_count.values())
        most_prolific = [dev for dev, count in developer_count.items() if count == max_games]
        return most_prolific

Saving steam_games.csv to steam_games (1).csv


In [19]:
analyzer = SteamDataAnalyzer('/content/steam_games.csv')

In [21]:
for i, game in enumerate(analyzer.games[:5]):
    print(f"Jogo {i+1}: {game}")

Jogo 1: {'AppID': '20200', 'Name': 'Galactic Bowling', 'Release date': 'Oct 21, 2008', 'Estimated owners': '0 - 20000', 'Peak CCU': '0', 'Required age': '0', 'Price': '19.99', 'DLC count': '0', 'About the game': 'Galactic Bowling is an exaggerated and stylized bowling game with an intergalactic twist. Players will engage in fast-paced single and multi-player competition while being submerged in a unique new universe filled with over-the-top humor, wild characters, unique levels, and addictive game play. The title is aimed at players of all ages and skill sets. Through accessible and intuitive controls and game-play, Galactic Bowling allows you to jump right into the action. A single-player campaign and online play allow you to work your way up the ranks of the Galactic Bowling League! Whether you have hours to play or only a few minutes, Galactic Bowling is a fast paced and entertaining experience that will leave you wanting more! Full Single-player story campaign including 11 Characte

In [23]:
import csv

# Definição da classe para análise de dados da Steam
class SteamDataAnalyzer:
    def __init__(self, file_path):
        self.file_path = file_path
        self.games = []
        self.load_data()

    def load_data(self):
        with open(self.file_path, mode='r', encoding='utf-8') as file:
            reader = csv.DictReader(file)
            self.games = list(reader)

    def get_free_vs_paid_percentage(self):
        total_games = len(self.games)
        free_games = sum(1 for game in self.games if game['Price'].strip() == '0' or game['Price'].strip() == '0.00')
        paid_games = total_games - free_games
        return {
            'free': (free_games / total_games) * 100 if total_games else 0,
            'paid': (paid_games / total_games) * 100 if total_games else 0
        }

    def get_year_with_most_games(self):
        year_count = {}
        for game in self.games:
            release_date = game['Release date'].strip()

            # Lidar com diferentes formatos de data ou dados ausentes
            if '-' in release_date:  # Formato YYYY-MM-DD
                year = release_date.split('-')[0]
            elif ',' in release_date:  # Formato "Dec 1, 2022"
                year = release_date.split(',')[-1].strip()
            else:
                year = 'Unknown'

            if year not in year_count:
                year_count[year] = 0
            year_count[year] += 1

        # Remover entradas 'Unknown' se presentes
        if 'Unknown' in year_count:
            del year_count['Unknown']

        max_count = max(year_count.values())
        years = [year for year, count in year_count.items() if count == max_count]
        return years

    def get_most_prolific_developer(self):
        developer_count = {}
        for game in self.games:
            developers = game['Developers'].split(';')
            for dev in developers:
                dev = dev.strip()
                if dev:
                    if dev not in developer_count:
                        developer_count[dev] = 0
                    developer_count[dev] += 1

        max_games = max(developer_count.values(), default=0)
        most_prolific = [dev for dev, count in developer_count.items() if count == max_games]
        return most_prolific

# Instanciação da classe com o caminho do arquivo no sample_data
analyzer = SteamDataAnalyzer('/content/sample_data/steam_games.csv')

# Consulta 1: Percentual de jogos gratuitos e pagos
free_vs_paid = analyzer.get_free_vs_paid_percentage()
print(f"Percentual de jogos gratuitos: {free_vs_paid['free']:.2f}%")
print(f"Percentual de jogos pagos: {free_vs_paid['paid']:.2f}%")

# Consulta 2: Ano com o maior número de jogos lançados
most_active_years = analyzer.get_year_with_most_games()
print(f"Ano(s) com o maior número de jogos lançados: {', '.join(most_active_years)}")

# Consulta 3: Desenvolvedor mais prolífico
most_prolific_dev = analyzer.get_most_prolific_developer()
print(f"Desenvolvedor(es) mais prolífico(s): {', '.join(most_prolific_dev)}")

Percentual de jogos gratuitos: 0.00%
Percentual de jogos pagos: 100.00%
Ano(s) com o maior número de jogos lançados: 2022
Desenvolvedor(es) mais prolífico(s): Choice of Games


In [24]:
import random

# Selecionar 20 jogos aleatórios da lista completa
sample_games = random.sample(analyzer.games, 20)

# Salvar a amostra em um novo arquivo CSV
sample_file_path = '/content/sample_data/sample_steam_games.csv'

with open(sample_file_path, mode='w', encoding='utf-8', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=analyzer.games[0].keys())
    writer.writeheader()
    writer.writerows(sample_games)

print(f"Amostra de 20 jogos salva em: {sample_file_path}")

Amostra de 20 jogos salva em: /content/sample_data/sample_steam_games.csv


In [26]:
# Selecione jogos gratuitos e pagos separadamente
free_games = [game for game in analyzer.games if game['Price'].strip() == '0' or game['Price'].strip() == '0.00']
paid_games = [game for game in analyzer.games if game['Price'].strip() != '0' and game['Price'].strip() != '0.00']

# Selecione 10 de cada para garantir diversidade
sample_free = random.sample(free_games, min(10, len(free_games)))
sample_paid = random.sample(paid_games, min(10, len(paid_games)))

# Combine as amostras
sample_games = sample_free + sample_paid

# Salvar a amostra em um novo arquivo CSV
sample_file_path = '/content/sample_data/sample_steam_games.csv'

with open(sample_file_path, mode='w', encoding='utf-8', newline='') as file:
    writer = csv.DictWriter(file, fieldnames=analyzer.games[0].keys())
    writer.writeheader()
    writer.writerows(sample_games)

print(f"Amostra diversificada de 20 jogos salva em: {sample_file_path}")

Amostra diversificada de 20 jogos salva em: /content/sample_data/sample_steam_games.csv


In [27]:
# Instanciação da classe com o arquivo da amostra
sample_analyzer = SteamDataAnalyzer('/content/sample_data/sample_steam_games.csv')

# Consulta 1: Percentual de jogos gratuitos e pagos na amostra
sample_free_vs_paid = sample_analyzer.get_free_vs_paid_percentage()
print(f"Percentual de jogos gratuitos na amostra: {sample_free_vs_paid['free']:.2f}%")
print(f"Percentual de jogos pagos na amostra: {sample_free_vs_paid['paid']:.2f}%")

# Consulta 2: Ano com o maior número de jogos lançados na amostra
sample_most_active_years = sample_analyzer.get_year_with_most_games()
print(f"Ano(s) com o maior número de jogos lançados na amostra: {', '.join(sample_most_active_years)}")

# Consulta 3: Desenvolvedor mais prolífico na amostra
sample_most_prolific_dev = sample_analyzer.get_most_prolific_developer()
print(f"Desenvolvedor(es) mais prolífico(s) na amostra: {', '.join(sample_most_prolific_dev)}")

Percentual de jogos gratuitos na amostra: 0.00%
Percentual de jogos pagos na amostra: 100.00%
Ano(s) com o maior número de jogos lançados na amostra: 2022
Desenvolvedor(es) mais prolífico(s) na amostra: FlexileStudio, Ready To Play, LP Studio, BITMEUP, Challenging Games, WZOGI, CMS Games LLC, Mcpoyle Games, W, Tom Brinton,Alex Morris,Nathan Thomson,Christian Walter
