In [11]:
import sqlite3
import requests
import json
import time
import os
from bs4 import BeautifulSoup
from datetime import datetime
import tempfile
import re
import random

def download_db(url, local_path, timeout=30):
    """Download the steam_games.db file from ``url`` and write it to ``local_path``.

    Args:
        url: HTTP(S) address of the database file.
        local_path: Path of the file the response body is written to.
        timeout: Seconds handed to ``requests.get``; without a timeout a
            stalled server would block the run forever.

    Returns:
        True when the server answered 200 and the body was written,
        False on any other status code or on a network error.
    """
    try:
        # The context manager releases the streamed connection on every path,
        # including the non-200 one where the body is never consumed.
        with requests.get(url, stream=True, timeout=timeout) as response:
            if response.status_code != 200:
                print(f"Échec du téléchargement de la base de données. Code de statut: {response.status_code}")
                return False
            with open(local_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
    except requests.RequestException as e:
        # Report network failures the same way as a bad status code instead
        # of letting them escape to a caller that only checks the boolean.
        print(f"Échec du téléchargement de la base de données: {e}")
        return False
    return True

def get_game_details(steam_game_id, timeout=10):
    """Fetch the details of one game from the Steam store ``appdetails`` API.

    Args:
        steam_game_id: Steam application id to look up.
        timeout: Seconds handed to ``requests.get``.

    Returns:
        The ``data`` object of the API answer when the request returned 200
        and the entry for this id reports ``success``; None in every other
        case (bad status, network error, invalid JSON, unexpected payload).
    """
    url = f"https://store.steampowered.com/api/appdetails?appids={steam_game_id}"
    try:
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            return None
        data = response.json()
    except (requests.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON.
        print(f"Erreur lors de la récupération des détails pour le jeu {steam_game_id}: {e}")
        return None

    # The answer is expected to be keyed by the app id as a string; guard the
    # lookups so a null body or a missing key yields None instead of raising.
    entry = data.get(str(steam_game_id)) if isinstance(data, dict) else None
    if isinstance(entry, dict) and entry.get('success'):
        return entry.get('data')
    return None

def get_steam_page_info(app_id, timeout=10):
    """Scrape the Steam store page of ``app_id`` for its AI disclosure and tags.

    Args:
        app_id: Steam application id whose store page is fetched.
        timeout: Seconds handed to ``requests.get``.

    Returns:
        A dict with the keys ``ai_generated`` (bool), ``ai_content`` (str or
        None) and ``tags`` (list of str), or None when the page could not be
        fetched or parsed (the error is printed).
    """
    url = f"https://store.steampowered.com/app/{app_id}/"
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Any text node mentioning the disclosure (case-insensitive) flags the game
        ai_disclosure = soup.find(string=re.compile("AI GENERATED CONTENT DISCLOSURE", re.IGNORECASE))

        # The disclosure text is read from the first <i> element that follows
        # the exact-case <h2> heading, when both are present
        ai_content = None
        ai_section = soup.find('h2', string='AI Generated Content Disclosure')
        if ai_section is not None:
            ai_paragraph = ai_section.find_next('i')
            if ai_paragraph is not None:
                ai_content = ai_paragraph.text.strip()

        # Collect the tags
        tag_elements = soup.find_all('a', class_='app_tag')
        tags = [tag.text.strip() for tag in tag_elements]

        return {
            'ai_generated': bool(ai_disclosure),
            'ai_content': ai_content,
            'tags': tags,
        }
    except Exception as e:
        # Best effort: a page that cannot be scraped must not stop the batch
        print(f"Erreur lors du scraping pour le jeu {app_id}: {e}")
        return None

def execute_with_retry(cursor, sql, params=None, max_attempts=5, delay=1):
    """Execute ``sql`` on ``cursor``, retrying while the database is locked.

    Args:
        cursor: Object exposing ``execute(sql[, params])``.
        sql: Statement to run.
        params: Bound parameters; omitted from the call when falsy.
        max_attempts: Total number of tries before giving up.
        delay: Seconds slept before the first retry; doubled after each one.

    Raises:
        sqlite3.OperationalError: Immediately for any error other than
            "database is locked", or once the last attempt is still locked.
    """
    final_attempt = max_attempts - 1
    call_args = (sql, params) if params else (sql,)
    for attempt in range(max_attempts):
        try:
            cursor.execute(*call_args)
        except sqlite3.OperationalError as e:
            can_retry = attempt < final_attempt and "database is locked" in str(e)
            if not can_retry:
                raise
            print(f"Database is locked. Retrying in {delay} seconds...")
            time.sleep(delay)
            delay *= 2  # Exponential back-off
        else:
            return


def ensure_column_exists(cursor, table_name, column_name, column_type):
    """Add ``column_name`` to ``table_name`` unless the table already has it.

    The table and column identifiers are interpolated into the SQL text, so
    they must come from trusted code, never from user input.
    """
    cursor.execute(f"PRAGMA table_info({table_name})")
    # Field 1 of each returned row is compared against the requested name
    existing_names = {row[1] for row in cursor.fetchall()}
    if column_name in existing_names:
        return
    cursor.execute(f"ALTER TABLE {table_name} ADD COLUMN {column_name} {column_type}")
    print(f"Colonne '{column_name}' ajoutée à la table {table_name}")



def update_aug_steam_games():
    """Enrich the next batch of Steam games and store them in aug_steam_games.db.

    Main entry point. Steps:
      1. Read the resume point from ``last_processed_game_id.txt`` (12570
         when the file does not exist).
      2. Download ``steam_games.db`` to a temporary file and select up to
         1000 ``steam_game_id`` values greater than the resume point.
      3. For each id, query the appdetails API and scrape the store page,
         then ``INSERT OR REPLACE`` one row into ``aug_steam_games``.
      4. Commit that row, then record the id as the new resume point. Ids
         for which the API returns no details advance the resume point too.

    Errors are printed rather than raised; the temporary database file is
    removed on every exit path.
    """
    # Load the last processed game_id
    try:
        with open('last_processed_game_id.txt', 'r') as f:
            last_processed_id = int(f.read().strip())
    except FileNotFoundError:
        last_processed_id = 12570  # Default value when the file does not exist

    db_url = f"https://raw.githubusercontent.com/{os.getenv('PAT_GITHUB_USERNAME')}/steampage-creation-date/main/steam_games.db"

    # Only the path is needed: close the handle right away so that download_db
    # and sqlite3 can reopen the file (a still-open NamedTemporaryFile cannot
    # be opened a second time on Windows).
    temp_db = tempfile.NamedTemporaryFile(delete=False, suffix='.db')
    temp_db.close()

    conn_steam = None
    conn_aug = None
    try:
        # Download steam_games.db
        if not download_db(db_url, temp_db.name):
            print("Échec du téléchargement de steam_games.db")
            return

        # Connect to both databases
        conn_steam = sqlite3.connect(temp_db.name)
        conn_aug = sqlite3.connect('aug_steam_games.db', timeout=20)
        cursor_steam = conn_steam.cursor()
        cursor_aug = conn_aug.cursor()

        # Add the 'type' column if it is missing
        ensure_column_exists(cursor_aug, 'aug_steam_games', 'type', 'TEXT')

        # Fetch the next 1000 game_ids
        execute_with_retry(cursor_steam, "SELECT steam_game_id FROM games WHERE steam_game_id > ? ORDER BY steam_game_id LIMIT 1000", (last_processed_id,))
        game_ids = cursor_steam.fetchall()

        for (game_id,) in game_ids:
            print(f"Traitement du game_id : {game_id}")
            game_data = get_game_details(game_id)
            if game_data:
                steam_page_info = get_steam_page_info(game_id)

                # Prepare the values for insertion
                add_date = int(time.time())
                game_type = game_data.get('type', 'Unknown')
                dev = ', '.join(game_data.get('developers', []))
                publisher = ', '.join(game_data.get('publishers', []))
                tags = ', '.join(steam_page_info['tags']) if steam_page_info else ''
                release_date = game_data.get('release_date', {}).get('date', '')
                description = game_data.get('short_description', '')
                ai_generated = 'Yes' if steam_page_info and steam_page_info['ai_generated'] else 'No'
                ai_content = steam_page_info['ai_content'] if steam_page_info and steam_page_info['ai_generated'] else None
                content_descriptors = game_data.get('content_descriptors', {})
                content_descriptors_ids = content_descriptors.get('ids', [])
                content_descriptors_str = ', '.join(map(str, content_descriptors_ids))
                supported_languages = game_data.get('supported_languages', '')
                free = 'Yes' if game_data.get('is_free', False) else 'No'
                dlc = 'Yes' if game_type == 'dlc' else 'No'

                # Insert or update the row
                execute_with_retry(cursor_aug, '''
                INSERT OR REPLACE INTO aug_steam_games
                (game_id, add_date, type, dev, publisher, tags, release_date, description, ai_generated, ai_content, 
                content_descriptors, supported_languages, free, dlc)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                ''', (game_id, add_date, game_type, dev, publisher, tags, release_date, description, ai_generated, ai_content,
                      content_descriptors_str, supported_languages, free, dlc))

                # Commit before moving the resume point: otherwise an
                # interruption loses the uncommitted rows while the progress
                # file already points past them, so they are never retried.
                conn_aug.commit()

            # Record the last processed game_id
            with open('last_processed_game_id.txt', 'w') as f:
                f.write(str(game_id))

            # Pause to avoid hammering the API
            time.sleep(random.uniform(0.5, 1.7))

    except Exception as e:
        print(f"Une erreur est survenue : {e}")
        if conn_aug is not None:
            conn_aug.rollback()
    finally:
        # Closing a connection is enough; no separate cursor handling needed
        if conn_steam is not None:
            conn_steam.close()
        if conn_aug is not None:
            conn_aug.close()
        # Remove the temporary file on every exit path, interrupts included
        os.unlink(temp_db.name)

# Run one update batch when this code is executed directly rather than imported.
if __name__ == "__main__":
    update_aug_steam_games()

Traitement du game_id : 33280
Traitement du game_id : 33287
Traitement du game_id : 33288
Traitement du game_id : 33289
Traitement du game_id : 33290
Traitement du game_id : 33310
Traitement du game_id : 33320
Traitement du game_id : 33325
Traitement du game_id : 33326
Traitement du game_id : 33349
Traitement du game_id : 33359
Traitement du game_id : 33360
Traitement du game_id : 33361
Traitement du game_id : 33362
Traitement du game_id : 33370
Traitement du game_id : 33371
Traitement du game_id : 33390
Traitement du game_id : 33400
Traitement du game_id : 33420
Traitement du game_id : 33428
Traitement du game_id : 33435
Traitement du game_id : 33436
Traitement du game_id : 33446
Traitement du game_id : 33447
Traitement du game_id : 33448
Traitement du game_id : 33456
Traitement du game_id : 33457
Traitement du game_id : 33460
Traitement du game_id : 33500
Traitement du game_id : 33510
Traitement du game_id : 33520
Traitement du game_id : 33530
Traitement du game_id : 33540
Traitement

KeyboardInterrupt: 

In [13]:
import sqlite3
import requests
import json
import time
import os
from bs4 import BeautifulSoup
from datetime import datetime
import tempfile
import re
import random
import logging

# Logging configuration: INFO level, each record formatted as "time - level - message"
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def download_db(url, local_path, timeout=30):
    """Download the database file at ``url`` and write it to ``local_path``.

    Args:
        url: HTTP(S) address of the database file.
        local_path: Path of the file the response body is written to.
        timeout: Seconds handed to ``requests.get``; without a timeout a
            stalled server would block the run forever.

    Returns:
        True when the file was downloaded and written, False when the
        request failed (the error is logged).
    """
    try:
        # The context manager releases the streamed connection even when
        # raise_for_status() or the copy loop fails part-way.
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(local_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        logging.info(f"Base de données téléchargée avec succès: {local_path}")
        return True
    except requests.RequestException as e:
        logging.error(f"Échec du téléchargement de la base de données: {e}")
        return False

def get_game_details(steam_game_id):
    """Fetch the details of one game from the Steam store ``appdetails`` API.

    Args:
        steam_game_id: Steam application id to look up.

    Returns:
        The ``data`` object of the API answer when the entry for this id
        reports ``success``; None when the request fails, the body is not
        valid JSON, or the payload does not contain usable data. Every
        failure is logged.
    """
    url = f"https://store.steampowered.com/api/appdetails?appids={steam_game_id}"
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        data = response.json()
        # The answer is expected to be keyed by the app id as a string; guard
        # the lookups so a null body or a missing key is logged as "no data"
        # instead of raising KeyError/TypeError and aborting the whole batch.
        entry = data.get(str(steam_game_id)) if isinstance(data, dict) else None
        if isinstance(entry, dict) and entry.get('success') and 'data' in entry:
            logging.info(f"Détails récupérés pour le jeu {steam_game_id}")
            return entry['data']
        logging.warning(f"Pas de données pour le jeu {steam_game_id}")
    except requests.RequestException as e:
        logging.error(f"Erreur lors de la récupération des détails pour le jeu {steam_game_id}: {e}")
    except json.JSONDecodeError as e:
        logging.error(f"Erreur de décodage JSON pour le jeu {steam_game_id}: {e}")
    return None

def get_steam_page_info(app_id):
    """Scrape the Steam store page of ``app_id`` for its AI disclosure and tags.

    Returns:
        A dict with the keys ``ai_generated`` (bool), ``ai_content`` (str or
        None) and ``tags`` (list of str), or None when fetching or parsing
        the page fails (the error is logged).
    """
    page_url = f"https://store.steampowered.com/app/{app_id}/"
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    }

    try:
        page = requests.get(page_url, headers=browser_headers, timeout=10)
        page.raise_for_status()
        soup = BeautifulSoup(page.text, 'html.parser')

        # Case-insensitive text search decides the flag; the exact-case <h2>
        # heading locates the <i> element holding the disclosure text.
        disclosure_text = soup.find(string=re.compile("AI GENERATED CONTENT DISCLOSURE", re.IGNORECASE))
        disclosure_heading = soup.find('h2', string='AI Generated Content Disclosure')
        italic = disclosure_heading.find_next('i') if disclosure_heading else None
        ai_content = italic.text.strip() if italic else None

        info = {
            'ai_generated': bool(disclosure_text),
            'ai_content': ai_content,
            'tags': [anchor.text.strip() for anchor in soup.find_all('a', class_='app_tag')],
        }
        logging.info(f"Informations de la page Steam récupérées pour le jeu {app_id}")
        return info
    except requests.RequestException as e:
        logging.error(f"Erreur lors du scraping pour le jeu {app_id}: {e}")
        return None
    except Exception as e:
        logging.error(f"Erreur inattendue lors du scraping pour le jeu {app_id}: {e}")
        return None

def execute_with_retry(cursor, sql, params=None, max_attempts=5, delay=1):
    """Execute ``sql`` on ``cursor``, backing off while the database is locked.

    Args:
        cursor: Object exposing ``execute(sql[, params])``.
        sql: Statement to run.
        params: Bound parameters; omitted from the call when falsy.
        max_attempts: Total number of tries before giving up.
        delay: Seconds slept before the first retry; doubled after each one.

    Raises:
        sqlite3.OperationalError: For any error other than "database is
            locked", or when the final attempt still finds it locked.
    """
    attempt = 0
    while attempt < max_attempts:
        try:
            if params:
                cursor.execute(sql, params)
            else:
                cursor.execute(sql)
            return
        except sqlite3.OperationalError as e:
            out_of_attempts = attempt >= max_attempts - 1
            if out_of_attempts or "database is locked" not in str(e):
                raise
        # Only reached after a retryable lock error
        logging.warning(f"Base de données verrouillée. Nouvelle tentative dans {delay} secondes...")
        time.sleep(delay)
        delay *= 2
        attempt += 1

def ensure_column_exists(cursor, table_name, column_name, column_type):
    """Add ``column_name`` (of ``column_type``) to ``table_name`` if it is absent.

    The identifiers are interpolated into the SQL text, so they must come
    from trusted code, never from user input.
    """
    cursor.execute(f"PRAGMA table_info({table_name})")
    # Field 1 of each returned row is compared against the requested name
    already_present = any(info[1] == column_name for info in cursor.fetchall())
    if not already_present:
        alter_sql = f"ALTER TABLE {table_name} ADD COLUMN {column_name} {column_type}"
        cursor.execute(alter_sql)
        logging.info(f"Colonne '{column_name}' ajoutée à la table {table_name}")

def update_aug_steam_games():
    """Enrich the next batch of Steam games and store them in aug_steam_games.db.

    Steps:
      1. Read the resume point from ``last_processed_game_id.txt`` (12570
         when the file does not exist).
      2. Download ``steam_games.db`` to a temporary file and select up to
         1000 ``steam_game_id`` values greater than the resume point.
      3. For each id, query the appdetails API and scrape the store page,
         then ``INSERT OR REPLACE`` one row into ``aug_steam_games``.
      4. Commit that row, then record the id as the new resume point. Ids
         for which the API returns no details advance the resume point too.

    Errors are logged rather than raised; the temporary database file is
    removed on every exit path.
    """
    try:
        with open('last_processed_game_id.txt', 'r') as f:
            last_processed_id = int(f.read().strip())
        logging.info(f"Dernier game_id traité: {last_processed_id}")
    except FileNotFoundError:
        last_processed_id = 12570
        logging.info(f"Fichier last_processed_game_id.txt non trouvé. Utilisation de la valeur par défaut: {last_processed_id}")

    db_url = f"https://raw.githubusercontent.com/{os.getenv('PAT_GITHUB_USERNAME')}/steampage-creation-date/main/steam_games.db"

    # Only the path is needed: close the handle right away so that download_db
    # and sqlite3 can reopen the file (a still-open NamedTemporaryFile cannot
    # be opened a second time on Windows).
    temp_db = tempfile.NamedTemporaryFile(delete=False, suffix='.db')
    temp_db.close()

    conn_steam = None
    conn_aug = None
    try:
        if not download_db(db_url, temp_db.name):
            logging.error("Échec du téléchargement de steam_games.db")
            return

        conn_steam = sqlite3.connect(temp_db.name)
        conn_aug = sqlite3.connect('aug_steam_games.db', timeout=20)
        cursor_steam = conn_steam.cursor()
        cursor_aug = conn_aug.cursor()

        ensure_column_exists(cursor_aug, 'aug_steam_games', 'type', 'TEXT')

        execute_with_retry(cursor_steam, "SELECT steam_game_id FROM games WHERE steam_game_id > ? ORDER BY steam_game_id LIMIT 1000", (last_processed_id,))
        game_ids = cursor_steam.fetchall()

        for (game_id,) in game_ids:
            logging.info(f"Traitement du game_id : {game_id}")
            game_data = get_game_details(game_id)
            if game_data:
                steam_page_info = get_steam_page_info(game_id)

                add_date = int(time.time())
                game_type = game_data.get('type', 'Unknown')
                dev = ', '.join(game_data.get('developers', []))
                publisher = ', '.join(game_data.get('publishers', []))
                tags = ', '.join(steam_page_info['tags']) if steam_page_info else ''
                release_date = game_data.get('release_date', {}).get('date', '')
                description = game_data.get('short_description', '')
                ai_generated = 'Yes' if steam_page_info and steam_page_info['ai_generated'] else 'No'
                ai_content = steam_page_info['ai_content'] if steam_page_info and steam_page_info['ai_generated'] else None
                content_descriptors = game_data.get('content_descriptors', {})
                content_descriptors_ids = content_descriptors.get('ids', [])
                content_descriptors_str = ', '.join(map(str, content_descriptors_ids))
                supported_languages = game_data.get('supported_languages', '')
                free = 'Yes' if game_data.get('is_free', False) else 'No'
                dlc = 'Yes' if game_type == 'dlc' else 'No'

                execute_with_retry(cursor_aug, '''
                INSERT OR REPLACE INTO aug_steam_games
                (game_id, add_date, type, dev, publisher, tags, release_date, description, ai_generated, ai_content, 
                content_descriptors, supported_languages, free, dlc)
                VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                ''', (game_id, add_date, game_type, dev, publisher, tags, release_date, description, ai_generated, ai_content,
                      content_descriptors_str, supported_languages, free, dlc))

                # Commit before moving the resume point: otherwise an
                # interruption loses the uncommitted rows while the progress
                # file already points past them, so they are never retried.
                conn_aug.commit()

            with open('last_processed_game_id.txt', 'w') as f:
                f.write(str(game_id))

            # Randomised pause between games to avoid hammering the API
            time.sleep(random.uniform(0.5, 1.7))

        logging.info("Mise à jour terminée avec succès")

    except Exception as e:
        logging.error(f"Une erreur est survenue : {e}")
        if conn_aug is not None:
            conn_aug.rollback()
    finally:
        # Closing a connection is enough; no separate cursor handling needed
        if conn_steam is not None:
            conn_steam.close()
        if conn_aug is not None:
            conn_aug.close()
        # Remove the temporary file on every exit path, interrupts included
        os.unlink(temp_db.name)
        logging.info("Fichier temporaire supprimé")

# Run one update batch when this code is executed directly rather than imported.
if __name__ == "__main__":
    update_aug_steam_games()

2024-08-01 19:10:09,948 - INFO - Dernier game_id traité: 70640
2024-08-01 19:10:10,562 - INFO - Base de données téléchargée avec succès: /var/folders/b5/ckj362xj7ng_xqbjrqp077qh0000gn/T/tmpfzxl9_1h.db
2024-08-01 19:10:10,565 - INFO - Traitement du game_id : 70650
2024-08-01 19:10:10,807 - INFO - Détails récupérés pour le jeu 70650
2024-08-01 19:10:11,242 - INFO - Informations de la page Steam récupérées pour le jeu 70650
2024-08-01 19:10:12,838 - INFO - Traitement du game_id : 70660
2024-08-01 19:10:13,090 - INFO - Détails récupérés pour le jeu 70660
2024-08-01 19:10:13,780 - INFO - Informations de la page Steam récupérées pour le jeu 70660
2024-08-01 19:10:14,609 - INFO - Traitement du game_id : 70900
2024-08-01 19:10:14,840 - INFO - Détails récupérés pour le jeu 70900
2024-08-01 19:10:15,676 - INFO - Informations de la page Steam récupérées pour le jeu 70900
2024-08-01 19:10:17,096 - INFO - Traitement du game_id : 70910
2024-08-01 19:10:17,382 - INFO - Détails récupérés pour le jeu 7