In [4]:
import time as mytime
import pandas as pd
import requests
import concurrent.futures
import csv
from datetime import datetime


# Function to get game data from API
def get_game_data(game_id, max_retries=5):
    """Fetch the review summary and store details for one Steam app.

    Two requests are made: the ``appreviews`` endpoint (its
    ``query_summary`` object becomes the base of the result) and the
    ``appdetails`` endpoint (its ``data`` object is merged on top).  A
    ``last_updated`` timestamp in local time is added before returning.

    Args:
        game_id: Steam app id; it is converted with ``str()`` when building
            the URLs.
        max_retries: How many additional attempts to make after the first
            one when the review request fails with one of the retryable
            errors handled below.  Previously the function recursed without
            bound, which ended in ``RecursionError`` on a persistent failure.

    Returns:
        The merged dict on success.  ``None`` if the details request or the
        merge fails for any reason, or if every attempt at the review
        request failed with a retryable error.
    """
    # NOTE(review): several messages below mention proxies, but no proxies
    # are passed to requests.get() in this function; the wording is kept
    # as-is so existing log output does not change.
    for _attempt in range(max_retries + 1):
        try:

            url = 'https://store.steampowered.com/appreviews/'+str(game_id)+'?json=1&language=all&review_type=all&purchase_type=all&day_range=9223372036854775807&filter=all'
            response = requests.get(url, timeout=10)
            data = response.json().get('query_summary')
            increment_counter()

            try:
                url = 'https://store.steampowered.com/api/appdetails?appids='+str(game_id)
                response = requests.get(url, timeout=10)
                data.update(response.json().get(str(game_id)).get('data'))
                data['last_updated'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
                print("Success")
                return data

            except Exception as e:
                # Best effort: any failure in the second stage (including a
                # missing 'query_summary' or 'data' object) skips this game
                # instead of retrying.
                print("An error occurred while fetching data using API for game ID:", game_id)
                print(e)
                return None

        except ValueError:
            # Raised, among other things, when the response body is not JSON.
            print("Proxy not found, removing from the list.")

        except requests.exceptions.ProxyError:
            print("Proxy error occurred, retrying in 2 seconds:")
            mytime.sleep(2)

        except requests.exceptions.ConnectTimeout:
            print("Connection timeout occurred, retrying in 2 seconds:")
            mytime.sleep(2)

        except requests.exceptions.SSLError:
            print("SSL error occurred, removing proxy from the list:")

        except requests.exceptions.ReadTimeout:
            print("Read timeout occurred, retrying with longer timeout:")
            mytime.sleep(2)

    # Every attempt hit a retryable error; give up on this game so the
    # caller can move on instead of looping forever.
    print("Giving up after repeated failures for game ID:", game_id)
    return None
    
    
# Counter and a function to increment it
counter = 0
last_request_time = mytime.monotonic()


# 40 requests per minute seems to be the limit, but there's no documentation to support it
def increment_counter():
    """Count one request and throttle to 40 requests per 60-second window.

    Increments the module-level ``counter``.  On every 40th call it compares
    the time elapsed since ``last_request_time`` with 60 seconds and sleeps
    for the remainder if the batch finished early.  The window start is then
    reset unconditionally.

    The original only reset ``last_request_time`` after sleeping, so one
    slow batch left a stale timestamp behind and every later batch looked
    "slow enough", disabling the throttle.
    """
    global counter
    global last_request_time
    counter += 1
    if counter % 40 == 0:
        elapsed_time = mytime.monotonic() - last_request_time
        if elapsed_time < 60:
            sleep_time = 60 - elapsed_time
            print(f"Reached the limit of 40 requests per minute, sleeping for {sleep_time:.2f} seconds...")
            mytime.sleep(sleep_time)
        # Start the next 40-request window now, whether or not we slept.
        last_request_time = mytime.monotonic()


    
# Load input data
df_input = pd.read_csv('Steam Games Database.csv')

# Define the number of threads to use
num_threads = 1

output_file = 'Steam Games Data.csv'

# Get the header from the file. The rows appended below are written as
# UTF-8, so read the existing file with the same encoding instead of the
# platform default.
with open(output_file, 'r', newline='', encoding='utf-8') as csvfile:
    reader = csv.reader(csvfile)
    headers = next(reader)  # read the header from the file

# Fetch every game through the thread pool and append each result to the CSV
# as soon as it is available.  The output file is opened once, and each
# result is consumed exactly once; the earlier version re-walked every
# previously submitted future after each submit and relied on clearing the
# result dict to avoid duplicate rows.
with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor, \
        open(output_file, mode='a', newline='', encoding='utf-8') as file:

    writer = csv.writer(file)

    # executor.map yields results in the same order as the input app ids.
    for data in executor.map(get_game_data, df_input["appid"]):
        # get_game_data returns None (or an empty dict) when a game could
        # not be fetched; such games are skipped.
        if data:
            # Emit columns in header order, blank where the API gave no value.
            writer.writerow([data.get(header, '') for header in headers])
            # Flush per row so an interrupted run keeps what was fetched.
            file.flush()

Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success


KeyboardInterrupt: 