In [1]:
import time as mytime
import pandas as pd
import requests
import concurrent.futures
import csv
from datetime import datetime
import io


# Function to get game data from API
def get_game_data(game_id):
    try:

        url = 'https://store.steampowered.com/appreviews/'+str(game_id)+'?json=1&language=all&review_type=all&purchase_type=all&day_range=9223372036854775807&filter=all'
        response = requests.get(url, timeout=10)
        response.encoding = 'utf-8-sig'
        data = response.json().get('query_summary')
        increment_counter()

        try:
            url = 'https://store.steampowered.com/api/appdetails?appids='+str(game_id)
            response = requests.get(url, timeout=10)  
            increment_counter()
            data.update(response.json().get(str(game_id)).get('data'))
            data['last_updated'] = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            print("Success")
            return data
        
        except Exception as e:
            print("An error occurred while fetching data using API for game ID:", game_id)
            print(e)
            return None

    except ValueError:
        print("Proxy not found, removing from the list.")
        return get_game_data(game_id)
    
    except requests.exceptions.ProxyError as e:
        print("Proxy error occurred, retrying in 2 seconds:")
        mytime.sleep(2)
        return get_game_data(game_id)
    
    except requests.exceptions.ConnectTimeout as e:
        print("Connection timeout occurred, retrying in 2 seconds:")
        mytime.sleep(2)
        return get_game_data(game_id)
    
    except requests.exceptions.SSLError as e:
        print("SSL error occurred, removing proxy from the list:")
        return get_game_data(game_id)
    
    except requests.exceptions.ReadTimeout as e:
        print("Read timeout occurred, retrying with longer timeout:")
        mytime.sleep(2)
        return get_game_data(game_id)
    
    
# Counter and a function to increment it
counter = 0
last_request_time = mytime.monotonic()

#80 requests per minute seems to be the limit, but there's no documentation to support my statement
def increment_counter():
    global counter
    global last_request_time
    counter += 1
    if counter % 80 == 0:
        elapsed_time = mytime.monotonic() - last_request_time
        if elapsed_time < 60:
            sleep_time = 60 - elapsed_time
            print(f"Reached the limit of 80 requests per minute, sleeping for {sleep_time:.2f} seconds...")
            mytime.sleep(sleep_time)
            last_request_time = mytime.monotonic()

    
# Load input data
df_input = pd.read_csv('Steam Games Database.csv')

# Define a list of lists to store results for each thread
results = [[] for _ in range(len(df_input))]

# Define the number of threads to use
num_threads = 1

output_file = 'Steam Games Data.csv'

# Get the header from the file
with open(output_file, 'r', newline='', encoding='utf-8-sig') as csvfile:
    reader = csv.reader(csvfile)
    headers = next(reader)  # read the header from the file

# Create a thread pool and map the API requests to threads
with concurrent.futures.ThreadPoolExecutor(max_workers=num_threads) as executor:
    # Map each game ID to a separate thread and its corresponding proxy list
    for i, game_id in enumerate(df_input["appid"]):
        thread_index = i % num_threads

        # Submit the API request for the current game ID to the current thread
        future = executor.submit(get_game_data, game_id)
        # Append the future object to the corresponding results list for the current thread
        results[thread_index].append(future)

        # Export to CSV after each iteration
        with io.open(output_file, mode='a', newline='', encoding='utf-8-sig') as file:
            writer = csv.writer(file)
            
            # Iterate over the thread results for each thread
            for thread_results in results:
                # Iterate over the futures for each game ID submitted to the thread
                for future in thread_results:
                    data = future.result()

                    if data:
                        row_values = []
                        for header in headers:
                            if header in data:
                                row_values.append(data[header])
                            else:
                                row_values.append('')
                        writer.writerow(row_values)
                        file.flush()
                        data.clear()  # clear the data after appending it to the file

Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Reached the limit of 40 requests per minute, sleeping for 44.78 seconds...
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
An error occurred while fetching data using API for game ID: 70600
'NoneType' object is not iterable
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Reached the limit of 40 requests per minute, sleeping for 43.32 seconds...
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Succe

Bad pipe message: %s [b'\x9c\xe9\x9c\xaac\x08\x9fY\x91l7\xea\xed\x1b\xe5e\xd8\xc5 O\xdd,Qsx!.T\x0b\x94Q\xfbm\xc4\rM\xb7\tW7\x1f:/\n\x18\x05\xc1\x02\x93\x8b\xb0\x00\x08\x13\x02\x13\x03\x13\x01\x00\xff\x01\x00\x00\x8f\x00\x00\x00\x0e\x00\x0c\x00\x00\t127.0.0.1\x00\x0b\x00\x04\x03\x00\x01\x02\x00\n\x00\x0c\x00\n\x00\x1d\x00\x17\x00\x1e\x00\x19\x00\x18\x00#\x00\x00\x00\x16\x00\x00\x00\x17\x00\x00\x00\r\x00\x1e\x00\x1c\x04\x03\x05\x03\x06\x03\x08\x07\x08\x08\x08\t\x08']
Bad pipe message: %s [b'\x0b\x08\x04\x08\x05\x08\x06\x04\x01']
Bad pipe message: %s [b'9\xa1\xdaP\xe8\xd5Qe(\xb8U\xf2\xd3\xec\xf2\x010\' \xb3fX\xe01\x03\x98l\xf0xp\x1c\t\xb4T\xe4~\xcc"\xdb\x18)\xb6\x86\xe7\xe2\xef\xf2^\xd1\xf5=\x00\x08\x13\x02\x13\x03\x13\x01\x00\xff\x01\x00\x00\x8f\x00\x00\x00\x0e\x00\x0c\x00\x00\t127.0.0.1\x00\x0b\x00\x04\x03\x00\x01\x02\x00\n\x00\x0c\x00\n\x00\x1d\x00\x17\x00\x1e\x00\x19\x00\x18\x00#\x00\x00\x00\x16\x00\x00\x00\x17\x00\x00\x00\r\x00\x1e\x00\x1c\x04\x03\x05\x03\x06\x03\x08\x07\x08\x08\x08\

Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Reached the limit of 40 requests per minute, sleeping for 45.38 seconds...
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Reached the limit of 40 requests per minute, sleeping for 46.24 seconds...
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Success
Su