In [1]:
from stackapi import StackAPI, StackAPIError

import time
import os

import pandas as pd

In [2]:
def err_handle(e):

    print("Error URL: {}".format(e.url))
    print("Error Code: {}".format(e.code))
    print("Error Error: {}".format(e.error))
    print("Error Message: {}".format(e.message))

In [3]:
def get_questions_daily(site, ids, offset, answer_path, n_fetch, n_questions = 50):
    
    """
    Fetches the answers and adds them to the main answers file.
    
    site : StackAPI object
        Used to pull the data from Stack Overflow.
    
    ids : list-like
        Question IDs to be used as filters to fetch answers.
    
    n_questions : int
        Number of questions used for answer extraction in a single fetch command.
    
    n_fetch : int
        Number of times requests are sent.
    
    offset : int
        Indicates the point from which the data starts being pulled.
    """

    print("Start code")
    
    results = []
    
    try:
        
        for i in range(n_fetch):
            
            id_list = ids[i * n_questions: (i+1) * n_questions]            

            fetch_obj = site.fetch("questions/{ids}/answers", ids = id_list)

            print("API Fetched")

            items = fetch_obj["items"]

            backoff = fetch_obj["backoff"]

            if backoff > 0:
                time.sleep(backoff)

            for item in items:

                for owner_key in ["user_id", "reputation"]:

                    if owner_key not in item["owner"].keys():

                        item["owner"][owner_key] = "NA"

                keywords = [(item, key) for key in ["creation_date", "answer_id", 
                                                    "is_accepted", "question_id", "score"]] + \
                            [(item["owner"], key) for key in ["user_id", "reputation", "user_type"]]

                results.append({key: str(obj[key]) for obj, key in keywords})

        print("Answer meta data extracted")

    except StackAPIError as e:

        err_handle(e)
        
    finally:
        
        print("Number of Records", len(results))
        
        pd.DataFrame(results).to_csv(answer_path, mode = "a+", header = not(os.path.exists(answer_path)), index = False)
        
        return fetch_obj

In [6]:
# set question offset for next addition
answer_path = os.path.join(os.getcwd(), "answers.csv")

offset = 5000

#if os.path.exists(answer_path):
#    offset = pd.read_csv(answer_path)["question_id"].nunique()

# assuming each question has on average 2 answers
n_questions = 50  # upto 100

n_fetch = 50

# get question list
questions = pd.read_csv("questions-pending.csv")

questions["CreationDate"] = pd.to_datetime(questions["CreationDate"])

#questions = questions[(questions["CreationDate"] >= "2019-11-01") 
#                      & (questions["CreationDate"] <= "2020-02-28")].sort_values("Id")
# 2331 questions #2580 answers
#questions = questions[(questions["CreationDate"] >= "2018-11-01") 
#                      & (questions["CreationDate"] <= "2019-02-28")].sort_values("Id")
# 3210 questions #2080 answers

#questions = questions[(questions["CreationDate"] >= "2017-11-01") # 2363
#                      & (questions["CreationDate"] <= "2018-02-28")].sort_values("Id") 
# 1239 questions #2363 answers
#questions = questions[(questions["CreationDate"] >= "2016-11-01") # 2363
#                      & (questions["CreationDate"] <= "2017-02-28")].sort_values("Id") 
# 1752 questions #3426 answers
#questions = questions[(questions["CreationDate"] >= "2016-01-01") # 2363
#                      & (questions["CreationDate"] <= "2016-02-28")].sort_values("Id")

#id_list = list(questions["Id"][offset:offset + n_questions * n_fetch])
id_list = list(questions["Id"][5000:7291])
id_list_1 = []
for eachID in id_list:
    id_list_1.append(int(eachID))
print (str(len(id_list_1)))
# extract answers
try:
    site = StackAPI("stackoverflow", page_size = 100, max_pages = 1)
    fetch_object = get_questions_daily(site, ids = id_list_1, n_questions = n_questions, 
                                       n_fetch = n_fetch, offset = offset, answer_path = answer_path)

except StackAPIError as e:
    err_handle(e)

2291
Start code
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
API Fetched
Error URL: https://api.stackexchange.com/2.2/questions//answers/?pagesize=100&page=1&filter=default&site=stackoverflow
Error Code: no_method
Error Error: 404
Error Message: no method found with this name
Number of Records 4514
