In [6]:
import requests
import pandas as pd
import logging
import time
import json, yaml

# Load the Discourse API key from the local YAML file (kept out of the notebook itself)
with open('key.yaml', 'r') as key_file:
    key_config = yaml.safe_load(key_file)
api_key = key_config['API_KEY']

In [8]:
def execute_query(query_id, query_params=None, timeout=30, pause_seconds=1):
    """Run a Discourse group report query and collect every page of results.

    Posts to ``/g/<group>/reports/<query_id>/run`` once per page, starting at
    page 0, until a response reports ``result_count == 0`` or a request fails.

    Args:
        query_id: Identifier of the report query; interpolated into the URL.
        query_params: Optional dict of extra query parameters sent alongside
            the page number. A ``'page'`` key, if present, is overridden by
            the internal page counter so that pagination always advances
            (previously it pinned every request to the same page and the
            loop never terminated).
        timeout: Seconds to wait for each HTTP response before giving up.
            Without it a stalled connection would block the cell forever.
        pause_seconds: Delay between consecutive page requests; a falsy value
            disables the pause.

    Returns:
        pandas.DataFrame with one row per result row, keyed by the response's
        ``columns`` list. Fetching is best-effort: if any page fails, the
        error is printed and the rows gathered so far are returned, so the
        frame may be incomplete.

    Raises:
        ValueError: If ``query_params`` is neither ``None`` nor a dict.
    """
    # Validate first so a bad call fails before any configuration is read
    # or any request is sent.
    if query_params is not None and not isinstance(query_params, dict):
        raise ValueError("Query parameters must be a dictionary.")

    DISCOURSE_BASE_URL = "https://discourse.onlinedegree.iitm.ac.in"
    GROUP_NAME = "discourse_analytics"
    API_USERNAME = 'shubhamG'

    # URL and headers are identical for every page, so build them once.
    request_url = f"{DISCOURSE_BASE_URL}/g/{GROUP_NAME}/reports/{query_id}/run"
    headers = {
        "Accept": "*/*",
        "Api-Key": api_key,  # module-level key defined outside this function
        "Api-Username": API_USERNAME,
        # NOTE(review): the body sent below is a plain 'params=<json>' string,
        # not an encoded multipart body with a boundary. The header is kept
        # unchanged because the server accepted it -- confirm before altering.
        "Content-Type": "multipart/form-data",
    }

    results_list = []  # One dict per result row, across all pages
    page = 0

    while True:
        # 'page' is inserted first to keep the serialized key order stable,
        # then re-asserted after merging so caller params cannot clobber it.
        payload = {'page': str(page)}
        if query_params:
            payload.update(query_params)
            payload['page'] = str(page)
        data_payload = 'params=' + json.dumps(payload)

        try:
            print(data_payload)  # Progress trace: one line per requested page
            response = requests.post(
                request_url,
                data=data_payload,
                headers=headers,
                timeout=timeout,
            )
            response.raise_for_status()  # Turn HTTP 4xx/5xx into exceptions

            json_response = response.json()

            # An empty page marks the end of the result set.
            if json_response["result_count"] == 0:
                break

            columns = json_response['columns']
            results_list.extend(
                dict(zip(columns, row)) for row in json_response['rows']
            )

        except Exception as e:
            # Deliberate best-effort: report the failure and keep whatever
            # pages were already fetched instead of discarding them.
            print(f'EXCEPTION: {e}')
            break

        page += 1
        if pause_seconds:
            time.sleep(pause_seconds)  # Be gentle with the server between pages

    return pd.DataFrame(results_list)

In [None]:
# Fetch every page of report query 108; no extra query parameters are needed
req_data = execute_query(108)

params={"page": "0"}
params={"page": "1"}
params={"page": "2"}
params={"page": "3"}
params={"page": "4"}
params={"page": "5"}
params={"page": "6"}
params={"page": "7"}
params={"page": "8"}
params={"page": "9"}
params={"page": "10"}
params={"page": "11"}
params={"page": "12"}
params={"page": "13"}
params={"page": "14"}
params={"page": "15"}
params={"page": "16"}
params={"page": "17"}
params={"page": "18"}
params={"page": "19"}
params={"page": "20"}
params={"page": "21"}
params={"page": "22"}
params={"page": "23"}
params={"page": "24"}
params={"page": "25"}
params={"page": "26"}
params={"page": "27"}
params={"page": "28"}
params={"page": "29"}
params={"page": "30"}
params={"page": "31"}
params={"page": "32"}
params={"page": "33"}
params={"page": "34"}
params={"page": "35"}
params={"page": "36"}
params={"page": "37"}
params={"page": "38"}
params={"page": "39"}
params={"page": "40"}
params={"page": "41"}
params={"page": "42"}
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41912 entries,

In [11]:
# Summarise the fetched frame: row count, column dtypes and non-null counts.
req_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41912 entries, 0 to 41911
Data columns (total 4 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   user_id   41912 non-null  int64 
 1   username  41912 non-null  object
 2   name      41912 non-null  object
 3   email     41912 non-null  object
dtypes: int64(1), object(3)
memory usage: 1.3+ MB


In [14]:
# Spot-check five rows; the fixed random_state makes the sample reproducible.
# NOTE(review): the rendered output shows real names and e-mail addresses --
# clear or redact it before sharing or committing the notebook.
req_data.sample(5, random_state=42)

Unnamed: 0,user_id,username,name,email
15943,19538,be20b030,Sarthak Jain,be20b030@smail.iitm.ac.in
3836,4711,Anay,Anay,21f1005495@ds.study.iitm.ac.in
9162,12613,ayush_sharma001,Ayush Sharma,22f1001876@ds.study.iitm.ac.in
34769,38515,24f3002839,Saumya Radhanpara Jatinbhai,24f3002839@ds.study.iitm.ac.in
655,762,AVB,Boss veeraraghavan annapillai,21f1000044@ds.study.iitm.ac.in


In [15]:
# Persist the fetched table as CSV, omitting the DataFrame index.
# NOTE(review): the file name suggests an id-to-username mapping, but every
# column of req_data is written; the .info() output lists name and email
# as well -- confirm that storing them is intended.
req_data.to_csv('../data/id_username_mapping.csv', index=False)