### **In this notebook I fetch the data of ALL Discourse users between a given start date and end date**

In [2]:
import requests
import pandas as pd
import os
import time
import json  # Importing json to handle JSON data

def execute_query(query_id, query_params=None, max_pages=51, timeout=60, page_delay=1):
    """Run a Discourse group report page by page and collect every row.

    The report is requested repeatedly with an increasing ``page`` parameter
    (starting at 0) until the server reports ``result_count == 0``, an error
    occurs, or ``max_pages`` requests have been made.

    Args:
        query_id: Identifier of the report; it is interpolated into the URL
            ``/g/<group>/reports/<query_id>/run``.
        query_params: Optional dict of extra parameters sent alongside
            ``page`` (for example ``start_date`` / ``end_date``). A ``page``
            key supplied here is ignored, because pagination is driven by
            this function.
        max_pages: Upper bound on the number of page requests. The default
            (51) matches the previous hard-coded limit (pages 0 to 50).
        timeout: Seconds to wait for each HTTP response before giving up.
        page_delay: Seconds to pause between consecutive page requests.

    Returns:
        pandas.DataFrame with one row per result row, using the column names
        returned by the server. The frame is empty if nothing was fetched.

    Raises:
        ValueError: If ``query_params`` is given but is not a dictionary.

    Note:
        Request and parsing errors are deliberately not raised: the error is
        printed, fetching stops, and the rows collected so far are returned.
        Callers that need complete data should check the printed output.
    """
    DISCOURSE_BASE_URL = "https://discourse.onlinedegree.iitm.ac.in"
    GROUP_NAME = "discourse_analytics"
    API_KEY_GLOBAL = os.environ.get('API_KEY')
    API_USERNAME = 'shubhamG'

    if query_params is not None and not isinstance(query_params, dict):
        raise ValueError("Query parameters must be a dictionary.")

    if not API_KEY_GLOBAL:
        # Keep the best-effort behaviour, but make the likely cause of a
        # rejected request visible instead of failing without explanation.
        print("Warning: API_KEY environment variable is not set.")

    headers = {
        "Accept": "*/*",
        "Api-Key": API_KEY_GLOBAL,
        "Api-Username": API_USERNAME,
        # NOTE(review): the body below is a raw "params=<json>" string, not a
        # real multipart document. The header is kept as-is because this
        # combination is what the server has been accepting; confirm before
        # changing either side.
        "Content-Type": "multipart/form-data",
    }

    # The URL does not depend on the page, so build it once.
    request_url = f"{DISCOURSE_BASE_URL}/g/{GROUP_NAME}/reports/{query_id}/run"

    results_list = []
    for page in range(max_pages):
        if page:
            time.sleep(page_delay)  # Pause between consecutive requests

        payload = {'page': str(page)}
        if query_params:
            payload.update(query_params)
            # Re-assert the page number so a caller-supplied "page" key cannot
            # freeze pagination on a single page and duplicate its rows.
            payload['page'] = str(page)
        data_payload = 'params=' + json.dumps(payload)

        try:
            print(data_payload)
            response = requests.post(
                request_url,
                data=data_payload,
                headers=headers,
                timeout=timeout,  # Without a timeout a stalled connection hangs forever
            )
            response.raise_for_status()
            json_response = response.json()

            if json_response["result_count"] == 0:
                break  # An empty page marks the end of the result set

            columns = json_response['columns']
            results_list.extend(dict(zip(columns, row)) for row in json_response['rows'])

        except Exception as e:
            # Deliberate best-effort: report the problem and return what we have.
            print(f"Error: {e}")
            break
    else:
        # The loop ran out of pages without seeing an empty page or an error.
        print(f"Warning: stopped after {max_pages} pages; results may be truncated.")

    return pd.DataFrame(results_list)

In [11]:
# Reporting window passed to the report. The dates are written day-first
# (30/04/2025); NOTE(review): confirm the report's query expects that format.
params = {
    "start_date": "01/01/2025",
    "end_date": "30/04/2025",
}

# Fetch every page of report 102 for that window into one DataFrame.
req_data = execute_query(query_id=102, query_params=params)

params={"page": "0", "start_date": "01/01/2025", "end_date": "30/04/2025"}
params={"page": "1", "start_date": "01/01/2025", "end_date": "30/04/2025"}
params={"page": "2", "start_date": "01/01/2025", "end_date": "30/04/2025"}
params={"page": "3", "start_date": "01/01/2025", "end_date": "30/04/2025"}
params={"page": "4", "start_date": "01/01/2025", "end_date": "30/04/2025"}
params={"page": "5", "start_date": "01/01/2025", "end_date": "30/04/2025"}
params={"page": "6", "start_date": "01/01/2025", "end_date": "30/04/2025"}
params={"page": "7", "start_date": "01/01/2025", "end_date": "30/04/2025"}
params={"page": "8", "start_date": "01/01/2025", "end_date": "30/04/2025"}
params={"page": "9", "start_date": "01/01/2025", "end_date": "30/04/2025"}
params={"page": "10", "start_date": "01/01/2025", "end_date": "30/04/2025"}
params={"page": "11", "start_date": "01/01/2025", "end_date": "30/04/2025"}
params={"page": "12", "start_date": "01/01/2025", "end_date": "30/04/2025"}
params={"page": "13", 

In [7]:
# Print a summary of the fetched frame: index range, columns, non-null counts, dtypes and memory usage.
req_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42211 entries, 0 to 42210
Data columns (total 10 columns):
 #   Column          Non-Null Count  Dtype
---  ------          --------------  -----
 0   user_id         42211 non-null  int64
 1   likes_received  42211 non-null  int64
 2   likes_given     42211 non-null  int64
 3   topics_viewed   42211 non-null  int64
 4   days_visited    42211 non-null  int64
 5   posts_read      42211 non-null  int64
 6   solutions       42211 non-null  int64
 7   cheers          42211 non-null  int64
 8   topics_created  42211 non-null  int64
 9   posts_created   42211 non-null  int64
dtypes: int64(10)
memory usage: 3.2 MB


In [12]:
# Destination folder for the export; created on first run, reused afterwards.
all_users_data_folder = "../data/overall_data_all_users"
os.makedirs(all_users_data_folder, exist_ok=True)

# Write the fetched rows to a single-sheet workbook, leaving out the DataFrame index.
req_data.to_excel(
    os.path.join(all_users_data_folder, "data_all_users_t1_2025.xlsx"),
    sheet_name="user_actions_data",
    index=False,
)