# Requests to API

This Python code serves as a base for performing requests to the API.

In [None]:
# needed imports
import requests
import json
from requests.auth import HTTPBasicAuth
from datetime import datetime
import pandas as pd
import csv

# Cluster Analysis data

In [None]:
# URL of the endpoint used to perform a search operation on the index pattern.
# TODO: replace the placeholder with the real endpoint. The value is missing
# from this file; the bare `url =` that stood here was a syntax error.
url = "<SEARCH_ENDPOINT_URL>"
# Header with the Content-Type required to send a JSON body over HTTP
headers = {"Content-Type": "application/json"}
# Desired query: up to 10000 hits per request, restricted to the listed
# fields, for records whose startTime is on or after the given lower bound.
query = {
    "size": 10000,
    "_source": ["commands","startTime", "endTime", "hostIP","loggedin", "peerIP",
                "protocol","hostPort","peerPort","geoip.country_name","geoip.continent_code",
                "hostGeoip.continent_code","hostGeoip.country_name","version"],
    "query": {
        "range": {
            "startTime": {
                "gte": "2023-05-01T00:00:00"
            }
        }
    }
}
# Needed function
def flatten_dict(d, parent_key='', sep='.'):
    """
    Collapse a nested dictionary into a single-level dictionary.

    Keys of nested dictionaries are joined to their ancestors' keys with
    ``sep``; values that are not dictionaries (lists included) are copied
    unchanged. A nested dictionary that is empty contributes no keys.

    :param d: The nested dictionary to flatten.
    :param parent_key: Prefix prepended to every produced key (empty for none).
    :param sep: Separator placed between the levels of a joined key.
    :return: A new flat dictionary, in depth-first key order.
    """
    def _leaves(mapping, prefix):
        # Yield (joined_key, value) pairs for every non-dict value, depth first.
        for name, value in mapping.items():
            path = f"{prefix}{sep}{name}" if prefix else name
            if isinstance(value, dict):
                yield from _leaves(value, path)
            else:
                yield path, value

    return dict(_leaves(d, parent_key))

In [None]:
def request_to_clustering(url, headers, query, auth=None):
    '''
    Page through the search endpoint and collect the hits into one DataFrame.

    Starting at 2023-05-01T00:00:00, ``query`` is sent repeatedly with its
    ``startTime >= <cursor>`` bound moved to the largest ``startTime`` of the
    previous page. Paging stops when the cursor reaches 2023-07-31 or when the
    endpoint returns nothing newer than the cursor. Only hits with a non-empty
    ``commands`` field are kept, and a hit already seen on an earlier page is
    not added twice.

    Parameters:
        url (str): The URL to send the HTTP requests to.
        headers (dict): The headers to include in each HTTP request.
        query (dict): The search body. Its
            ``query["query"]["range"]["startTime"]["gte"]`` entry is
            overwritten in place before every request.
        auth: Optional credentials forwarded to ``requests.get``, for example
            ``HTTPBasicAuth(user, password)`` or a ``(user, password)`` tuple.
            Defaults to None (no authentication).

    Returns:
        pandas.DataFrame: A concatenated DataFrame containing data from the
            successful requests (empty if no matching hit was found).
        None: If any request is not successful (Status code other than 200).
    '''

    # List to store dataframes from each request
    dfs = []
    # Initial start time
    last_start_time = "2023-05-01T00:00:00"

    # Define the target stop date
    stop_date = datetime(2023, 7, 31, 0, 0, 0)
    current_date = datetime.strptime(last_start_time, '%Y-%m-%dT%H:%M:%S')
    current_day = None  # Track the current day
    # (_index, _id) pairs already collected. The "gte" bound re-fetches the
    # boundary hit(s) of the previous page, which would otherwise be duplicated.
    seen_hits = set()

    # NOTE(review): advancing the cursor to the page maximum assumes the
    # endpoint returns hits in ascending startTime order; the query passed in
    # by this file has no sort clause - confirm against the server behaviour.
    while current_date < stop_date:
        # Report progress once per calendar day
        if current_date.date() != current_day:
            print(current_date)
            current_day = current_date.date()
        # Update the start time for the query
        query["query"]["range"]["startTime"]["gte"] = last_start_time
        # Perform the request. `auth` must be a credential object or tuple;
        # the bare HTTPBasicAuth class is not a usable credential.
        result = requests.get(url, auth=auth,
                              headers=headers, data=json.dumps(query))

        if result.status_code != 200:  # Not successful
            print("Request was not successful. Status code:", result.status_code)
            return None

        page = result.json()['hits']['hits']
        if not page:
            # Nothing at or after the cursor: the data ends before stop_date
            break

        # Cursor candidate: newest startTime on the page, taken over ALL hits
        # so that a page without any commands cannot stall the paging.
        newest_start_time = max(hit['_source']['startTime'] for hit in page)

        records = []
        for hit in page:
            # Keep only hits that carry a non-empty commands field
            if not hit['_source'].get('commands'):
                continue
            hit_key = (hit.get('_index'), hit.get('_id'))
            if hit_key[1] is not None:
                if hit_key in seen_hits:
                    continue
                seen_hits.add(hit_key)
            # Remove metadata columns that are not wanted in the output
            for meta_field in ('_index', '_type', '_id', '_score'):
                hit.pop(meta_field, None)
            # Flatten dicts to convert data to the desired format
            records.append(flatten_dict(hit))
        if records:
            dfs.append(pd.DataFrame(records))

        if newest_start_time == last_start_time:
            # The cursor cannot advance any further; repeating the request
            # would return the same page forever.
            print("No hits newer than", last_start_time, "- stopping.")
            break
        last_start_time = newest_start_time
        current_date = datetime.strptime(last_start_time, '%Y-%m-%dT%H:%M:%S.%fZ')

    # pd.concat raises on an empty list, so handle "nothing collected" explicitly
    if not dfs:
        return pd.DataFrame()
    # Concatenate all dataframes into one
    return pd.concat(dfs, ignore_index=True)

In [None]:
# Request the data
final_df = request_to_clustering(url, headers, query)

# Store data. request_to_clustering returns None when a request fails, so
# stop with a clear message instead of an AttributeError on None.
if final_df is None:
    raise RuntimeError("Cluster data could not be fetched; Cluster_data.csv was not written.")
final_df.to_csv('../Data/Cluster_data.csv', index=False)

# Forecasting Data

In [None]:
# URL of the endpoint used to perform a search operation on the index pattern.
# TODO: replace the placeholder with the real endpoint. The value is missing
# from this file; the bare `url =` that stood here was a syntax error.
url = "<SEARCH_ENDPOINT_URL>"
# Header with the Content-Type required to send a JSON body over HTTP
headers = {"Content-Type": "application/json"}
# Desired query: up to 10000 hits per request, restricted to the listed
# fields, for records whose startTime is on or after the given lower bound.
query = {
    "size": 10000,
    "_source": ["commands","startTime", "endTime", "hostIP","loggedin", "peerIP",
                "protocol","hostPort","peerPort","geoip.country_name","geoip.continent_code",
                "hostGeoip.continent_code","hostGeoip.country_name","version"],
    "query": {
        "range": {
            "startTime": {
                "gte": "2023-05-01T00:00:00"
            }
        }
    }
}
# Needed function
def flatten_dict(d, parent_key='', sep='.'):
    """
    Collapse a nested dictionary into a single-level dictionary.

    Keys of nested dictionaries are joined to their ancestors' keys with
    ``sep``; values that are not dictionaries (lists included) are copied
    unchanged. A nested dictionary that is empty contributes no keys.

    :param d: The nested dictionary to flatten.
    :param parent_key: Prefix prepended to every produced key (empty for none).
    :param sep: Separator placed between the levels of a joined key.
    :return: A new flat dictionary, in depth-first key order.
    """
    def _leaves(mapping, prefix):
        # Yield (joined_key, value) pairs for every non-dict value, depth first.
        for name, value in mapping.items():
            path = f"{prefix}{sep}{name}" if prefix else name
            if isinstance(value, dict):
                yield from _leaves(value, path)
            else:
                yield path, value

    return dict(_leaves(d, parent_key))

In [None]:
def request_to_forecasting(url, headers, query, initial_time, final_year, final_month, final_day, auth=None):
    '''
    Page through the search endpoint and collect the hits into one DataFrame.

    Starting at ``initial_time``, ``query`` is sent repeatedly with its
    ``startTime >= <cursor>`` bound moved to the largest ``startTime`` of the
    previous page. Paging stops when the cursor reaches the stop date or when
    the endpoint returns nothing newer than the cursor. Only hits with a
    non-empty ``commands`` field are kept, and a hit already seen on an
    earlier page is not added twice.

    Parameters:
        url (str): The URL to send the HTTP requests to.
        headers (dict): The headers to include in each HTTP request.
        query (dict): The search body. Its
            ``query["query"]["range"]["startTime"]["gte"]`` entry is
            overwritten in place before every request.
        initial_time (str): First cursor value, formatted as
            ``YYYY-MM-DDTHH:MM:SS``.
        final_year (int): Year of the stop date.
        final_month (int): Month of the stop date.
        final_day (int): Day of the stop date (the stop time is midnight).
        auth: Optional credentials forwarded to ``requests.get``, for example
            ``HTTPBasicAuth(user, password)`` or a ``(user, password)`` tuple.
            Defaults to None (no authentication).

    Returns:
        pandas.DataFrame: A concatenated DataFrame containing data from the
            successful requests (empty if no matching hit was found).
        None: If any request is not successful (Status code other than 200).
    '''

    # List to store dataframes from each request
    dfs = []
    # Initial start time
    last_start_time = initial_time

    # Define the target stop date
    stop_date = datetime(final_year, final_month, final_day, 0, 0, 0)
    current_date = datetime.strptime(last_start_time, '%Y-%m-%dT%H:%M:%S')
    current_day = None  # Track the current day
    # (_index, _id) pairs already collected. The "gte" bound re-fetches the
    # boundary hit(s) of the previous page, which would otherwise be duplicated.
    seen_hits = set()

    # NOTE(review): advancing the cursor to the page maximum assumes the
    # endpoint returns hits in ascending startTime order; the query passed in
    # by this file has no sort clause - confirm against the server behaviour.
    while current_date < stop_date:
        # Report progress once per calendar day
        if current_date.date() != current_day:
            print(current_date)
            current_day = current_date.date()
        # Update the start time for the query
        query["query"]["range"]["startTime"]["gte"] = last_start_time
        # Perform the request. `auth` must be a credential object or tuple;
        # the bare HTTPBasicAuth class is not a usable credential.
        result = requests.get(url, auth=auth,
                              headers=headers, data=json.dumps(query))

        if result.status_code != 200:  # Not successful
            print("Request was not successful. Status code:", result.status_code)
            return None

        page = result.json()['hits']['hits']
        if not page:
            # Nothing at or after the cursor: the data ends before stop_date
            break

        # Cursor candidate: newest startTime on the page, taken over ALL hits
        # so that a page without any commands cannot stall the paging.
        newest_start_time = max(hit['_source']['startTime'] for hit in page)

        records = []
        for hit in page:
            # Keep only hits that carry a non-empty commands field
            if not hit['_source'].get('commands'):
                continue
            hit_key = (hit.get('_index'), hit.get('_id'))
            if hit_key[1] is not None:
                if hit_key in seen_hits:
                    continue
                seen_hits.add(hit_key)
            # Remove metadata columns that are not wanted in the output
            for meta_field in ('_index', '_type', '_id', '_score'):
                hit.pop(meta_field, None)
            # Flatten dicts to convert data to the desired format
            records.append(flatten_dict(hit))
        if records:
            dfs.append(pd.DataFrame(records))

        if newest_start_time == last_start_time:
            # The cursor cannot advance any further; repeating the request
            # would return the same page forever.
            print("No hits newer than", last_start_time, "- stopping.")
            break
        last_start_time = newest_start_time
        current_date = datetime.strptime(last_start_time, '%Y-%m-%dT%H:%M:%S.%fZ')

    # pd.concat raises on an empty list, so handle "nothing collected" explicitly
    if not dfs:
        return pd.DataFrame()
    # Concatenate all dataframes into one
    return pd.concat(dfs, ignore_index=True)

In [None]:
# Fetch the records from 2021-11-30 up to the 2022-04-30 stop date
df21_november_to_april = request_to_forecasting(
    url,
    headers,
    query,
    initial_time="2021-11-30T00:00:00",
    final_year=2022,
    final_month=4,
    final_day=30,
)

In [None]:
# Store csv in Data folder. request_to_forecasting returns None when a request
# fails, so stop with a clear message instead of an AttributeError on None.
if df21_november_to_april is None:
    raise RuntimeError("Data could not be fetched; 21_november_to_april.csv was not written.")
df21_november_to_april.to_csv('../Data/21_november_to_april.csv', index=False)

In [None]:
# Fetch the records from 2022-04-30 up to the 2022-11-30 stop date
df22_april_to_november = request_to_forecasting(
    url,
    headers,
    query,
    initial_time="2022-04-30T00:00:00",
    final_year=2022,
    final_month=11,
    final_day=30,
)

In [None]:
# Store csv in Data folder. request_to_forecasting returns None when a request
# fails, so stop with a clear message instead of an AttributeError on None.
if df22_april_to_november is None:
    raise RuntimeError("Data could not be fetched; 22_april_to_november.csv was not written.")
df22_april_to_november.to_csv('../Data/22_april_to_november.csv', index=False)

In [None]:
# Fetch the records from 2022-11-30 up to the 2023-04-30 stop date
df22_november_to_april = request_to_forecasting(
    url,
    headers,
    query,
    initial_time="2022-11-30T00:00:00",
    final_year=2023,
    final_month=4,
    final_day=30,
)

In [None]:
# Store csv in Data folder. request_to_forecasting returns None when a request
# fails, so stop with a clear message instead of an AttributeError on None.
if df22_november_to_april is None:
    raise RuntimeError("Data could not be fetched; 22_november_to_april.csv was not written.")
df22_november_to_april.to_csv('../Data/22_november_to_april.csv', index=False)

In [None]:
# Fetch the records from 2023-04-30 up to the 2023-11-29 stop date
df23_april_to_november = request_to_forecasting(
    url,
    headers,
    query,
    initial_time="2023-04-30T00:00:00",
    final_year=2023,
    final_month=11,
    final_day=29,
)

In [None]:
# Store csv in Data folder. request_to_forecasting returns None when a request
# fails, so stop with a clear message instead of an AttributeError on None.
if df23_april_to_november is None:
    raise RuntimeError("Data could not be fetched; 23_april_to_november.csv was not written.")
df23_april_to_november.to_csv('../Data/23_april_to_november.csv', index=False)