In [1]:
import requests
import re
from bs4 import BeautifulSoup
import pandas as pd
import json

In [2]:
def _fetch_cpu_scores(timeout=30):
    """Download the cross-platform CPU chart and return it as a DataFrame.

    The returned frame has the columns "Name" and "Type" (both lower-cased)
    and "Score", min-max normalized over the rows that were parsed.

    Raises:
        requests.RequestException: the page could not be downloaded.
        ValueError: the chart list or its rows were not found in the page.
    """
    # A timeout keeps a stalled connection from hanging the notebook forever
    response = requests.get("https://www.cpubenchmark.net/cross-platform.html", timeout=timeout)
    response.raise_for_status()

    # Parse the HTML content and find the chart with the data
    soup = BeautifulSoup(response.content, 'html.parser')
    chart_list = soup.find('ul', class_='chartlist')
    if chart_list is None:
        raise ValueError("CPU chart list (<ul class='chartlist'>) not found in the page")

    # Extract each row; rows missing any of the three fields are skipped
    rows = []
    for item in chart_list.find_all('li'):
        name = item.find('span', class_='prdname')
        score = item.find('span', class_='count')
        kind = item.find('button', class_='buttonToggle')
        if name is None or score is None or kind is None:
            continue
        rows.append({
            "Name": name.text.strip().lower(),
            "Type": kind.text.strip().lower(),
            "Score": float(score.text.strip().replace(',', '')),
        })
    if not rows:
        raise ValueError("no CPU entries could be parsed from the chart")

    cpu_df = pd.DataFrame(rows, columns=["Name", "Type", "Score"])

    # Normalize scores (min-max)
    lowest, highest = cpu_df["Score"].min(), cpu_df["Score"].max()
    cpu_df["Score"] = (cpu_df["Score"] - lowest) / (highest - lowest)
    return cpu_df


def _fetch_country_speeds(timeout=30):
    """Download the median speed per country and return it as a DataFrame.

    The data is read from the `results` JavaScript variable embedded in the
    page ("fixedMedian" and "mobileMedian" sections). The returned frame has
    the columns "Country", "Rank" (taken from the fixed section) and the
    downlink/uplink/latency/jitter columns for "Fixed" and "Mobile", as floats.
    Values a country does not have are left missing (NaN).

    Raises:
        requests.RequestException: the page could not be downloaded.
        ValueError: the `results` variable was not found in the page.
    """
    response = requests.get("https://www.speedtest.net/global-index", timeout=timeout)
    response.raise_for_status()

    # Find the variable 'results' and convert it to json
    match = re.search(r'var\s+results\s*=\s*({.*?});', response.text, re.DOTALL)
    if match is None:
        raise ValueError("'results' variable not found in the speed index page")
    data_dict = json.loads(match.group(1))

    metrics = {
        "downlink": "download_mbps",
        "uplink": "upload_mbps",
        "latency": "latency_ms",
        "jitter": "jitter",
    }
    speed_columns = [f"{kind} {metric}" for kind in ("Fixed", "Mobile") for metric in metrics]

    # Extract data from json. setdefault() lets a country that only appears in
    # the mobile section get its own row instead of raising KeyError.
    speed_data = {}
    for kind, section in (("Fixed", "fixedMedian"), ("Mobile", "mobileMedian")):
        for entry in data_dict[section]:
            country = entry["country"]["country_name"]
            record = speed_data.setdefault(country, dict.fromkeys(["Rank", *speed_columns]))
            if kind == "Fixed":
                record["Rank"] = entry["rank"]
            for metric, key in metrics.items():
                record[f"{kind} {metric}"] = entry[key]

    # Create DataFrame and convert speed values to float
    speed_df = pd.DataFrame(
        [{"Country": country, **record} for country, record in speed_data.items()],
        columns=["Country", "Rank", *speed_columns],
    )
    speed_df["Rank"] = pd.to_numeric(speed_df["Rank"])
    speed_df[speed_columns] = speed_df[speed_columns].astype(float)
    return speed_df


def _quantile_ranges(values, divisions):
    """Return [(scenario_number, range_min, range_max), ...] for `divisions` quantile buckets.

    The first bucket starts at 0; every other bucket starts where the previous
    one ends. Fewer than one division yields an empty list.
    """
    divisions = int(divisions)
    if divisions < 1:
        return []
    levels = [i / divisions for i in range(1, divisions + 1)]
    upper_bounds = values.quantile(levels).tolist()
    lower_bounds = [0] + upper_bounds[:-1]
    return [
        (number, low, high)
        for number, (low, high) in enumerate(zip(lower_bounds, upper_bounds), start=1)
    ]


#Function that returns the different scenarios with its corresponding information related to cpu and speed
def get_scenarios(divisions_cpu, divisions_speed, cpu_df=None, speed_df=None):
    """Build the table of scenarios combining CPU-score buckets and speed buckets.

    Args:
        divisions_cpu: number of quantile buckets for the normalized CPU score.
        divisions_speed: total number of speed buckets; it is halved (rounded
            down) and that many buckets are built for "Mobile" and for "Fixed".
        cpu_df: optional CPU frame (columns "Name", "Type", "Score") as returned
            by a previous call. It is used as-is (no re-normalization). When
            omitted the chart is downloaded.
        speed_df: optional country speed frame as returned by a previous call.
            When omitted the speed index is downloaded.

    Returns:
        A tuple (scenarios_df, cpu_df, speed_df), or None when the scenarios
        could not be built (the reason is printed).
    """
    try:
        if cpu_df is None:
            cpu_df = _fetch_cpu_scores()
        if speed_df is None:
            speed_df = _fetch_country_speeds()

        # Set cpu scenarios and its ranges
        scenarios_cpu = [
            {"Device_Scenario": number, "Range_Min": low, "Range_Max": high}
            for number, low, high in _quantile_ranges(cpu_df["Score"], divisions_cpu)
        ]

        # Half of the speed divisions go to each connection type
        per_connection = int(divisions_speed / 2)
        scenarios_speed = []
        for connection_type in ("Mobile", "Fixed"):
            downlink = speed_df[f"{connection_type} downlink"]
            for number, low, high in _quantile_ranges(downlink, per_connection):
                # Filter country rows within the quantile range (both ends
                # inclusive) and extract the median values
                in_range = speed_df[(downlink >= low) & (downlink <= high)]
                scenarios_speed.append({
                    "Connection_Type": connection_type,
                    "Country_Scenario": number,
                    "Range_Min": low,
                    "Range_Max": high,
                    "Down_Speed": in_range[f"{connection_type} downlink"].median(),
                    "Up_Speed": in_range[f"{connection_type} uplink"].median(),
                    "Latency": in_range[f"{connection_type} latency"].median(),
                    "Jitter": in_range[f"{connection_type} jitter"].median(),
                })

        # Get a dataframe with the information of the scenarios combining cpu and speed
        scenario_list = []
        for cpu_scenario in scenarios_cpu:
            for speed_scenario in scenarios_speed:
                scenario_list.append({
                    "Scenario_Number": len(scenario_list) + 1,
                    "CPU_Scenario": cpu_scenario["Device_Scenario"],
                    "Speed_Scenario": speed_scenario["Country_Scenario"],
                    "Connection_Type": speed_scenario["Connection_Type"],
                    "Range_CPU": (cpu_scenario["Range_Min"], cpu_scenario["Range_Max"]),
                    "Range_Speed": (speed_scenario["Range_Min"], speed_scenario["Range_Max"]),
                    "Down_Speed": speed_scenario["Down_Speed"],
                    "Up_Speed": speed_scenario["Up_Speed"],
                    "Latency": speed_scenario["Latency"],
                    "Jitter": speed_scenario["Jitter"],
                })
        scenarios_df = pd.DataFrame(scenario_list)
        return scenarios_df, cpu_df, speed_df

    except Exception as exc:
        # Keep the best-effort contract (message + None) but show the cause,
        # and let KeyboardInterrupt/SystemExit propagate.
        print(f"It was not possible to define the different scenarios ({type(exc).__name__}: {exc})")
        return None


# Type: the type of device, such as "Desktop", "Connected TV", "Smart Phone" or "Tablet"
#     (laptops are assumed to be included in "Desktop")
# Brand: the brand of the mobile device, such as "Acer", "Amazon", "Apple", "Samsung", "Xiaomi"...
# Model: the brand and model of the device, such as "Amazon Kindle Fire", "Apple iPhone", "Samsung Galaxy"...

def _device_score(cpu_df, device_kind, device_brand, device_model):
    """Return the median CPU score for a device, from the most specific match available.

    The model is tried first, then the brand (both as case-insensitive literal
    substrings of "Name"), then the median of the CPU types mapped to
    `device_kind` (already lower-cased). Raises ValueError when nothing matched
    by name and `device_kind` is not a known device type.
    """
    for needle in (device_model, device_brand):
        if not needle:
            continue
        # regex=False: names such as "pro (2nd gen)" or "s10+" are plain text
        mask = cpu_df["Name"].str.contains(needle.lower(), case=False, regex=False, na=False)
        matches = cpu_df[mask]
        if not matches.empty:
            return matches["Score"].median()

    cpu_types_by_device = {
        "desktop": ("desktop", "laptop"),
        "smart phone": ("ios", "android"),
        "tablet": ("ios", "android"),
        "connected tv": ("android",),
    }
    if device_kind not in cpu_types_by_device:
        raise ValueError(f"unknown device type {device_kind!r}")
    devices = cpu_df[cpu_df["Type"].isin(cpu_types_by_device[device_kind])]
    if devices.empty:
        return cpu_df["Score"].median()
    return devices["Score"].median()


def _country_downlink(speed_df, country_name, column):
    """Return the downlink speed of a country from `column`, estimating it when unknown.

    A missing (NaN) or zero speed counts as unknown. An unknown speed is
    replaced by the average of the nearest countries above and below in "Rank"
    that do have a speed; a side with no such country uses the median of all
    known speeds. A country that is not listed gets that median. `speed_df` is
    not modified.
    """
    # Work on a private copy with a fresh index so the caller's frame is untouched
    table = pd.DataFrame({
        "country": speed_df["Country"].to_numpy(),
        "rank": pd.to_numeric(speed_df["Rank"], errors="coerce").to_numpy(),
        "speed": pd.to_numeric(speed_df[column], errors="coerce").to_numpy(),
    })
    known = table[table["speed"].notna() & (table["speed"] != 0)]
    fallback = known["speed"].median()

    own = table[table["country"] == country_name]
    if own.empty:
        return fallback

    own_speed = own["speed"].iloc[0]
    own_rank = own["rank"].iloc[0]
    if pd.notna(own_speed) and own_speed != 0:
        return own_speed

    above = known[known["rank"] < own_rank]
    below = known[known["rank"] > own_rank]
    previous_speed = above.loc[above["rank"].idxmax(), "speed"] if not above.empty else fallback
    next_speed = below.loc[below["rank"].idxmin(), "speed"] if not below.empty else fallback
    return (previous_speed + next_speed) / 2


def _match_scenario(scenarios, connection_type, score, downlink_speed):
    """Return the "Scenario_Number" whose CPU and speed ranges contain the given values.

    CPU ranges are half-open [min, max), except that the highest range also
    includes its upper bound so the best score still matches. Speed ranges are
    inclusive on both ends; when two rows match, the later one wins. Raises
    LookupError when no row matches.
    """
    candidates = scenarios[scenarios["Connection_Type"] == connection_type]
    top_cpu = max((cpu_range[1] for cpu_range in candidates["Range_CPU"]), default=None)

    matched = None
    for _, row in candidates.iterrows():
        cpu_min, cpu_max = row["Range_CPU"]
        speed_min, speed_max = row["Range_Speed"]
        cpu_ok = cpu_min <= score < cpu_max or (score == cpu_max == top_cpu)
        if cpu_ok and speed_min <= downlink_speed <= speed_max:
            matched = row["Scenario_Number"]
    if matched is None:
        raise LookupError(
            f"no {connection_type} scenario covers score {score} and downlink speed {downlink_speed}"
        )
    return matched


#Function that returns the scenario of a given device in a given country
def retrieve_scenario(cpu_df, speed_df, country_name, device_type, device_brand = "", device_model = "", scenarios = None):
    """Return the scenario number for a device in a country.

    Args:
        cpu_df: CPU frame with the columns "Name", "Type" and "Score".
        speed_df: country frame with "Country", "Rank" and the
            "Fixed downlink" / "Mobile downlink" columns. It is not modified.
        country_name: value looked up in the "Country" column.
        device_type: "Desktop", "Connected TV", "Smart Phone" or "Tablet"
            (case-insensitive). "Desktop" and "Connected TV" use the "Fixed"
            connection; anything else uses "Mobile".
        device_brand: optional brand, matched as a substring of the CPU name.
        device_model: optional model, matched as a substring of the CPU name;
            tried before the brand.
        scenarios: scenario table (columns "Scenario_Number",
            "Connection_Type", "Range_CPU", "Range_Speed"). When omitted, the
            notebook-level `scenarios_df` variable is used.

    Returns:
        The matching "Scenario_Number", or None when it could not be
        determined (the reason is printed).
    """
    try:
        if scenarios is None:
            # Backward compatibility: earlier versions always read this global
            scenarios = scenarios_df

        device_kind = device_type.lower()

        # Get the score of a device depending on the known information
        score = _device_score(cpu_df, device_kind, device_brand, device_model)

        # Get the type of connection according to the device
        if device_kind in ("desktop", "connected tv"):
            connection_type = "Fixed"
        else:
            connection_type = "Mobile"

        # Get downlink speed of a country according to the connection type
        downlink_speed = _country_downlink(speed_df, country_name, f"{connection_type} downlink")

        # Find the corresponding scenario for the score and the speed
        scenario_number = _match_scenario(scenarios, connection_type, score, downlink_speed)
        print("Downlink speed of the country: ", downlink_speed, "\nScore of the device", score, "\nConnection type:", connection_type)
        return scenario_number
    except Exception as exc:
        # Keep the best-effort contract (message + None) but show the cause
        print(f"It was no possible to get the scenario. Review the arguments ({type(exc).__name__}: {exc})")
        return None


In [3]:
# Build the scenario table with 4 CPU buckets. The speed divisions are halved per
# connection type, so 9 yields int(9/2) = 4 buckets each for "Mobile" and "Fixed".
scenarios_df, cpu_df, speed_df = get_scenarios(4,9)

In [4]:
# No brand or model given: the score is the median of the 'ios'/'android' entries,
# and a tablet is looked up against the "Mobile" downlink speed.
retrieve_scenario(cpu_df, speed_df, "France", "Tablet" )

Downlink speed of the country:  98.03 
Score of the device 0.03666160788632518 
Connection type: Mobile


12

In [5]:
# Brand-only lookup: the score is the median of the entries whose name contains "apple"
# (falling back to the desktop/laptop median if none does); desktops use the "Fixed" speed.
retrieve_scenario(cpu_df, speed_df, "Spain", "Desktop", device_brand = "Apple")

Downlink speed of the country:  217.3 
Score of the device 0.1191678530178087 
Connection type: Fixed


32

In [6]:
# The model is tried first, then the brand, then the device-type median
# if neither matches any CPU name.
retrieve_scenario(cpu_df, speed_df, "Liberia", "Smart Phone", device_brand = "BQ", device_model = "BQ AQUARIS X PRO")

Downlink speed of the country:  25.03 
Score of the device 0.027303983453759166 
Connection type: Mobile


10

In [7]:
# Connected TVs are scored from the 'android' entries and use the "Fixed" downlink speed.
retrieve_scenario(cpu_df, speed_df, "Singapore", "Connected TV")

Downlink speed of the country:  284.13 
Score of the device 0.03602030675011416 
Connection type: Fixed


16

In [8]:
# List the distinct values of the "Device Type" column in dv360_devices.csv
# (presumably the device_type values retrieve_scenario has to handle — TODO confirm).
df = pd.read_csv("dv360_devices.csv")
df["Device Type"].unique()

array(['Smart Phone', 'Tablet', 'Desktop', 'Connected TV'], dtype=object)