In [1]:
import requests
import re
from bs4 import BeautifulSoup
import pandas as pd
import json

In [6]:
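# TODO: function with range
# TODO (translated from Spanish; the original note "si es cero los de arriba de fijo media" is ambiguous):
#   possibly "if a value is zero, use the mean of the fixed-line values above" -- confirm with the author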



#This function converts the chart: Comparing most common desktop, laptops and mobile CPUs into a dataframe  

def get_cpu_dataframe():
    """Scrape the cross-platform CPU benchmark chart into a DataFrame.

    Downloads https://www.cpubenchmark.net/cross-platform.html, locates the
    ``<ul class="chartlist">`` element and turns each of its ``<li>`` entries
    into one row.

    Returns:
        pandas.DataFrame | None: one row per chart entry with the columns
        ``Name`` (lower-cased product name), ``Type`` (lower-cased text of the
        entry's toggle button) and ``Score`` (min-max normalised to [0, 1]).
        ``None`` is returned, after printing a message, when the page cannot
        be downloaded or does not have the expected structure.
    """
    try:
        # A timeout keeps the notebook from hanging on an unresponsive server
        response = requests.get("https://www.cpubenchmark.net/cross-platform.html", timeout=30)
        response.raise_for_status()

        # Parse the HTML content and find the chart with the data
        soup = BeautifulSoup(response.content, 'html.parser')
        chart_list = soup.find('ul', class_='chartlist')

        # Extract each row and its data from the chart list
        items = chart_list.find_all('li')
        if not items:
            raise ValueError("the benchmark chart contains no entries")
        names = [item.find('span', class_='prdname').text.strip().lower() for item in items]
        scores = [float(item.find('span', class_='count').text.strip().replace(',', '')) for item in items]
        types = [item.find('button', class_='buttonToggle').text.strip().lower() for item in items]

        # Create DataFrame
        df = pd.DataFrame({
            'Name': names,
            'Type': types,
            'Score': scores,
        })

        # Min-max normalise the scores; when every score is identical the
        # span is zero, so avoid the 0/0 division that would yield NaN.
        score_min = df['Score'].min()
        score_span = df['Score'].max() - score_min
        if score_span:
            df['Score'] = (df['Score'] - score_min) / score_span
        else:
            df['Score'] = 0.0
        return df

    except (requests.RequestException, AttributeError, ValueError) as error:
        # RequestException: download failed; AttributeError: an expected tag
        # is missing (find() returned None); ValueError: unparsable score or
        # empty chart. Anything else is a programming error and propagates.
        print("It was not possible to get devices benchmarking chart")
        print(f"Reason: {error!r}")
        return None
    

#This function gets the score of a device depending on the known information

def get_device_score(df, device_type="", device_system="", device_brand="", device_model=""):
    """Estimate a device's benchmark score from whatever is known about it.

    The lookups run from most to least specific and the first one that finds
    at least one row wins; the result is always the mean ``Score`` of the
    matching rows.

    Args:
        df: DataFrame with the columns ``Name``, ``Type`` and ``Score``
            (as produced by ``get_cpu_dataframe``).
        device_type: Kind of device: "Desktop" (laptops are assumed to be
            included), "Connected TV", "Smartphone" or "Tablet".
        device_system: Operating system name; matched against ``Type``
            (the chart only distinguishes ios and android for mobiles).
        device_brand: Brand, such as "Acer", "Apple", "Samsung"; matched as a
            substring of ``Name``.
        device_model: Brand and model, such as "Apple iPhone"; matched as a
            substring of ``Name``.

    Returns:
        The mean score of the first non-empty match, or the mean score of
        the whole table when nothing matches or nothing is specified.
    """
    # Name-based lookups, most specific first. The text is matched literally
    # (regex=False) so names containing characters such as "(", "+" or "."
    # neither raise a regex error nor match unintended rows.
    for name_fragment in (device_model, device_brand):
        if name_fragment:
            matches = df[df['Name'].str.contains(name_fragment.lower(), case=False, regex=False)]
            if not matches.empty:
                return matches['Score'].mean()

    if device_system:
        # Average over the rows whose type contains the given operating system
        matches = df[df['Type'].str.contains(device_system.lower(), case=False, regex=False)]
        if not matches.empty:
            return matches['Score'].mean()

    if device_type:
        # Chart types that stand in for each supported kind of device
        chart_types_by_device = {
            "desktop": ('desktop', 'laptop'),
            "smartphone": ('ios', 'android'),
            "tablet": ('ios', 'android'),
            "connected tv": ('android',),
        }
        chart_types = chart_types_by_device.get(device_type.lower())
        if chart_types:
            matches = df[df['Type'].isin(chart_types)]
            if not matches.empty:
                return matches['Score'].mean()

    # Nothing specified or nothing matched - average score of all the devices
    return df['Score'].mean()

    
#This function converts the chart: Median country speed into a dataframe  

def get_speed_dataframe():
    """Scrape the per-country median speeds into a DataFrame.

    Downloads https://www.speedtest.net/global-index, extracts the JavaScript
    object assigned to ``var results`` and reads its ``fixedMedian`` and
    ``mobileMedian`` lists.

    Returns:
        pandas.DataFrame | None: one row per country with the column
        ``Country`` plus, for both the ``Fixed`` and ``Mobile`` prefixes, the
        float columns ``<prefix> downlink``, ``<prefix> uplink``,
        ``<prefix> latency`` and ``<prefix> jitter`` (taken from the
        ``download_mbps``, ``upload_mbps``, ``latency_ms`` and ``jitter``
        keys). Measurements missing for a country are stored as 0.
        ``None`` is returned, after printing a message, when the page cannot
        be downloaded or does not have the expected structure.
    """
    speed_columns = ['Fixed downlink', 'Fixed uplink', 'Fixed latency', 'Fixed jitter',
                     'Mobile downlink', 'Mobile uplink', 'Mobile latency', 'Mobile jitter']
    try:
        # A timeout keeps the notebook from hanging on an unresponsive server
        response = requests.get("https://www.speedtest.net/global-index", timeout=30)
        response.raise_for_status()

        # Find the variable 'results' and convert it to json
        match = re.search(r'var\s+results\s*=\s*({.*?});', response.text, re.DOTALL)
        if match is None:
            raise ValueError("variable 'results' not found in the page")
        data_dict = json.loads(match.group(1))

        # Extract data from json. setdefault creates the full (all-None)
        # record on first sight of a country, so a country that only appears
        # in the mobile list no longer aborts the whole extraction.
        speed_data = {}
        for prefix, section in (('Fixed', 'fixedMedian'), ('Mobile', 'mobileMedian')):
            for entry in data_dict[section]:
                country = entry['country']['country_name']
                record = speed_data.setdefault(country, dict.fromkeys(speed_columns))
                record[f'{prefix} downlink'] = entry['download_mbps']
                record[f'{prefix} uplink'] = entry['upload_mbps']
                record[f'{prefix} latency'] = entry['latency_ms']
                record[f'{prefix} jitter'] = entry['jitter']

        # Create DataFrame, replace NaN values by 0 and convert speed values to float
        df = pd.DataFrame.from_dict(speed_data, orient='index').reset_index()
        df.columns = ['Country'] + speed_columns
        df = df.fillna(0)
        df[speed_columns] = df[speed_columns].astype(float)
        return df

    except (requests.RequestException, AttributeError, KeyError, TypeError, ValueError) as error:
        # RequestException: download failed; the others: the page or its JSON
        # (json.JSONDecodeError is a ValueError) is not shaped as expected.
        # Anything else is a programming error and propagates.
        print("It was not possible to get country speed information")
        print(f"Reason: {error!r}")
        return None
        
        
#This function gets the speed information of a country

def get_country_speed(df, country):
    """Select the speed rows of a single country.

    Args:
        df: DataFrame with a ``Country`` column.
        country: Country name; compared for exact equality.

    Returns:
        The matching rows as a DataFrame, or the string
        "Country not found in the data." when no row matches.
    """
    matching_rows = df[df['Country'] == country]
    if matching_rows.empty:
        return "Country not found in the data."
    return matching_rows
    
    
# This function calculates quantiles dynamically based on the number of divisions and determines in which one the device falls
                       
def get_range(column, divisions, score):
    """Find the quantile level of ``column`` in which ``score`` falls.

    The column is split into ``divisions`` quantile levels (1 is the lowest)
    whose upper bounds are the i/divisions quantiles.

    Args:
        column: pandas Series of numeric values.
        divisions: Number of levels; must be at least 1.
        score: Value to classify.

    Returns:
        tuple: ``(level, [range_min, range_max])`` where ``level`` is the
        1-based level and the list holds its bounds. The lower bound of
        level 1 is the constant 0 (the data is treated as non-negative). A
        score above the column maximum is assigned to the top level.

    Raises:
        ValueError: if ``divisions`` is below 1, ``score`` is NaN/None, or
            the column holds no numeric data.
    """
    if divisions < 1:
        raise ValueError("divisions must be at least 1")
    if pd.isna(score):
        raise ValueError("score must be a number, got a missing value")

    quantiles = [i / divisions for i in range(1, divisions + 1)]
    # Plain list of upper bounds, accessed by position rather than by
    # float label
    upper_bounds = column.quantile(quantiles).tolist()
    if pd.isna(upper_bounds[-1]):
        raise ValueError("column has no numeric data to compute quantiles from")

    # First level whose upper bound covers the score; scores beyond the
    # maximum fall back to the top level instead of yielding no result.
    device_range = next(
        (level for level, upper in enumerate(upper_bounds, start=1) if score <= upper),
        divisions,
    )
    range_min = 0 if device_range == 1 else upper_bounds[device_range - 2]
    range_max = upper_bounds[device_range - 1]
    return device_range, [range_min, range_max]

# Get CPU level according to the device

In [8]:
# Build the benchmark table, score an Android device and place that score
# in one of ten quantile levels of the whole table.
cpu_df = get_cpu_dataframe()
score = get_device_score(df=cpu_df, device_system="android")
device_range, column_range = get_range(column=cpu_df['Score'], divisions=10, score=score)
print(
    "The normalized score of the device is ", score,
    "and it correspons to the Level", device_range,
    "that contains the range", column_range,
)

The normalized score of the device is  0.0441699441689714 and it correspons to the Level 6 that contains the range [0.039683019374769146, 0.047372485813102314]


# Get speed level according to the country

In [4]:
# Any device type other than "desktop" / "connected tv" selects the mobile figures
device_type = "la"
# The literal must be "connected tv" with a single space, the spelling used by
# get_device_score; otherwise connected TVs are classified as Mobile.
if device_type.lower() in ("desktop", "connected tv"):
    connection_type = 'Fixed'
else:
    connection_type = 'Mobile'

speed_df = get_speed_dataframe()
speed = get_country_speed(speed_df, "Spain")
if isinstance(speed, str):
    # get_country_speed reports a missing country with a message string;
    # fail with that message instead of an opaque indexing error below.
    raise ValueError(speed)
download_speed = speed[f"{connection_type} downlink"].values[0]
# Place the country's median downlink in one of three quantile levels
device_range, column_range = get_range(speed_df[f"{connection_type} downlink"], 3, download_speed)
print("The median speed of the country is ", download_speed, "and it correspons to the Level", device_range)
print(column_range)
# Filter row countries within the quantile range and extract mean values
quantile_countries = speed_df[(speed_df[f"{connection_type} downlink"] >= column_range[0]) & (speed_df[f"{connection_type} downlink"] <= column_range[1])]
down_speed = quantile_countries[f"{connection_type} downlink"].mean()
up_speed = quantile_countries[f"{connection_type} uplink"].mean()
latency = quantile_countries[f"{connection_type} latency"].mean()
jitter = quantile_countries[f"{connection_type} jitter"].mean()
print("Downlink speed: ",down_speed, ", Uplink speed: ", up_speed, ", Latency: ", latency, ", Jitter: ", jitter)

The median speed of the country is  47.49 and it correspons to the Level 3
[46.899999999999956, 313.3]
Downlink speed:  96.3222950819672 , Uplink speed:  16.297704918032785 , Latency:  22.37704918032787 , Jitter:  6.80327868852459
