# Подготовка данных
Загрузим необходимые данные и подготовим их к дальнейшему анализу.

## Setup
Чтобы упростить дальнейший код, сразу определим все необходимые импорты, вспомогательные функции и константы.

In [1]:
import os
import logging
import json
from typing import Literal
from datetime import datetime

import requests
import geopandas as gpd
import pandas as pd

In [2]:
# Configure the root logger. force=True removes handlers that are already
# attached to it, so this configuration takes effect even if logging was
# configured earlier.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S',
                    force = True)


def check_folder(name: str) -> str:
    """
    Make sure a directory exists and return its path

    Missing parent directories are created as well. An already existing
    directory is left untouched.

    :param name: path of the directory
    :return: the same path, unchanged
    :raises FileExistsError: if the path exists but is not a directory
    """

    # exist_ok avoids the race between a separate existence check and the
    # creation: another process may create the folder in between.
    os.makedirs(name, exist_ok=True)

    return name


def get_datatime(y: int, q: int) -> datetime:
    """
    Build the datetime of the first day of a quarter

    :param y: year
    :param q: quarter number, from 1 to 4
    :return: datetime of the first day of the first month of the quarter
    :raises ValueError: if the quarter is outside of 1..4
    """

    quarter_is_valid = 1 <= q <= 4
    if not quarter_is_valid:
        raise ValueError("Quarter must be within [1, 2, 3, 4]")

    # First month of each quarter, in quarter order
    first_months = (1, 4, 7, 10)

    return datetime(year=y, month=first_months[q - 1], day=1)

In [3]:
YEARS = range(2019, 2023)  # 4 years - requires more than 20 GB of RAM

# Working folders; each one is created here if it does not exist yet
SOURCE_PATH = check_folder('data/source')
MERGED_PATH = check_folder('data/merged')
LIGHT_PATH = check_folder('data/light')

# File names (and a file name template) of the downloaded source data
OOKLA_NAME = '{dt:%Y-%m}_{st}_tiles.zip'
IMF_NAME = 'imf.csv'
NATURALEARTH_NAME = 'map.zip'

## Source
Загрузим исходные сырые данные из следующих источников:
* **Ookla** - данные о производительности сети. Включают показатели: `avg_d_kbps`, `avg_u_kbps`, `avg_lat_ms`, `tests`, `devices`, `geometry`.
* **IMF** - данные об экономическом состоянии стран. Используемые показатели: `iso_a3`, `year`, `label`, `income`, `group`.
* **NaturalEarth** - пространственные данные административного деления мира. Используемые показатели: `gu_a3`, `continent`, `geometry`.

In [4]:
def save_source(url: str, name: str, folder: str, timeout: float = 60, chunk_size: int = 1 << 20):
    """
    Download a file and store it as ``folder/name``

    The response body is streamed to disk in chunks instead of being held in
    memory as a whole. Data is first written to a temporary ``.part`` file
    that is moved into place only after the download has completed, so a
    failed download never leaves a truncated file under the final name.

    :param url: address of the file to download
    :param name: file name to store the download under
    :param folder: existing folder to store the file in
    :param timeout: seconds to wait for the server to connect / send data
    :param chunk_size: number of bytes requested per write
    :raises requests.HTTPError: if the server answers with an error status
    """
    logging.info(f'Saving: {name}')

    path = f'{folder}/{name}'
    partial = f'{path}.part'

    try:
        # Without a timeout a stalled connection would block forever
        with requests.get(url, stream=True, timeout=timeout) as r:
            r.raise_for_status()

            with open(partial, 'wb') as f:
                for chunk in r.iter_content(chunk_size=chunk_size):
                    f.write(chunk)

        os.replace(partial, path)
    finally:
        # Only present here when the download or the move has failed
        if os.path.exists(partial):
            os.remove(partial)

### Ookla
Загрузим датасеты Ookla за выбранный период.

In [5]:
# URL template of an Ookla open-data shapefile archive. Placeholders:
# st - service type, dt - datetime of the quarter start, q - quarter number.
OOKLA_URL = 'https://ookla-open-data.s3-us-west-2.amazonaws.com/shapefiles/performance/type%3D{st}/year%3D{dt:%Y}/quarter%3D{q}/{dt:%Y-%m-%d}_performance_{st}_tiles.zip'


def save_source_ookla(st: Literal['fixed', 'mobile']):
    """
    Download every quarterly Ookla archive of one service type

    Goes through all years in YEARS and the four quarters of each year and
    stores each archive under ``SOURCE_PATH/<st>/<year>``.

    :param st: service type of the tiles
    """

    quarters = (1, 2, 3, 4)

    for year in YEARS:
        for quarter in quarters:
            quarter_start = get_datatime(year, quarter)

            source_url = OOKLA_URL.format(st=st, dt=quarter_start, q=quarter)
            archive_name = OOKLA_NAME.format(dt=quarter_start, st=st)
            target_folder = check_folder(f'{SOURCE_PATH}/{st}/{year}')

            save_source(url=source_url, name=archive_name, folder=target_folder)

#### fixed
Загрузим данные фиксированной широкополосной сети:

In [6]:
save_source_ookla('fixed')

2023-05-21 02:53:45 - root - INFO - Saving: 2019-01_fixed_tiles.zip
2023-05-21 02:54:45 - root - INFO - Saving: 2019-04_fixed_tiles.zip
2023-05-21 02:55:38 - root - INFO - Saving: 2019-07_fixed_tiles.zip
2023-05-21 02:56:42 - root - INFO - Saving: 2019-10_fixed_tiles.zip
2023-05-21 02:58:26 - root - INFO - Saving: 2020-01_fixed_tiles.zip
2023-05-21 02:59:45 - root - INFO - Saving: 2020-04_fixed_tiles.zip
2023-05-21 03:01:06 - root - INFO - Saving: 2020-07_fixed_tiles.zip
2023-05-21 03:02:42 - root - INFO - Saving: 2020-10_fixed_tiles.zip
2023-05-21 03:04:12 - root - INFO - Saving: 2021-01_fixed_tiles.zip
2023-05-21 03:05:18 - root - INFO - Saving: 2021-04_fixed_tiles.zip
2023-05-21 03:06:18 - root - INFO - Saving: 2021-07_fixed_tiles.zip
2023-05-21 03:07:43 - root - INFO - Saving: 2021-10_fixed_tiles.zip
2023-05-21 03:08:58 - root - INFO - Saving: 2022-01_fixed_tiles.zip
2023-05-21 03:10:19 - root - INFO - Saving: 2022-04_fixed_tiles.zip
2023-05-21 03:11:35 - root - INFO - Saving: 2022

#### mobile
Загрузим данные мобильной широкополосной сети:

In [7]:
save_source_ookla('mobile')

2023-05-21 03:13:36 - root - INFO - Saving: 2019-01_mobile_tiles.zip
2023-05-21 03:14:36 - root - INFO - Saving: 2019-04_mobile_tiles.zip
2023-05-21 03:15:53 - root - INFO - Saving: 2019-07_mobile_tiles.zip
2023-05-21 03:16:52 - root - INFO - Saving: 2019-10_mobile_tiles.zip
2023-05-21 03:17:37 - root - INFO - Saving: 2020-01_mobile_tiles.zip
2023-05-21 03:18:27 - root - INFO - Saving: 2020-04_mobile_tiles.zip
2023-05-21 03:19:26 - root - INFO - Saving: 2020-07_mobile_tiles.zip
2023-05-21 03:20:14 - root - INFO - Saving: 2020-10_mobile_tiles.zip
2023-05-21 03:21:06 - root - INFO - Saving: 2021-01_mobile_tiles.zip
2023-05-21 03:21:48 - root - INFO - Saving: 2021-04_mobile_tiles.zip
2023-05-21 03:22:31 - root - INFO - Saving: 2021-07_mobile_tiles.zip
2023-05-21 03:23:21 - root - INFO - Saving: 2021-10_mobile_tiles.zip
2023-05-21 03:24:04 - root - INFO - Saving: 2022-01_mobile_tiles.zip
2023-05-21 03:24:49 - root - INFO - Saving: 2022-04_mobile_tiles.zip
2023-05-21 03:25:47 - root - INFO 

### IMF
Загрузим наборы данных IMF за выбранный период.

In [8]:
# URL template of the IMF DataMapper API. Placeholders:
# indicator - indicator code, periods - comma-separated list of years.
IMF_URL = 'https://www.imf.org/external/datamapper/api/v1/{indicator}?periods={periods}'


def save_source_raw_imf(indicator: str, name: str, key: str | None = 'values') -> dict:
    """
    Download one IMF DataMapper indicator and keep only its payload

    The full API response is saved to ``SOURCE_PATH/name`` first. The file is
    then read back, reduced to the part that belongs to the indicator and
    overwritten with that part.

    :param indicator: indicator code, also the key of the payload in the response
    :param name: file name inside SOURCE_PATH
    :param key: top-level key that wraps the indicator in the response;
        a falsy value means the indicator sits at the top level
    :return: payload of the indicator
    """
    path = f'{SOURCE_PATH}/{name}'

    save_source(
        url=IMF_URL.format(indicator=indicator, periods=",".join(map(str, YEARS))),
        name=name,
        folder=SOURCE_PATH
    )

    # JSON text is UTF-8; relying on the platform default encoding would
    # garble non-ASCII text on systems where the default differs.
    with open(path, mode='r', encoding='utf-8') as r:
        response: dict = json.load(r)

    container = response[key] if key else response
    raw: dict = container[indicator]

    with open(path, mode='w', encoding='utf-8') as w:
        json.dump(raw, w)

    return raw

Загрузим данные по индикатору `PPPPC`:
* `PPPPC` - индикатор, характеризующий ВВП на душу населения - общую стоимость конечных товаров и услуг, произведенных в стране в пересчете на ППС, деленную на среднюю численность населения.

In [9]:
IMF_GDP_INDICATOR = 'PPPPC'
IMF_GDP_NAME = 'raw_gdp.json'

In [10]:
raw_gdp = save_source_raw_imf(IMF_GDP_INDICATOR, IMF_GDP_NAME)

2023-05-25 06:01:49 - root - INFO - Saving: raw_gdp.json


В дальнейшем предлагается вычесть из данного индикатора влияние инфляции для получения данных в базовых ценах.

Загрузим данные по индикатору `PCPIEPCH`:
* `PCPIEPCH` - индикатор, характеризующий уровень инфляции в стране: процентное изменение потребительских цен на конец периода.

In [11]:
IMF_INFLATION_INDICATOR = 'PCPIEPCH'
IMF_INFLATION_NAME = 'raw_inflation.json'

In [12]:
raw_inflation = save_source_raw_imf(IMF_INFLATION_INDICATOR, IMF_INFLATION_NAME)

2023-05-25 06:01:53 - root - INFO - Saving: raw_inflation.json


Для удобства интерпретации к набору данных предлагается присоединение информации о наименовании стран.

Загрузим данные по индикатору `countries`:

In [13]:
IMF_COUNTRIES_INDICATOR = 'countries'
IMF_COUNTRIES_NAME = 'raw_labels.json'

In [14]:
raw_labels = save_source_raw_imf(IMF_COUNTRIES_INDICATOR, IMF_COUNTRIES_NAME, key=None)

2023-05-25 06:01:56 - root - INFO - Saving: raw_labels.json


Объединим загруженные показатели и сохраним общий набор данных.

In [15]:
def save_source_imf():
    """
    Combine the downloaded IMF indicators into one table and save it as CSV

    Reads the module-level ``raw_gdp``, ``raw_inflation`` and ``raw_labels``.
    A country is kept only when it has a label and inflation data and when
    both its GDP and inflation series have as many entries as YEARS.
    Income is GDP divided by (1 + inflation / 100), rounded to 3 decimals.
    Within every year the countries are split into four income quartiles.
    """

    group_labels = [
        'Low income',
        'Lower-middle income',
        'Upper-middle income',
        'High income',
    ]
    expected_periods = len(YEARS)

    def iter_records():
        # One record per country and year, complete countries only
        for iso_a3, gdp_by_year in raw_gdp.items():
            label = raw_labels.get(iso_a3)
            inflation = raw_inflation.get(iso_a3)

            if not (label and inflation):
                continue
            if not (len(gdp_by_year) == len(inflation) == expected_periods):
                continue

            for year, gdp in gdp_by_year.items():
                deflated = gdp / (1 + inflation[year] / 100)

                yield {
                    'iso_a3': iso_a3,
                    'year': year,
                    **label,
                    'income': round(deflated, 3),
                }

    def iter_year_frames(records):
        # Quartile borders are computed separately for each year
        for _, year_df in pd.DataFrame(records).groupby('year'):
            year_df['group'] = pd.qcut(year_df['income'], 4, labels=group_labels)

            yield year_df

    table = pd.concat(iter_year_frames(iter_records()))

    logging.info(f'Saving: {IMF_NAME}')

    table.to_csv(f'{SOURCE_PATH}/{IMF_NAME}', index=False)

In [16]:
save_source_imf()

2023-05-25 06:01:59 - root - INFO - Saving: imf.csv


### NaturalEarth
Загрузим набор данных NaturalEarth.

In [17]:
NATURALEARTH_URL = 'https://naciscdn.org/naturalearth/10m/cultural/ne_10m_admin_0_countries_lakes.zip'

In [18]:
save_source(
    url=NATURALEARTH_URL,
    name=NATURALEARTH_NAME,
    folder=SOURCE_PATH
)

2023-05-25 06:02:02 - root - INFO - Saving: map.zip


## Merged
Преобразуем загруженные исходные данные - агрегируем квартальные данные Ookla в годовые и объединим с показателями IMF и NaturalEarth.

In [4]:
def optimize_dtypes(df: pd.DataFrame):
    """
    Downcast the numeric columns of a DataFrame in place to save memory

    Signed and unsigned integer columns are converted to the smallest
    integer type of the same signedness that holds their values, float
    columns are downcast with ``downcast='float'``. Columns of any other
    type are left untouched.

    :param df: DataFrame to optimize; it is modified in place
    """
    for column in df.columns:
        dtype = df[column].dtype

        # Test the dtype family instead of comparing with 'int' / 'float':
        # those strings name one platform-dependent width only, so columns
        # of any other width would be skipped silently.
        if pd.api.types.is_signed_integer_dtype(dtype):
            downcast = 'integer'
        elif pd.api.types.is_unsigned_integer_dtype(dtype):
            downcast = 'unsigned'
        elif pd.api.types.is_float_dtype(dtype):
            downcast = 'float'
        else:
            continue

        df[column] = pd.to_numeric(df[column], downcast=downcast)

### IMF & NaturalEarth
Объединим наборы данных IMF и NaturalEarth.

Загрузим в память набор данных NaturalEarth и приведём его в нужный вид:

In [5]:
# Read only the GU_A3 and CONTINENT attribute columns of the countries layer
map_df: gpd.GeoDataFrame = gpd.read_file(f'{SOURCE_PATH}/{NATURALEARTH_NAME}', engine='pyogrio', columns=['GU_A3', 'CONTINENT'])

# Lower-case the column names and index the table by the gu_a3 code
map_df.rename(columns=str.lower, inplace=True)
map_df.set_index('gu_a3', inplace=True)

Присоединим к нему набор данных IMF:

In [6]:
# Inner join on the index: gu_a3 of the map against iso_a3 of the IMF table;
# rows with any missing value are dropped afterwards
imf_df: gpd.GeoDataFrame = map_df.join(pd.read_csv(f'{SOURCE_PATH}/{IMF_NAME}').set_index('iso_a3'), how='inner').dropna()
optimize_dtypes(imf_df)

display(imf_df)

Unnamed: 0_level_0,continent,geometry,year,label,income,group
gu_a3,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
ABW,North America,"POLYGON ((-69.99694 12.57758, -69.93639 12.531...",2019,Aruba,40003.319,High income
ABW,North America,"POLYGON ((-69.99694 12.57758, -69.93639 12.531...",2020,Aruba,35475.139,High income
ABW,North America,"POLYGON ((-69.99694 12.57758, -69.93639 12.531...",2021,Aruba,40472.824,High income
ABW,North America,"POLYGON ((-69.99694 12.57758, -69.93639 12.531...",2022,Aruba,44696.624,High income
AGO,Africa,"MULTIPOLYGON (((13.07370 -4.63532, 13.06533 -4...",2019,Angola,5882.391,Lower-middle income
...,...,...,...,...,...,...
ZMB,Africa,"POLYGON ((32.92086 -9.40790, 32.92303 -9.46629...",2022,Zambia,3499.386,Low income
ZWE,Africa,"POLYGON ((25.25978 -17.79411, 25.26671 -17.800...",2019,Zimbabwe,369.445,Low income
ZWE,Africa,"POLYGON ((25.25978 -17.79411, 25.26671 -17.800...",2020,Zimbabwe,468.710,Low income
ZWE,Africa,"POLYGON ((25.25978 -17.79411, 25.26671 -17.800...",2021,Zimbabwe,1453.882,Low income


Рассчитаем описательную статистику полученного набора данных:

In [7]:
imf_df.groupby('year').describe().round(2)

Unnamed: 0_level_0,income,income,income,income,income,income,income,income
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
2019,190.0,22260.58,23091.9,84.34,4782.35,14036.33,33220.37,121858.04
2020,190.0,21139.18,21957.86,188.43,4851.99,12916.77,32416.8,119607.09
2021,190.0,22454.66,23359.11,734.79,4922.43,13674.71,34197.28,122455.78
2022,190.0,23919.35,24988.23,676.0,5136.15,14165.08,37109.78,130002.86


Таким образом, набор данных состоит из 760 наблюдений - по 190 стран за каждый год с 2019 по 2022.
При этом наблюдается тенденция к росту среднего показателя дохода - за исключением падения показателя в 2020 году.

### Ookla
Агрегируем загруженные исходные датасеты Ookla по годам и объединим их с полученным ранее набором данных.

In [8]:
def _get_quarter_source(st: Literal['fixed', 'mobile'], y: int, q: int) -> pd.DataFrame:
    """
    Load the source Ookla tiles of one quarter

    :param st: service type of the tiles
    :param y: year
    :param q: quarter
    :return: tiles of the quarter after optimize_dtypes has been applied
    """

    archive = OOKLA_NAME.format(dt=get_datatime(y, q), st=st)
    logging.info(f'Opening: {archive}')

    tiles = gpd.read_file(f'{SOURCE_PATH}/{st}/{y}/{archive}', engine='pyogrio')
    optimize_dtypes(tiles)

    return tiles


def save_year_merged(st: Literal['fixed', 'mobile'], y: int):
    """
    Build, save and display the merged dataset of one year

    The four quarterly tile sets are stacked and dissolved by quadkey
    (speeds and latency averaged, tests and devices summed). The result is
    spatially joined with the rows of ``imf_df`` of the same year and
    written to ``MERGED_PATH/<st>/<y>_<st>_merged``.

    :param st: service type of the tiles
    :param y: year
    """

    # How the quarterly values of a tile are combined into a yearly value
    aggregations = {
        'avg_d_kbps': 'mean',
        'avg_u_kbps': 'mean',
        'avg_lat_ms': 'mean',
        'tests': 'sum',
        'devices': 'sum',
    }

    # A single name is rebound at every step so that the intermediate
    # frames can be released as soon as they are no longer needed
    frame = gpd.GeoDataFrame(
        pd.concat(
            [_get_quarter_source(st, y, q) for q in range(1, 5)],
            join='inner', ignore_index=True
        )
    )

    logging.info('Dissolving...')
    frame = frame.dissolve(by='quadkey', aggfunc=aggregations)

    logging.info('Joining...')
    countries = imf_df[imf_df['year'] == y]
    frame = frame.sjoin(countries, how='inner')
    frame = frame.rename(columns={'index_right': 'iso_a3'})

    logging.info('Saving...')
    target = check_folder(f'{MERGED_PATH}/{st}')
    frame.to_file(f'{target}/{y}_{st}_merged', engine='pyogrio')

    display(frame)

In [9]:
def _get_year_merged(st: Literal['fixed', 'mobile'], y: int) -> pd.DataFrame:
    """
    Load the merged dataset of one year without its geometry

    :param st: service type of the tiles
    :param y: year
    :return: attribute table of the merged dataset with the quadkey column removed
    """

    dataset = f'{y}_{st}_merged'
    logging.info(f'Opening: {dataset}')

    table = gpd.read_file(f'{MERGED_PATH}/{st}/{dataset}', engine='pyogrio', read_geometry=False)
    optimize_dtypes(table)
    table = table.drop(columns='quadkey')

    return table


def save_merged(st: Literal['fixed', 'mobile']):
    """
    Stack the yearly merged datasets into one panel, save it as CSV and display it

    The panel is written to ``MERGED_PATH/<st>/<st>_merged.csv``.

    :param st: service type of the tiles
    """

    panel = pd.concat(
        [_get_year_merged(st, year) for year in YEARS],
        join='inner',
        ignore_index=True,
    )

    logging.info('Saving...')

    target = check_folder(f'{MERGED_PATH}/{st}')
    csv_path = f'{target}/{st}_merged.csv'
    panel.to_csv(csv_path, index=False)

    display(panel)

#### fixed
Агрегируем датасеты фиксированной широкополосной сети по годам.

In [10]:
save_year_merged('fixed', 2019)

2023-05-25 06:03:43 - root - INFO - Opening: 2019-07_fixed_tiles.zip
2023-05-25 06:03:43 - root - INFO - Opening: 2019-01_fixed_tiles.zip
2023-05-25 06:03:43 - root - INFO - Opening: 2019-04_fixed_tiles.zip
2023-05-25 06:03:43 - root - INFO - Opening: 2019-10_fixed_tiles.zip
2023-05-25 06:04:15 - root - INFO - Finished: 2019-04_fixed_tiles.zip
2023-05-25 06:04:17 - root - INFO - Finished: 2019-01_fixed_tiles.zip
2023-05-25 06:04:21 - root - INFO - Finished: 2019-07_fixed_tiles.zip
2023-05-25 06:04:28 - root - INFO - Finished: 2019-10_fixed_tiles.zip
2023-05-25 06:05:03 - root - INFO - Dissolving...
2023-05-25 06:25:14 - root - INFO - Joining...
2023-05-25 07:08:31 - root - INFO - Saving...
  ogr_write(
2023-05-25 09:21:26 - pyogrio._io - INFO - Created 9,670,476 records


Unnamed: 0_level_0,geometry,avg_d_kbps,avg_u_kbps,avg_lat_ms,tests,devices,iso_a3,continent,year,label,income,group
quadkey,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0022133222312322,"POLYGON ((-160.02136 70.64359, -160.02136 70.6...",8200.5,3705.0,53.000000,2,2,USA,North America,2019,United States,63738.781,High income
0022133222330013,"POLYGON ((-160.02686 70.63995, -160.02686 70.6...",9255.5,3189.5,43.500000,2,2,USA,North America,2019,United States,63738.781,High income
0022133222330023,"POLYGON ((-160.03784 70.63448, -160.04333 70.6...",7946.0,3492.5,44.500000,6,5,USA,North America,2019,United States,63738.781,High income
0022133222330032,"POLYGON ((-160.03784 70.63631, -160.03235 70.6...",7384.0,3868.0,65.000000,1,1,USA,North America,2019,United States,63738.781,High income
0022133222330100,"POLYGON ((-160.02686 70.64177, -160.02136 70.6...",8895.0,3429.0,43.000000,2,2,USA,North America,2019,United States,63738.781,High income
...,...,...,...,...,...,...,...,...,...,...,...,...
3131120221112023,"POLYGON ((169.37073 -46.60039, 169.37622 -46.6...",4753.0,4122.0,9.000000,1,1,NZL,Oceania,2019,New Zealand,43137.936,High income
3131120230000011,"POLYGON ((169.47510 -46.55886, 169.47510 -46.5...",16384.0,4677.0,62.333333,4,4,NZL,Oceania,2019,New Zealand,43137.936,High income
3131120230010113,"POLYGON ((169.57947 -46.56264, 169.58496 -46.5...",359.0,494.0,15.000000,1,1,NZL,Oceania,2019,New Zealand,43137.936,High income
3131120300000111,"POLYGON ((170.20020 -46.07323, 170.20020 -46.0...",18896.5,7404.0,45.000000,3,2,NZL,Oceania,2019,New Zealand,43137.936,High income


In [10]:
save_year_merged('fixed', 2020)

2023-05-25 16:34:08 - root - INFO - Opening: 2020-01_fixed_tiles.zip
2023-05-25 16:34:59 - root - INFO - Opening: 2020-04_fixed_tiles.zip
2023-05-25 16:35:52 - root - INFO - Opening: 2020-07_fixed_tiles.zip
2023-05-25 16:36:45 - root - INFO - Opening: 2020-10_fixed_tiles.zip
2023-05-25 16:37:34 - root - INFO - Dissolving...
2023-05-25 17:08:53 - root - INFO - Joining...
2023-05-25 18:12:06 - root - INFO - Saving...
  ogr_write(
2023-05-25 18:27:13 - pyogrio._io - INFO - Created 11,093,221 records


Unnamed: 0_level_0,geometry,avg_d_kbps,avg_u_kbps,avg_lat_ms,tests,devices,iso_a3,continent,year,label,income,group
quadkey,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0022133222312322,"POLYGON ((-160.02686 70.64359, -160.02136 70.6...",5227.000000,2449.000000,54.000000,2,2,USA,North America,2020,United States,62576.123,High income
0022133222313202,"POLYGON ((-160.00488 70.64723, -159.99939 70.6...",8630.000000,3638.000000,44.000000,1,1,USA,North America,2020,United States,62576.123,High income
0022133222330010,"POLYGON ((-160.03784 70.64177, -160.03235 70.6...",9529.000000,3214.000000,44.000000,1,1,USA,North America,2020,United States,62576.123,High income
0022133222330011,"POLYGON ((-160.03235 70.64177, -160.02686 70.6...",9392.000000,3390.000000,42.000000,1,1,USA,North America,2020,United States,62576.123,High income
0022133222330012,"POLYGON ((-160.03784 70.63995, -160.03235 70.6...",8644.000000,3640.000000,64.000000,1,1,USA,North America,2020,United States,62576.123,High income
...,...,...,...,...,...,...,...,...,...,...,...,...
3131120221103101,"POLYGON ((169.35425 -46.58907, 169.35425 -46.5...",32057.333333,17642.000000,18.666667,16,3,NZL,Oceania,2020,New Zealand,42414.555,High income
3131120221103110,"POLYGON ((169.35974 -46.58907, 169.35974 -46.5...",21008.333333,23030.333333,17.333333,27,14,NZL,Oceania,2020,New Zealand,42414.555,High income
3131120230000011,"POLYGON ((169.46960 -46.55886, 169.47510 -46.5...",23052.000000,3830.000000,69.000000,1,1,NZL,Oceania,2020,New Zealand,42414.555,High income
3131120300000113,"POLYGON ((170.20020 -46.07704, 170.20020 -46.0...",9245.666667,4296.000000,11.666667,9,6,NZL,Oceania,2020,New Zealand,42414.555,High income


In [11]:
save_year_merged('fixed', 2021)

2023-05-25 18:27:41 - root - INFO - Opening: 2021-01_fixed_tiles.zip
2023-05-25 18:28:31 - root - INFO - Opening: 2021-04_fixed_tiles.zip
2023-05-25 18:29:21 - root - INFO - Opening: 2021-07_fixed_tiles.zip
2023-05-25 18:30:13 - root - INFO - Opening: 2021-10_fixed_tiles.zip
2023-05-25 18:31:05 - root - INFO - Dissolving...
2023-05-25 19:01:57 - root - INFO - Joining...
2023-05-25 20:03:17 - root - INFO - Saving...
  ogr_write(
2023-05-25 20:42:48 - pyogrio._io - INFO - Created 11,068,904 records


Unnamed: 0_level_0,geometry,avg_d_kbps,avg_u_kbps,avg_lat_ms,tests,devices,iso_a3,continent,year,label,income,group
quadkey,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0022133222330013,"POLYGON ((-160.03235 70.63995, -160.02686 70.6...",7750.000000,2808.000000,41.000000,3,2,USA,North America,2021,United States,65325.674,High income
0022133222330023,"POLYGON ((-160.04333 70.63631, -160.03784 70.6...",863.000000,413.000000,43.000000,1,1,USA,North America,2021,United States,65325.674,High income
0022133222330030,"POLYGON ((-160.03784 70.63813, -160.03235 70.6...",10840.000000,25007.000000,28.000000,2,2,USA,North America,2021,United States,65325.674,High income
0022133222330031,"POLYGON ((-160.03235 70.63813, -160.02686 70.6...",14002.000000,23610.000000,27.000000,1,1,USA,North America,2021,United States,65325.674,High income
0022133222330032,"POLYGON ((-160.03235 70.63631, -160.03235 70.6...",23073.000000,26313.666667,28.333333,3,3,USA,North America,2021,United States,65325.674,High income
...,...,...,...,...,...,...,...,...,...,...,...,...
3131120230000011,"POLYGON ((169.47510 -46.55886, 169.47510 -46.5...",23957.500000,15007.000000,24.500000,7,2,NZL,Oceania,2021,New Zealand,44798.055,High income
3131120300000111,"POLYGON ((170.19470 -46.07323, 170.20020 -46.0...",9833.000000,4695.000000,14.000000,1,1,NZL,Oceania,2021,New Zealand,44798.055,High income
3131120300000113,"POLYGON ((170.20020 -46.07704, 170.20020 -46.0...",9542.000000,4428.333333,17.000000,13,5,NZL,Oceania,2021,New Zealand,44798.055,High income
3131120300000121,"POLYGON ((170.18372 -46.08085, 170.18921 -46.0...",30668.000000,13818.000000,23.000000,1,1,NZL,Oceania,2021,New Zealand,44798.055,High income


In [12]:
save_year_merged('fixed', 2022)

2023-05-25 20:43:17 - root - INFO - Opening: 2022-01_fixed_tiles.zip
2023-05-25 20:44:08 - root - INFO - Opening: 2022-04_fixed_tiles.zip
2023-05-25 20:44:58 - root - INFO - Opening: 2022-07_fixed_tiles.zip
2023-05-25 20:45:46 - root - INFO - Opening: 2022-10_fixed_tiles.zip
2023-05-25 20:46:36 - root - INFO - Dissolving...
2023-05-25 21:15:47 - root - INFO - Joining...
2023-05-25 22:15:04 - root - INFO - Saving...
  ogr_write(
2023-05-25 23:21:07 - pyogrio._io - INFO - Created 10,754,742 records


Unnamed: 0_level_0,geometry,avg_d_kbps,avg_u_kbps,avg_lat_ms,tests,devices,iso_a3,continent,year,label,income,group
quadkey,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0022133222312233,"POLYGON ((-160.03235 70.64359, -160.02686 70.6...",125266.000000,15523.000000,127.000000,1,1,USA,North America,2022,United States,71621.477,High income
0022133222312322,"POLYGON ((-160.02136 70.64359, -160.02136 70.6...",66682.666667,8126.000000,95.666667,22,5,USA,North America,2022,United States,71621.477,High income
0022133222330013,"POLYGON ((-160.03235 70.63995, -160.02686 70.6...",223427.000000,23185.000000,91.000000,1,1,USA,North America,2022,United States,71621.477,High income
0022133222330023,"POLYGON ((-160.04333 70.63631, -160.03784 70.6...",125703.000000,14623.000000,270.000000,3,1,USA,North America,2022,United States,71621.477,High income
0022133222330032,"POLYGON ((-160.03784 70.63631, -160.03235 70.6...",1773.000000,31968.000000,33.000000,1,1,USA,North America,2022,United States,71621.477,High income
...,...,...,...,...,...,...,...,...,...,...,...,...
3111231133022000,"POLYGON ((168.39844 -17.76961, 168.40393 -17.7...",6647.000000,2801.000000,72.000000,3,1,VUT,Oceania,2022,Vanuatu,2754.916,Low income
3111231133022010,"POLYGON ((168.40942 -17.76961, 168.41492 -17.7...",28101.000000,4131.000000,611.000000,2,1,VUT,Oceania,2022,Vanuatu,2754.916,Low income
3111231133023002,"POLYGON ((168.44238 -17.77484, 168.44788 -17.7...",19721.000000,3908.000000,594.000000,1,1,VUT,Oceania,2022,Vanuatu,2754.916,Low income
3111231133023003,"POLYGON ((168.44788 -17.77484, 168.45337 -17.7...",18362.000000,3344.000000,603.000000,5,1,VUT,Oceania,2022,Vanuatu,2754.916,Low income


Объединим полученные датасеты в панельные данные.
*Для объединения данных за 4 года необходимо более 20ГБ ОЗУ*

In [10]:
save_merged('fixed')

2023-05-26 00:01:55 - root - INFO - Opening: 2019_fixed_merged
2023-05-26 00:03:53 - root - INFO - Opening: 2020_fixed_merged
2023-05-26 00:07:42 - root - INFO - Opening: 2021_fixed_merged
2023-05-26 00:12:08 - root - INFO - Opening: 2022_fixed_merged
2023-05-26 00:15:59 - root - INFO - Saving...


Unnamed: 0,avg_d_kbps,avg_u_kbps,avg_lat_ms,tests,devices,iso_a3,continent,year,label,income,group
0,8200.500000,3705.000000,53.000000,2,2,USA,North America,2019,United States,63738.781,High income
1,9255.500000,3189.500000,43.500000,2,2,USA,North America,2019,United States,63738.781,High income
2,7946.000000,3492.500000,44.500000,6,5,USA,North America,2019,United States,63738.781,High income
3,7384.000000,3868.000000,65.000000,1,1,USA,North America,2019,United States,63738.781,High income
4,8895.000000,3429.000000,43.000000,2,2,USA,North America,2019,United States,63738.781,High income
...,...,...,...,...,...,...,...,...,...,...,...
42587338,6647.000000,2801.000000,72.000000,3,1,VUT,Oceania,2022,Vanuatu,2754.916,Low income
42587339,28101.000000,4131.000000,611.000000,2,1,VUT,Oceania,2022,Vanuatu,2754.916,Low income
42587340,19721.000000,3908.000000,594.000000,1,1,VUT,Oceania,2022,Vanuatu,2754.916,Low income
42587341,18362.000000,3344.000000,603.000000,5,1,VUT,Oceania,2022,Vanuatu,2754.916,Low income


#### mobile
Агрегируем датасеты мобильной широкополосной сети по годам.

In [10]:
save_year_merged('mobile', 2019)

2023-05-27 03:23:45 - root - INFO - Opening: 2019-01_mobile_tiles.zip
2023-05-27 03:24:13 - root - INFO - Opening: 2019-04_mobile_tiles.zip
2023-05-27 03:24:41 - root - INFO - Opening: 2019-07_mobile_tiles.zip
2023-05-27 03:25:14 - root - INFO - Opening: 2019-10_mobile_tiles.zip
2023-05-27 03:25:48 - root - INFO - Dissolving...
2023-05-27 03:44:32 - root - INFO - Joining...
2023-05-27 04:26:20 - root - INFO - Saving...
  ogr_write(
2023-05-27 04:34:44 - pyogrio._io - INFO - Created 7,233,701 records


Unnamed: 0_level_0,geometry,avg_d_kbps,avg_u_kbps,avg_lat_ms,tests,devices,iso_a3,continent,year,label,income,group
quadkey,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0022133222312322,"POLYGON ((-160.02136 70.64359, -160.02136 70.6...",17598.000000,15304.500000,77.500000,3,3,USA,North America,2019,United States,63738.781,High income
0022133222312323,"POLYGON ((-160.01587 70.64359, -160.01587 70.6...",4780.000000,1467.000000,90.000000,2,2,USA,North America,2019,United States,63738.781,High income
0022133222330012,"POLYGON ((-160.03784 70.63995, -160.03235 70.6...",7928.000000,16550.000000,71.000000,1,1,USA,North America,2019,United States,63738.781,High income
0022133222330023,"POLYGON ((-160.03784 70.63631, -160.03784 70.6...",8982.666667,8022.333333,78.666667,11,6,USA,North America,2019,United States,63738.781,High income
0022133222330030,"POLYGON ((-160.03784 70.63813, -160.03235 70.6...",621.000000,5769.000000,67.000000,2,1,USA,North America,2019,United States,63738.781,High income
...,...,...,...,...,...,...,...,...,...,...,...,...
3131120213200021,"POLYGON ((169.81018 -46.44543, 169.81567 -46.4...",8175.000000,2368.000000,52.000000,1,1,NZL,Oceania,2019,New Zealand,43137.936,High income
3131120220010010,"POLYGON ((168.84888 -46.55886, 168.85437 -46.5...",1230.000000,1798.000000,55.000000,1,1,NZL,Oceania,2019,New Zealand,43137.936,High income
3131120220133110,"POLYGON ((169.09607 -46.64944, 169.09607 -46.6...",48375.000000,22392.000000,68.000000,6,3,NZL,Oceania,2019,New Zealand,43137.936,High income
3131120221022000,"POLYGON ((169.10156 -46.64944, 169.10706 -46.6...",106397.000000,36887.000000,75.000000,1,1,NZL,Oceania,2019,New Zealand,43137.936,High income


In [10]:
save_year_merged('mobile', 2020)

2023-05-27 06:47:02 - root - INFO - Opening: 2020-01_mobile_tiles.zip
2023-05-27 06:47:31 - root - INFO - Opening: 2020-04_mobile_tiles.zip
2023-05-27 06:48:02 - root - INFO - Opening: 2020-07_mobile_tiles.zip
2023-05-27 06:48:34 - root - INFO - Opening: 2020-10_mobile_tiles.zip
2023-05-27 06:49:07 - root - INFO - Dissolving...
2023-05-27 07:08:40 - root - INFO - Joining...
2023-05-27 07:56:13 - root - INFO - Saving...
  ogr_write(
2023-05-27 08:07:00 - pyogrio._io - INFO - Created 8,196,054 records


Unnamed: 0_level_0,geometry,avg_d_kbps,avg_u_kbps,avg_lat_ms,tests,devices,iso_a3,continent,year,label,income,group
quadkey,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0022133222312322,"POLYGON ((-160.02686 70.64359, -160.02136 70.6...",12065.0,22762.000000,74.000000,3,1,USA,North America,2020,United States,62576.123,High income
0022133222330012,"POLYGON ((-160.03235 70.63995, -160.03235 70.6...",9757.0,20605.000000,67.500000,4,3,USA,North America,2020,United States,62576.123,High income
0022133222330013,"POLYGON ((-160.03235 70.63995, -160.02686 70.6...",11702.0,20146.000000,68.000000,1,1,USA,North America,2020,United States,62576.123,High income
0022133222330023,"POLYGON ((-160.03784 70.63631, -160.03784 70.6...",5262.5,20486.000000,88.500000,5,3,USA,North America,2020,United States,62576.123,High income
0022133222330030,"POLYGON ((-160.03235 70.63813, -160.03235 70.6...",2087.5,1320.500000,95.500000,3,2,USA,North America,2020,United States,62576.123,High income
...,...,...,...,...,...,...,...,...,...,...,...,...
3131120221020110,"POLYGON ((169.13452 -46.61926, 169.14001 -46.6...",104544.0,46247.000000,48.000000,2,2,NZL,Oceania,2020,New Zealand,42414.555,High income
3131120221020130,"POLYGON ((169.13452 -46.62681, 169.14001 -46.6...",44208.0,12577.000000,56.000000,1,1,NZL,Oceania,2020,New Zealand,42414.555,High income
3131120221021032,"POLYGON ((169.16199 -46.63058, 169.16199 -46.6...",92879.5,46431.500000,70.000000,3,2,NZL,Oceania,2020,New Zealand,42414.555,High income
3131120221022000,"POLYGON ((169.10156 -46.64944, 169.10706 -46.6...",31926.0,37893.000000,680.000000,1,1,NZL,Oceania,2020,New Zealand,42414.555,High income


In [11]:
save_year_merged('mobile', 2021)

2023-05-27 10:02:38 - root - INFO - Opening: 2021-01_mobile_tiles.zip
2023-05-27 10:03:08 - root - INFO - Opening: 2021-04_mobile_tiles.zip
2023-05-27 10:03:38 - root - INFO - Opening: 2021-07_mobile_tiles.zip
2023-05-27 10:04:10 - root - INFO - Opening: 2021-10_mobile_tiles.zip
2023-05-27 10:04:41 - root - INFO - Dissolving...
2023-05-27 10:25:35 - root - INFO - Joining...
2023-05-27 11:14:47 - root - INFO - Saving...
  ogr_write(
2023-05-27 11:32:48 - pyogrio._io - INFO - Created 8,406,463 records


Unnamed: 0_level_0,geometry,avg_d_kbps,avg_u_kbps,avg_lat_ms,tests,devices,iso_a3,continent,year,label,income,group
quadkey,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0022133222330013,"POLYGON ((-160.02686 70.63995, -160.02686 70.6...",75.0,101.0,81.5,2,2,USA,North America,2021,United States,65325.674,High income
0022133222330023,"POLYGON ((-160.04333 70.63631, -160.03784 70.6...",8187.0,15491.0,86.0,3,1,USA,North America,2021,United States,65325.674,High income
0022133222330201,"POLYGON ((-160.03784 70.63448, -160.03784 70.6...",13987.0,18390.0,76.0,11,3,USA,North America,2021,United States,65325.674,High income
0022133222330203,"POLYGON ((-160.03784 70.63266, -160.03784 70.6...",19781.5,19936.0,72.0,4,2,USA,North America,2021,United States,65325.674,High income
0022302331031201,"POLYGON ((-166.15173 68.87144, -166.14624 68.8...",13099.0,1281.0,91.0,2,1,USA,North America,2021,United States,65325.674,High income
...,...,...,...,...,...,...,...,...,...,...,...,...
3131120221020112,"POLYGON ((169.13452 -46.62303, 169.14001 -46.6...",79516.0,44990.0,46.0,1,1,NZL,Oceania,2021,New Zealand,44798.055,High income
3131120221020130,"POLYGON ((169.14001 -46.62681, 169.14001 -46.6...",68499.5,6812.0,45.5,3,2,NZL,Oceania,2021,New Zealand,44798.055,High income
3131120221022002,"POLYGON ((169.10706 -46.65321, 169.10706 -46.6...",40800.0,34134.0,43.5,2,2,NZL,Oceania,2021,New Zealand,44798.055,High income
3131120230000010,"POLYGON ((169.46411 -46.55886, 169.46960 -46.5...",41670.0,8312.0,56.0,1,1,NZL,Oceania,2021,New Zealand,44798.055,High income


In [12]:
# Build the merged mobile dataset for 2022. Per the log below, the call opens the
# four quarterly tile archives, then dissolves, joins and saves the result
# (roughly 1.5 hours end to end in the recorded run).
save_year_merged('mobile', 2022)

2023-05-27 11:39:08 - root - INFO - Opening: 2022-01_mobile_tiles.zip
2023-05-27 11:39:37 - root - INFO - Opening: 2022-04_mobile_tiles.zip
2023-05-27 11:40:09 - root - INFO - Opening: 2022-07_mobile_tiles.zip
2023-05-27 11:40:38 - root - INFO - Opening: 2022-10_mobile_tiles.zip
2023-05-27 11:41:08 - root - INFO - Dissolving...
2023-05-27 12:00:15 - root - INFO - Joining...
2023-05-27 12:46:57 - root - INFO - Saving...
  ogr_write(
2023-05-27 13:09:18 - pyogrio._io - INFO - Created 7,904,169 records


Unnamed: 0_level_0,geometry,avg_d_kbps,avg_u_kbps,avg_lat_ms,tests,devices,iso_a3,continent,year,label,income,group
quadkey,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
0022133222312322,"POLYGON ((-160.02136 70.64359, -160.02136 70.6...",17660.000000,14932.500000,82.500000,4,3,USA,North America,2022,United States,71621.477,High income
0022133222330023,"POLYGON ((-160.03784 70.63631, -160.03784 70.6...",19547.333333,14359.666667,107.666667,8,3,USA,North America,2022,United States,71621.477,High income
0022133222330031,"POLYGON ((-160.03235 70.63813, -160.02686 70.6...",6790.000000,22363.000000,68.000000,1,1,USA,North America,2022,United States,71621.477,High income
0022133222330032,"POLYGON ((-160.03235 70.63631, -160.03235 70.6...",13958.000000,14066.333333,156.333333,21,5,USA,North America,2022,United States,71621.477,High income
0022133222330100,"POLYGON ((-160.02136 70.64177, -160.02136 70.6...",7543.500000,12821.500000,87.500000,7,2,USA,North America,2022,United States,71621.477,High income
...,...,...,...,...,...,...,...,...,...,...,...,...
3111322001300302,"POLYGON ((169.29932 -19.50284, 169.30481 -19.5...",44504.000000,42754.000000,26.000000,1,1,VUT,Oceania,2022,Vanuatu,2754.916,Low income
3111322001302220,"POLYGON ((169.27734 -19.54944, 169.28284 -19.5...",19814.000000,3052.000000,30.000000,1,1,VUT,Oceania,2022,Vanuatu,2754.916,Low income
3111322001303122,"POLYGON ((169.34326 -19.53391, 169.34875 -19.5...",10342.000000,8000.000000,29.000000,1,1,VUT,Oceania,2022,Vanuatu,2754.916,Low income
3111322001310032,"POLYGON ((169.37622 -19.49249, 169.38171 -19.4...",32576.000000,6667.000000,30.000000,1,1,VUT,Oceania,2022,Vanuatu,2754.916,Low income


Объединим полученные датасеты в панельные данные.
*Для объединения данных за 4 года необходимо более 20ГБ ОЗУ*

In [13]:
# Combine the per-year mobile merged datasets (2019-2022 in the recorded log)
# into a single panel table. Memory-hungry: the note above asks for 20+ GB of RAM.
save_merged('mobile')

2023-05-27 13:19:20 - root - INFO - Opening: 2019_mobile_merged
2023-05-27 13:21:47 - root - INFO - Opening: 2020_mobile_merged
2023-05-27 13:24:32 - root - INFO - Opening: 2021_mobile_merged
2023-05-27 13:27:23 - root - INFO - Opening: 2022_mobile_merged
2023-05-27 13:30:13 - root - INFO - Saving...


Unnamed: 0,avg_d_kbps,avg_u_kbps,avg_lat_ms,tests,devices,iso_a3,continent,year,label,income,group
0,17598.000000,15304.500000,77.500000,3,3,USA,North America,2019,United States,63738.781,High income
1,4780.000000,1467.000000,90.000000,2,2,USA,North America,2019,United States,63738.781,High income
2,7928.000000,16550.000000,71.000000,1,1,USA,North America,2019,United States,63738.781,High income
3,8982.666667,8022.333333,78.666664,11,6,USA,North America,2019,United States,63738.781,High income
4,621.000000,5769.000000,67.000000,2,1,USA,North America,2019,United States,63738.781,High income
...,...,...,...,...,...,...,...,...,...,...,...
31740382,44504.000000,42754.000000,26.000000,1,1,VUT,Oceania,2022,Vanuatu,2754.916,Low income
31740383,19814.000000,3052.000000,30.000000,1,1,VUT,Oceania,2022,Vanuatu,2754.916,Low income
31740384,10342.000000,8000.000000,29.000000,1,1,VUT,Oceania,2022,Vanuatu,2754.916,Low income
31740385,32576.000000,6667.000000,30.000000,1,1,VUT,Oceania,2022,Vanuatu,2754.916,Low income


## Light
Агрегируем полученные объединённые датасеты по странам.

In [4]:
# Column -> reducer mapping applied when rows are collapsed into countries.
# The key order is preserved on purpose: the aggregated outputs list their
# columns in this same order.
AGG_LIGHT = {
    # descriptive country attributes (presumably constant within a group):
    # take the first value
    **dict.fromkeys(('label', 'income', 'group', 'continent'), 'first'),
    # network performance metrics: median over the grouped rows
    **dict.fromkeys(('avg_d_kbps', 'avg_u_kbps', 'avg_lat_ms'), 'median'),
    # activity counters: total over the grouped rows
    **dict.fromkeys(('tests', 'devices'), 'sum'),
}


def save_year_light(st: Literal['fixed', 'mobile'], y: int):
    """
    Collapse one year's merged Ookla dataset into one row per country and save it.

    Reads ``{MERGED_PATH}/{st}/{y}_{st}_merged``, dissolves it by ``iso_a3``
    using the ``AGG_LIGHT`` reducers, writes the result to
    ``{LIGHT_PATH}/{st}/{y}_{st}_light`` and displays it.

    :param st: network type, 'fixed' or 'mobile'
    :param y: year
    """

    merged_file = f'{MERGED_PATH}/{st}/{y}_{st}_merged'
    tiles = gpd.read_file(merged_file, engine='pyogrio')

    # one record per country code; attribute columns are reduced per AGG_LIGHT
    countries = tiles.dissolve(by='iso_a3', aggfunc=AGG_LIGHT)

    # the output folder is created on demand by check_folder
    target_dir = check_folder(f'{LIGHT_PATH}/{st}')
    countries.to_file(f'{target_dir}/{y}_{st}_light')

    display(countries)

def save_light(st: Literal['fixed', 'mobile']):
    """
    Aggregate the merged Ookla panel dataset per country and year and save it.

    Reads ``{MERGED_PATH}/{st}/{st}_merged.csv``, groups the rows by
    ``iso_a3`` and ``year``, reduces them with ``AGG_LIGHT``, writes the
    result to ``{LIGHT_PATH}/{st}/{st}_light.csv`` and displays it.

    :param st: network type, 'fixed' or 'mobile'
    """

    panel = pd.read_csv(f'{MERGED_PATH}/{st}/{st}_merged.csv')

    # one record per (country code, year) pair
    grouped = panel.groupby(['iso_a3', 'year'])
    summary = grouped.agg(AGG_LIGHT)

    # the output folder is created on demand by check_folder
    target_dir = check_folder(f'{LIGHT_PATH}/{st}')
    summary.to_csv(f'{target_dir}/{st}_light.csv')

    display(summary)

#### fixed
Агрегируем датасеты фиксированной широкополосной сети по странам.

In [None]:
# Dissolve the 2019 fixed-network merged dataset into one row per country and save it.
save_year_light('fixed', 2019)

In [None]:
# Dissolve the 2020 fixed-network merged dataset into one row per country and save it.
save_year_light('fixed', 2020)

In [None]:
# Dissolve the 2021 fixed-network merged dataset into one row per country and save it.
save_year_light('fixed', 2021)

In [5]:
# Dissolve the 2022 fixed-network merged dataset into one row per country and save it.
save_year_light('fixed', 2022)

  _init_gdal_data()
  elif GDALDataFinder().find_file("header.dxf"):


Unnamed: 0_level_0,geometry,label,income,group,continent,avg_d_kbps,avg_u_kbps,avg_lat_ms,tests,devices
iso_a3,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
ABW,"MULTIPOLYGON (((-70.06531 12.54920, -70.05981 ...",Aruba,44696.624,High income,North America,92920.375000,13440.750000,11.125000,34937,12603
AGO,"MULTIPOLYGON (((11.85974 -15.80282, 11.85425 -...",Angola,6083.354,Lower-middle income,Africa,9369.500000,3655.500000,20.000000,44003,12758
ALB,"MULTIPOLYGON (((19.41833 40.32142, 19.42383 40...",Albania,16643.496,Upper-middle income,Europe,27983.000000,17895.000000,7.000000,386234,122539
AND,"MULTIPOLYGON (((1.47766 42.43157, 1.48315 42.4...",Andorra,62391.141,High income,Europe,126793.250000,120227.500000,4.750000,17654,7192
ARE,"MULTIPOLYGON (((52.08069 23.58413, 52.08069 23...",United Arab Emirates,79892.094,High income,Asia,181665.250000,105613.333333,6.500000,2915306,1007779
...,...,...,...,...,...,...,...,...,...,...
WSM,"MULTIPOLYGON (((-171.79321 -13.85941, -171.793...",Samoa,5264.731,Lower-middle income,Oceania,10487.000000,9682.333333,10.000000,148,64
YEM,"MULTIPOLYGON (((42.97302 14.73239, 42.97302 14...",Yemen,1776.223,Low income,Asia,3516.125000,969.875000,67.000000,170910,36332
ZAF,"MULTIPOLYGON (((18.24280 -33.41310, 18.24829 -...",South Africa,14635.377,Upper-middle income,Africa,14159.333333,8601.333333,22.666667,4285023,1005319
ZMB,"MULTIPOLYGON (((25.88379 -17.97873, 25.88379 -...",Zambia,3499.386,Low income,Africa,5585.000000,4903.000000,51.500000,7463,2541


In [6]:
# Aggregate the fixed-network panel CSV per country and year and save the result.
save_light('fixed')

Unnamed: 0_level_0,Unnamed: 1_level_0,label,income,group,continent,avg_d_kbps,avg_u_kbps,avg_lat_ms,tests,devices
iso_a3,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
ABW,2019,Aruba,40003.319,High income,North America,59777.500,10904.000,12.000,29830,10256
ABW,2020,Aruba,35475.139,High income,North America,73356.500,13081.500,12.000,32927,9793
ABW,2021,Aruba,40472.824,High income,North America,84352.500,13256.500,11.250,30696,11231
ABW,2022,Aruba,44696.624,High income,North America,92920.375,13440.750,11.125,34937,12603
AGO,2019,Angola,5882.391,Lower-middle income,Africa,7745.500,2805.000,27.000,37780,11800
...,...,...,...,...,...,...,...,...,...,...
ZMB,2022,Zambia,3499.386,Low income,Africa,5585.000,4903.000,51.500,7463,2541
ZWE,2019,Zimbabwe,369.445,Low income,Africa,7779.750,4636.625,26.500,20461,7358
ZWE,2020,Zimbabwe,468.710,Low income,Africa,7570.500,4837.000,28.000,33775,9534
ZWE,2021,Zimbabwe,1453.882,Low income,Africa,7943.500,4982.000,28.250,27417,7922


#### mobile
Агрегируем датасеты мобильной широкополосной сети по странам.

In [None]:
# Dissolve the 2019 mobile-network merged dataset into one row per country and save it.
save_year_light('mobile', 2019)

In [None]:
# Dissolve the 2020 mobile-network merged dataset into one row per country and save it.
save_year_light('mobile', 2020)

In [None]:
# Dissolve the 2021 mobile-network merged dataset into one row per country and save it.
save_year_light('mobile', 2021)

In [5]:
# Dissolve the 2022 mobile-network merged dataset into one row per country and save it.
save_year_light('mobile', 2022)

Unnamed: 0_level_0,geometry,label,income,group,continent,avg_d_kbps,avg_u_kbps,avg_lat_ms,tests,devices
iso_a3,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
ABW,"MULTIPOLYGON (((-70.05981 12.53848, -70.06531 ...",Aruba,44696.624,High income,North America,60336.250000,17170.000000,21.500000,2963,1424
AGO,"MULTIPOLYGON (((11.83777 -15.80282, 11.83777 -...",Angola,6083.354,Lower-middle income,Africa,16096.000000,7803.000000,31.000000,13491,4776
ALB,"MULTIPOLYGON (((19.45129 40.32561, 19.45129 40...",Albania,16643.496,Upper-middle income,Europe,32679.041667,7957.500000,22.500000,36088,20781
AND,"MULTIPOLYGON (((1.46667 42.43157, 1.46667 42.4...",Andorra,62391.141,High income,Europe,90926.000000,19227.375000,25.416667,723,370
ARE,"MULTIPOLYGON (((52.53662 22.99885, 52.53662 22...",United Arab Emirates,79892.094,High income,Asia,118695.750000,21724.750000,24.000000,214227,74230
...,...,...,...,...,...,...,...,...,...,...
WSM,"MULTIPOLYGON (((-172.51282 -13.79007, -172.507...",Samoa,5264.731,Lower-middle income,Oceania,14609.750000,8078.166667,18.000000,1927,1031
YEM,"MULTIPOLYGON (((42.95105 14.73770, 42.95105 14...",Yemen,1776.223,Low income,Asia,8698.000000,4834.375000,33.000000,86227,24861
ZAF,"MULTIPOLYGON (((18.37463 -34.20272, 18.37463 -...",South Africa,14635.377,Upper-middle income,Africa,31269.333333,7978.000000,27.000000,411941,208488
ZMB,"MULTIPOLYGON (((25.82886 -17.90557, 25.82886 -...",Zambia,3499.386,Low income,Africa,13285.000000,9542.000000,31.333333,13214,5415


In [6]:
# Aggregate the mobile-network panel CSV per country and year and save the result.
save_light('mobile')

Unnamed: 0_level_0,Unnamed: 1_level_0,label,income,group,continent,avg_d_kbps,avg_u_kbps,avg_lat_ms,tests,devices
iso_a3,year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
ABW,2019,Aruba,40003.319,High income,North America,47282.333333,16993.500000,23.250000,3325,1487
ABW,2020,Aruba,35475.139,High income,North America,53285.250000,17919.333333,19.000000,2816,1384
ABW,2021,Aruba,40472.824,High income,North America,63290.250000,17802.750000,19.000000,2922,1240
ABW,2022,Aruba,44696.624,High income,North America,60336.250000,17170.000000,21.500000,2963,1424
AGO,2019,Angola,5882.391,Lower-middle income,Africa,10556.000000,3490.000000,42.000000,6531,2570
...,...,...,...,...,...,...,...,...,...,...
ZMB,2022,Zambia,3499.386,Low income,Africa,13285.000000,9542.000000,31.333334,13214,5415
ZWE,2019,Zimbabwe,369.445,Low income,Africa,9353.500000,4488.333333,34.000000,4661,2059
ZWE,2020,Zimbabwe,468.710,Low income,Africa,7868.000000,3493.000000,33.000000,6372,2206
ZWE,2021,Zimbabwe,1453.882,Low income,Africa,8628.000000,4354.000000,37.000000,10106,3728


На этом этап по подготовке данных окончен.