# Подготовка данных Speedtest
В рамках данного ноутбука загрузим необходимые данные и подготовим их к дальнейшему анализу

## Подготовка к работе
Оптимизируем дальнейший код и сразу заполним пространство всем необходимым функционалом.

Импортируем все необходимые библиотеки

In [8]:
import json
from typing import Literal

import requests
import os
from datetime import datetime

import geopandas as gpd
import pandas as pd

## Загрузка исходных данных
На данном этапе необходимо определиться, за какой период брать данные. Каждый год в общей сумме занимает около 8 ГБ в оперативной памяти - если её будет не хватать, скрипт не выполнится полностью

In [2]:
SOURCE_PATH = 'data/source'

YEARS = range(2019, 2023)  # (2019, 2020, 2021, 2022)

In [3]:
def check_folder_name(folder_name: str) -> str:
    """
    Make sure the folder exists and return its path

    :param folder_name: path of the folder; missing parent folders are created too
    :return: the same `folder_name`, unchanged
    :raises FileExistsError: if the path exists but is not a folder
    """

    # exist_ok replaces the separate "exists?" check, which could race with
    # another process creating the folder between the check and the creation
    os.makedirs(folder_name, exist_ok=True)

    return folder_name

In [4]:
def save_source(url: str, name: str, folder: str):
    """
    Download `url` and store the response body as `folder`/`name`

    :param url: address to download
    :param name: file name to save under
    :param folder: target folder, created when missing
    :raises requests.HTTPError: if the server answers with an error status
    """
    print(f'Saving: {name}')

    folder = check_folder_name(folder)
    file = f'{folder}/{name}'
    partial = f'{file}.part'

    # stream=True keeps the archive out of memory instead of buffering it whole;
    # timeout=(connect, read) stops a stalled connection from hanging forever
    with requests.get(url, stream=True, timeout=(10, 300)) as r:
        r.raise_for_status()

        with open(partial, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024 * 1024):
                f.write(chunk)

    # the final name appears only after a complete download,
    # so an interrupted run never leaves a truncated file behind under it
    os.replace(partial, file)

### Ookla
Загрузим датасеты Ookla за выбранный период

In [5]:
def get_datatime(y: int, q: int) -> datetime:
    """
    Build the date of the first day of a quarter

    :param y: year
    :param q: quarter number, from 1 to 4
    :return: datetime of the first day of the quarter
    :raises ValueError: if `q` is outside 1..4
    """

    if 1 <= q <= 4:
        first_months = (1, 4, 7, 10)  # opening month of every quarter
        return datetime(y, first_months[q - 1], 1)

    raise ValueError("Quarter must be within [1, 2, 3, 4]")

In [6]:
OOKLA_URL = 'https://ookla-open-data.s3-us-west-2.amazonaws.com/shapefiles/performance/type%3D{st}/year%3D{dt:%Y}/quarter%3D{q}/{dt:%Y-%m-%d}_performance_{st}_tiles.zip'
OOKLA_NAME = '{dt:%Y-%m}_{st}_tiles.zip'


def save_source_ookla():
    """Download the Ookla tile archives of both serial types for every quarter of `YEARS`"""
    quarters = (1, 2, 3, 4)

    for serial_type in ('fixed', 'mobile'):
        for year in YEARS:
            # all quarters of one year share the same target folder
            target_folder = f'{SOURCE_PATH}/{serial_type}/{year}'

            for quarter in quarters:
                fields = {'dt': get_datatime(year, quarter), 'st': serial_type}

                save_source(
                    url=OOKLA_URL.format(q=quarter, **fields),
                    name=OOKLA_NAME.format(**fields),
                    folder=target_folder,
                )

In [7]:
save_source_ookla()

Saving: 2019-01_fixed_tiles.zip
Saving: 2019-04_fixed_tiles.zip
Saving: 2019-07_fixed_tiles.zip
Saving: 2019-10_fixed_tiles.zip
Saving: 2020-01_fixed_tiles.zip
Saving: 2020-04_fixed_tiles.zip
Saving: 2020-07_fixed_tiles.zip
Saving: 2020-10_fixed_tiles.zip
Saving: 2021-01_fixed_tiles.zip
Saving: 2021-04_fixed_tiles.zip
Saving: 2021-07_fixed_tiles.zip
Saving: 2021-10_fixed_tiles.zip
Saving: 2022-01_fixed_tiles.zip
Saving: 2022-04_fixed_tiles.zip
Saving: 2022-07_fixed_tiles.zip
Saving: 2022-10_fixed_tiles.zip
Saving: 2019-01_mobile_tiles.zip
Saving: 2019-04_mobile_tiles.zip
Saving: 2019-07_mobile_tiles.zip
Saving: 2019-10_mobile_tiles.zip
Saving: 2020-01_mobile_tiles.zip
Saving: 2020-04_mobile_tiles.zip
Saving: 2020-07_mobile_tiles.zip
Saving: 2020-10_mobile_tiles.zip
Saving: 2021-01_mobile_tiles.zip
Saving: 2021-04_mobile_tiles.zip
Saving: 2021-07_mobile_tiles.zip
Saving: 2021-10_mobile_tiles.zip
Saving: 2022-01_mobile_tiles.zip
Saving: 2022-04_mobile_tiles.zip
Saving: 2022-07_mobile_til

### IMF
Начнём загрузку данных от IMF. Предварительно очистим их от лишних ключей

In [7]:
IMF_URL = 'https://www.imf.org/external/datamapper/api/v1/{indicator}?periods={periods}'

Начнём с данных о ВВП на душу населения по ППС, в текущих ценах

In [8]:
IMF_GDP_INDICATOR = 'PPPPC'
IMF_GDP_NAME = 'gdp_raw.json'

In [9]:
# Download the raw IMF response for the GDP indicator
save_source(
    url=IMF_URL.format(indicator=IMF_GDP_INDICATOR, periods=",".join(map(str, YEARS))),
    name=IMF_GDP_NAME,
    folder=SOURCE_PATH
)

# Keep only the values of the indicator and drop the remaining keys.
# The encoding is explicit so that reading does not depend on the OS locale
with open(f'{SOURCE_PATH}/{IMF_GDP_NAME}', mode='r', encoding='utf-8') as r:
    gdp_raw: dict = json.load(r)['values'][IMF_GDP_INDICATOR]

with open(f'{SOURCE_PATH}/{IMF_GDP_NAME}', mode='w', encoding='utf-8') as w:
    json.dump(gdp_raw, w)

Saving: gdp_raw.json


Чтобы вычесть влияние инфляции, скачаем данные и по ней

In [10]:
IMF_INFLATION_INDICATOR = 'PCPIEPCH'
IMF_INFLATION_NAME = 'inflation_raw.json'

In [11]:
# Download the raw IMF response for the inflation indicator
save_source(
    url=IMF_URL.format(indicator=IMF_INFLATION_INDICATOR, periods=",".join(map(str, YEARS))),
    name=IMF_INFLATION_NAME,
    folder=SOURCE_PATH
)

# Keep only the values of the indicator and drop the remaining keys.
# The encoding is explicit so that reading does not depend on the OS locale
with open(f'{SOURCE_PATH}/{IMF_INFLATION_NAME}', mode='r', encoding='utf-8') as r:
    inflation_raw: dict = json.load(r)['values'][IMF_INFLATION_INDICATOR]

with open(f'{SOURCE_PATH}/{IMF_INFLATION_NAME}', mode='w', encoding='utf-8') as w:
    json.dump(inflation_raw, w)

Saving: inflation_raw.json


Теперь загрузим названия стран

In [12]:
IMF_GDP_INDICATOR = 'countries'
IMF_GDP_NAME = 'labels_raw.json'

In [13]:
save_source(
    url=IMF_URL.format(indicator=IMF_GDP_INDICATOR, periods=",".join(map(str, YEARS))),
    name=IMF_GDP_NAME,
    folder=SOURCE_PATH
)

with open(f'{SOURCE_PATH}/{IMF_GDP_NAME}', mode='r') as r:
    labels_raw: dict = json.load(r)[IMF_GDP_INDICATOR]

with open(f'{SOURCE_PATH}/{IMF_GDP_NAME}', mode='w') as w:
    json.dump(labels_raw, w)

Saving: labels_raw.json


На основе `ISO_A3` объединим загруженные данные и сохраним общую таблицу

In [14]:
IMF_NAME = 'imf.csv'


def get_source_imf():
    """
    Yield one row per country and year with the income corrected for inflation

    Reads the module-level `gdp_raw`, `inflation_raw` and `labels_raw` dicts.
    A country is skipped unless it has a label, a GDP value for every year of `YEARS`
    and an inflation value for each of those years.

    :return: generator of dicts with `year`, `iso_a3`, the label fields and `income`
    """
    for iso_a3, values in gdp_raw.items():
        label = labels_raw.get(iso_a3)
        inflation = inflation_raw.get(iso_a3)

        if not (label and inflation):
            continue

        # the series must cover the whole requested period (was a hard-coded 4)
        if len(values) < len(YEARS):
            continue

        # a year without inflation data used to raise KeyError in the middle of the export
        if any(year not in inflation for year in values):
            continue

        for year, income in values.items():
            yield {
                'year': year,
                'iso_a3': iso_a3,
                **label,
                'income': income / (1 + inflation[year] / 100),
            }

def get_group_imf(df: pd.DataFrame):
    """
    Split the rows into four income groups (quartiles) separately for every year

    :param df: table with `year` and `income` columns
    :return: generator of per-year DataFrames with an added `group` column
    """
    labels = [
        'Low income',
        'Lower-middle income',
        'Upper-middle income',
        'High income',
    ]

    for _, rows in df.groupby('year'):
        # `assign` builds a new frame: nothing is written into a groupby slice,
        # and the year does not have to be filtered out of `df` a second time
        yield rows.assign(group=pd.qcut(rows['income'], 4, labels=labels))

def save_source_imf():
    """
    Build the IMF table and save it as `SOURCE_PATH`/`IMF_NAME`

    The `group` column holds the per-year income quartiles produced by `get_group_imf`.
    """
    imf = pd.concat(get_group_imf(pd.DataFrame(get_source_imf())))

    # The per-year groups used to be overwritten at this point by quartiles computed
    # over all years pooled together, which discarded the result of `get_group_imf`
    imf.to_csv(f'{SOURCE_PATH}/{IMF_NAME}', index=False)

In [15]:
save_source_imf()

### Naturalearth
Загрузим данные с границами карт

In [16]:
NATURALEARTH_URL = 'https://naciscdn.org/naturalearth/10m/cultural/ne_10m_admin_0_countries_lakes.zip'
NATURALEARTH_NAME = 'map.zip'

In [17]:
save_source(
    url=NATURALEARTH_URL,
    name=NATURALEARTH_NAME,
    folder=SOURCE_PATH
)

Saving: map.zip


## Обработка данных

In [5]:
MERGED_PATH = 'data/merged'

### IMF & Naturalearth

Загрузим скачанные данные IMF и Naturalearth в память и объединим их в одну таблицу

In [14]:
map_df: gpd.GeoDataFrame = gpd.read_file(f'{SOURCE_PATH}/{NATURALEARTH_NAME}').set_index('ISO_A3')[['geometry']]
map_df.head()

Unnamed: 0_level_0,geometry
ISO_A3,Unnamed: 1_level_1
IDN,"MULTIPOLYGON (((117.70361 4.16341, 117.70361 4..."
MYS,"MULTIPOLYGON (((117.70361 4.16341, 117.69711 4..."
CHL,"MULTIPOLYGON (((-69.51009 -17.50659, -69.50611..."
BOL,"POLYGON ((-69.51009 -17.50659, -69.51009 -17.5..."
PER,"MULTIPOLYGON (((-69.51009 -17.50659, -69.63832..."


In [15]:
imf_df: gpd.GeoDataFrame = map_df.join(pd.read_csv(f'{SOURCE_PATH}/{IMF_NAME}', index_col='iso_a3'), how='inner').dropna()
imf_df.head()

Unnamed: 0_level_0,geometry,year,label,income,group
ISO_A3,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
ABW,"POLYGON ((-69.99694 12.57758, -69.93639 12.531...",2019,Aruba,40003.319498,Pre-high income
ABW,"POLYGON ((-69.99694 12.57758, -69.93639 12.531...",2020,Aruba,35475.139319,Pre-high income
ABW,"POLYGON ((-69.99694 12.57758, -69.93639 12.531...",2021,Aruba,40472.824324,Pre-high income
ABW,"POLYGON ((-69.99694 12.57758, -69.93639 12.531...",2022,Aruba,44696.624409,Pre-high income
AGO,"MULTIPOLYGON (((13.07370 -4.63532, 13.06533 -4...",2019,Angola,5882.390932,Lower-middle income


Для оптимизации датасета пустые значения были исключены из базы данных. Чтобы проверить, с чем мы имеем дело, проведём описательную статистику полученного датасета:

In [16]:
imf_df.groupby('year').describe()

Unnamed: 0_level_0,income,income,income,income,income,income,income,income
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max
year,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2
2019,189.0,21773.388412,22903.065752,84.34119,4719.459016,13761.48216,33021.297297,121858.04386
2020,189.0,20660.671037,21748.851386,188.428459,4722.844156,12789.143803,30118.877571,119607.087174
2021,189.0,21952.180099,23148.674676,426.524414,4785.555755,13366.498088,33321.322243,122455.782197
2022,189.0,23376.94874,24751.065288,342.252475,4871.203627,13921.402844,36184.044878,130002.85889


Таким образом, всего в наборе данных участвует 189 стран - пропущенных значений нет

### Ookla

Агрегируем полученные исходные данные Ookla по годам и объединим их с данными IMF на основе геометок

In [17]:
def get_quarter(st: Literal['fixed', 'mobile'], y: int, q: int) -> gpd.GeoDataFrame:
    """
    Load the downloaded Ookla archive of a single quarter

    :param st: serial_type
    :param y: year
    :param q: quarter
    :return: GeoDataFrame read from the archive
    """
    archive = OOKLA_NAME.format(dt=get_datatime(y, q), st=st)
    print(f'Opening: {archive}')

    path = f'{SOURCE_PATH}/{st}/{y}/{archive}'
    return gpd.read_file(path)


def get_year(st: Literal['fixed', 'mobile'], y: int) -> gpd.GeoDataFrame:
    """
    Load the four quarters of a year and collapse them to one row per `quadkey`

    :param st: serial_type
    :param y: year
    :return: GeoDataFrame indexed by `quadkey`
    """

    quarters = [get_quarter(st, y, q) for q in range(1, 5)]
    # join='inner' keeps only the columns present in every quarter
    stacked = pd.concat(quarters, join='inner', ignore_index=True)

    # speed and latency are averaged over the quarters (plain, unweighted mean),
    # the counters are added up
    yearly_agg = {
        'avg_d_kbps': 'mean',
        'avg_u_kbps': 'mean',
        'avg_lat_ms': 'mean',
        'tests': 'sum',
        'devices': 'sum',
    }

    return gpd.GeoDataFrame(stacked).dissolve(by='quadkey', aggfunc=yearly_agg)

In [18]:
def save_merged_by_year(st: Literal['fixed', 'mobile'], y: int) -> gpd.GeoDataFrame:
    """
    Join the Ookla tiles of one year with the IMF rows of that year and save the result

    :param st: serial_type
    :param y: year
    :return: GeoDataFrame of tiles with the IMF columns and an `iso_a3` column
    """

    # Aggregate the quarterly Ookla datasets to the year
    df = get_year(st, y)

    print('Joining with IMF datas...')

    df = df.sjoin(imf_df[imf_df['year'] == y], how='inner')

    # The column holding the index of `imf_df` is called `index_right` in older GeoPandas,
    # while GeoPandas >= 1.0 keeps the index name (`ISO_A3`); rename whichever is present,
    # otherwise `iso_a3` would be missing from the saved dataset
    df = df.rename(columns={'index_right': 'iso_a3', 'ISO_A3': 'iso_a3'})

    folder = check_folder_name(f'{MERGED_PATH}/{st}')

    df.to_file(f'{folder}/{y}_{st}_merged')
    return df

def save_merged(st: Literal['fixed', 'mobile'], *df: gpd.GeoDataFrame) -> pd.DataFrame:
    """
    Stack the yearly datasets into one table and save it as CSV

    The frames are labelled with the values of `YEARS` in the order they are passed.

    :param st: serial_type
    :param df: datasets by years
    :return: DataFrame with the selected columns
    """

    columns = [
        'year', 'iso_a3', 'label', 'income', 'group',
        'avg_d_kbps', 'avg_u_kbps', 'avg_lat_ms', 'tests', 'devices',
    ]
    panel = pd.concat(df, keys=YEARS)[columns]

    target = check_folder_name(f'{MERGED_PATH}/{st}')
    panel.to_csv(f'{target}/{st}_merged.csv')

    return panel

#### fixed
Данные Ookla подразделяются на фиксированные и мобильные. Начнём с первых

Загрузим скачанные фиксированные данные Ookla в память и агрегируем их по годам

In [31]:
merged_2019 = save_merged_by_year('fixed', 2019)
merged_2019.head()

Opening: 2019-01_fixed_tiles.zip
Opening: 2019-04_fixed_tiles.zip
Opening: 2019-07_fixed_tiles.zip
Opening: 2019-10_fixed_tiles.zip
Joining with IMF datas...


Unnamed: 0_level_0,geometry,avg_d_kbps,avg_u_kbps,avg_lat_ms,tests,devices,iso_a3,year,label,income,group
quadkey,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
22133222312322,"POLYGON ((-160.02136 70.64359, -160.02136 70.6...",8200.5,3705.0,53.0,2,2,USA,2019,United States,63738.780607,High income
22133222330013,"POLYGON ((-160.02686 70.63995, -160.02686 70.6...",9255.5,3189.5,43.5,2,2,USA,2019,United States,63738.780607,High income
22133222330023,"POLYGON ((-160.03784 70.63448, -160.04333 70.6...",7946.0,3492.5,44.5,6,5,USA,2019,United States,63738.780607,High income
22133222330032,"POLYGON ((-160.03784 70.63631, -160.03235 70.6...",7384.0,3868.0,65.0,1,1,USA,2019,United States,63738.780607,High income
22133222330100,"POLYGON ((-160.02686 70.64177, -160.02136 70.6...",8895.0,3429.0,43.0,2,2,USA,2019,United States,63738.780607,High income


In [32]:
merged_2020 = save_merged_by_year('fixed', 2020)
merged_2020.head()

Opening: 2020-01_fixed_tiles.zip
Opening: 2020-04_fixed_tiles.zip
Opening: 2020-07_fixed_tiles.zip
Opening: 2020-10_fixed_tiles.zip
Joining with IMF datas...


Unnamed: 0_level_0,geometry,avg_d_kbps,avg_u_kbps,avg_lat_ms,tests,devices,iso_a3,year,label,income,group
quadkey,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
22133222312322,"POLYGON ((-160.02686 70.64359, -160.02136 70.6...",5227.0,2449.0,54.0,2,2,USA,2020,United States,62576.123031,High income
22133222313202,"POLYGON ((-160.00488 70.64723, -159.99939 70.6...",8630.0,3638.0,44.0,1,1,USA,2020,United States,62576.123031,High income
22133222330010,"POLYGON ((-160.03784 70.64177, -160.03235 70.6...",9529.0,3214.0,44.0,1,1,USA,2020,United States,62576.123031,High income
22133222330011,"POLYGON ((-160.03235 70.64177, -160.02686 70.6...",9392.0,3390.0,42.0,1,1,USA,2020,United States,62576.123031,High income
22133222330012,"POLYGON ((-160.03784 70.63995, -160.03235 70.6...",8644.0,3640.0,64.0,1,1,USA,2020,United States,62576.123031,High income


In [33]:
merged_2021 = save_merged_by_year('fixed', 2021)
merged_2021.head()

Opening: 2021-01_fixed_tiles.zip
Opening: 2021-04_fixed_tiles.zip
Opening: 2021-07_fixed_tiles.zip
Opening: 2021-10_fixed_tiles.zip
Joining with IMF datas...


Unnamed: 0_level_0,geometry,avg_d_kbps,avg_u_kbps,avg_lat_ms,tests,devices,iso_a3,year,label,income,group
quadkey,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
22133222330013,"POLYGON ((-160.03235 70.63995, -160.02686 70.6...",7750.0,2808.0,41.0,3,2,USA,2021,United States,65325.674115,High income
22133222330023,"POLYGON ((-160.04333 70.63631, -160.03784 70.6...",863.0,413.0,43.0,1,1,USA,2021,United States,65325.674115,High income
22133222330030,"POLYGON ((-160.03784 70.63813, -160.03235 70.6...",10840.0,25007.0,28.0,2,2,USA,2021,United States,65325.674115,High income
22133222330031,"POLYGON ((-160.03235 70.63813, -160.02686 70.6...",14002.0,23610.0,27.0,1,1,USA,2021,United States,65325.674115,High income
22133222330032,"POLYGON ((-160.03235 70.63631, -160.03235 70.6...",23073.0,26313.666667,28.333333,3,3,USA,2021,United States,65325.674115,High income


In [19]:
merged_2022 = save_merged_by_year('fixed', 2022)
merged_2022.head()

Opening: 2022-01_fixed_tiles.zip
Opening: 2022-04_fixed_tiles.zip
Opening: 2022-07_fixed_tiles.zip
Opening: 2022-10_fixed_tiles.zip
Joining with IMF datas...


Unnamed: 0_level_0,geometry,avg_d_kbps,avg_u_kbps,avg_lat_ms,tests,devices,iso_a3,year,label,income,group
quadkey,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
22133222312233,"POLYGON ((-160.03235 70.64359, -160.02686 70.6...",125266.0,15523.0,127.0,1,1,USA,2022,United States,71621.476548,High income
22133222312322,"POLYGON ((-160.02136 70.64359, -160.02136 70.6...",66682.666667,8126.0,95.666667,22,5,USA,2022,United States,71621.476548,High income
22133222330013,"POLYGON ((-160.03235 70.63995, -160.02686 70.6...",223427.0,23185.0,91.0,1,1,USA,2022,United States,71621.476548,High income
22133222330023,"POLYGON ((-160.04333 70.63631, -160.03784 70.6...",125703.0,14623.0,270.0,3,1,USA,2022,United States,71621.476548,High income
22133222330032,"POLYGON ((-160.03784 70.63631, -160.03235 70.6...",1773.0,31968.0,33.0,1,1,USA,2022,United States,71621.476548,High income


Объединим полученные датасеты в панельные данные.
*Для объединения данных за 4 года необходимо более 32 ГБ ОЗУ*

In [20]:
merged_df = save_merged('fixed', merged_2019, merged_2020, merged_2021, merged_2022)
merged_df.head()

Unnamed: 0,Unnamed: 1,year,iso_a3,label,income,group,avg_d_kbps,avg_u_kbps,avg_lat_ms,tests,devices
2019,0,2019,USA,United States,63738.780607,High income,8200.5,3705.0,53.0,2,2
2019,1,2019,USA,United States,63738.780607,High income,9255.5,3189.5,43.5,2,2
2019,2,2019,USA,United States,63738.780607,High income,7946.0,3492.5,44.5,6,5
2019,3,2019,USA,United States,63738.780607,High income,7384.0,3868.0,65.0,1,1
2019,4,2019,USA,United States,63738.780607,High income,8895.0,3429.0,43.0,2,2


#### mobile
Проделаем то же самое с мобильными данными

Загрузим скачанные мобильные данные Ookla в память и агрегируем их по годам

In [None]:
merged_2019 = save_merged_by_year('mobile', 2019)
merged_2019.head()

In [None]:
merged_2020 = save_merged_by_year('mobile', 2020)
merged_2020.head()

In [None]:
merged_2021 = save_merged_by_year('mobile', 2021)
merged_2021.head()

In [None]:
merged_2022 = save_merged_by_year('mobile', 2022)
merged_2022.head()

Объединим полученные датасеты в панельные данные

In [None]:
merged_df = save_merged('mobile', merged_2019, merged_2020, merged_2021, merged_2022)
merged_df.head()

## Light
На основе полученных агрегированных данных создадим облегчённые наборы, чтобы ускорить их дальнейшую визуализацию

In [6]:
LIGHT_PATH = 'data/light'

### Ookla

In [9]:
# Aggregation rules applied when the merged rows are collapsed per country
AGG = {
    'label': 'first',  # IMF columns: the first value of the group is taken
    'income': 'first',
    'group': 'first',
    'avg_d_kbps': 'median',  # speed and latency columns: median of the group
    'avg_u_kbps': 'median',
    'avg_lat_ms': 'median',
    'tests': 'sum',  # counters are added up
    'devices': 'sum',
}


def save_light_by_year(st: Literal['fixed', 'mobile'], y: int) -> gpd.GeoDataFrame:
    """
    Aggregate the merged Ookla dataset of one year to countries and save it

    :param st: serial_type
    :param y: year
    :return: GeoDataFrame indexed by `iso_a3`
    """

    merged = gpd.read_file(f'{MERGED_PATH}/{st}/{y}_{st}_merged')
    by_country = merged.dissolve(by='iso_a3', aggfunc=AGG)  # one row per country

    target = check_folder_name(f'{LIGHT_PATH}/{st}')
    by_country.to_file(f'{target}/{y}_{st}_light')

    return by_country

def save_light(st: Literal['fixed', 'mobile']) -> pd.DataFrame:
    """
    Aggregate the merged CSV dataset to countries and years and save it

    :param st: serial_type
    :return: DataFrame indexed by `iso_a3` and `year`
    """

    merged = pd.read_csv(f'{MERGED_PATH}/{st}/{st}_merged.csv')
    by_country_year = merged.groupby(by=['iso_a3', 'year']).agg(AGG)

    target = check_folder_name(f'{LIGHT_PATH}/{st}')
    by_country_year.to_csv(f'{target}/{st}_light.csv')

    return by_country_year

#### fixed

In [None]:
light_2019 = save_light_by_year('fixed', 2019)
light_2019.head()

In [None]:
light_2020 = save_light_by_year('fixed', 2020)
light_2020.head()

In [None]:
light_2021 = save_light_by_year('fixed', 2021)
light_2021.head()

In [None]:
light_2022 = save_light_by_year('fixed', 2022)
light_2022.head()

In [10]:
light = save_light('fixed')
light.head()

KeyError: "Column(s) ['amount'] do not exist"

#### mobile

In [None]:
light_2019 = save_light_by_year('mobile', 2019)
light_2019.head()

In [None]:
light_2020 = save_light_by_year('mobile', 2020)
light_2020.head()

In [None]:
light_2021 = save_light_by_year('mobile', 2021)
light_2021.head()

In [None]:
light_2022 = save_light_by_year('mobile', 2022)
light_2022.head()

In [None]:
light = save_light('mobile')
light.head()

На этом этап по подготовке данных окончен