In [1]:
from IPython.display import display

import numpy as np
import pandas as pd

from conf.settings import (
    get_settings, Settings,
    get_read_file_settings, ReadFileSettings,
)
from recsys_tasks.processors.sparse_matrix import SparseMatrixProcessor


In [2]:
# Получение конфигурации для расчетов.
settings: Settings = get_settings()
read_file_settings: ReadFileSettings = get_read_file_settings()

In [3]:
# Получение матричного представления исходных данных.
input_data_filename = '3_sparse_matrix.csv'
input_data_filepath = settings.INPUT_DATA_DIR / input_data_filename

source_data: np.ndarray = np.genfromtxt(
    input_data_filepath,
    dtype=float,
    **read_file_settings.model_dump(),
)
source_data: np.ndarray[tuple[int, int], int]
pd.DataFrame(source_data)

Unnamed: 0,0,1,2,3,4,5,6
0,2.0,0.0,0.0,0.0,3.0,0.0,0.0
1,0.0,0.0,3.0,4.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,5.0,0.0,0.0,4.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,5.0,4.0,0.0,0.0,0.0,5.0


In [4]:
# Инициализация процессора
processor = SparseMatrixProcessor(source_data)

### 1. value-row-col

In [5]:
values, rows, columns = processor.separated

print('Значения:')
display(values)

print('Строки:')
display(rows)

print('Столбцы:')
display(columns)

Значения:


array([2., 3., 3., 4., 5., 4., 5., 4., 5.])

Строки:


array([0, 0, 1, 1, 3, 3, 5, 5, 5])

Столбцы:


array([0, 4, 2, 3, 3, 6, 1, 2, 6])

### 2. value-col-row_index

In [6]:
values, columns, row_index = processor.row_index

print('Значения:')
display(values)

print('Столбцы:')
display(rows)

print('Индексы строк:')
display(columns)

Значения:


array([2., 3., 3., 4., 5., 4., 5., 4., 5.])

Столбцы:


array([0, 0, 1, 1, 3, 3, 5, 5, 5])

Индексы строк:


array([0, 4, 2, 3, 3, 6, 1, 2, 6])

### 3. values-columns

In [7]:
values_mx, columns_mx = processor.value_columns

print('Матрица значений:')
display(pd.DataFrame(values_mx))

print('Матрица индексов столбцов:')
display(pd.DataFrame(columns_mx))

Матрица значений:


Unnamed: 0,0,1,2
0,2.0,3.0,0.0
1,3.0,4.0,0.0
2,0.0,0.0,0.0
3,5.0,4.0,0.0
4,0.0,0.0,0.0
5,5.0,4.0,5.0


Матрица индексов столбцов:


Unnamed: 0,0,1,2
0,0.0,4.0,0.0
1,2.0,3.0,0.0
2,0.0,0.0,0.0
3,3.0,6.0,0.0
4,0.0,0.0,0.0
5,1.0,2.0,6.0


### 4. reformed matrix

In [8]:
pd.DataFrame(
    processor.get_reformed_matrix(
        clear_zero_rated=True,
        clear_inactive_users=True,
        min_rating=1.15,
    )
)

Unnamed: 0,0,1,2,3,4,5
0,0.0,0.0,3.0,4.0,0.0,0.0
1,0.0,0.0,0.0,5.0,0.0,4.0
2,0.0,5.0,4.0,0.0,0.0,5.0
