In [1]:
!pip install pyspark

Collecting pyspark
  Downloading pyspark-3.5.2.tar.gz (317.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m317.3/317.3 MB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pyspark
  Building wheel for pyspark (setup.py) ... [?25l[?25hdone
  Created wheel for pyspark: filename=pyspark-3.5.2-py2.py3-none-any.whl size=317812365 sha256=78a41ecfc9aed255ca6273536a886bb7307b93bca7ef6c1194d866311824e15f
  Stored in directory: /root/.cache/pip/wheels/34/34/bd/03944534c44b677cd5859f248090daa9fb27b3c8f8e5f49574
Successfully built pyspark
Installing collected packages: pyspark
Successfully installed pyspark-3.5.2


In [2]:
from pyspark.sql import SparkSession
from pyspark.ml import PipelineModel
from pyspark.sql.types import FloatType, IntegerType
from pyspark.sql import SparkSession
from pyspark.ml.pipeline import PipelineModel
import zipfile
import os

In [3]:


# Ordered (column name, converter) pairs describing one comma-separated input row.
# The order is the positional layout expected in ``data_string``.
_FEATURE_SCHEMA = (
    ("battery_power", int),
    ("blue", int),
    ("clock_speed", float),
    ("dual_sim", int),
    ("fc", int),
    ("four_g", int),
    ("int_memory", int),
    ("m_dep", float),
    ("mobile_wt", int),
    ("n_cores", int),
    ("pc", int),
    ("px_height", int),
    ("px_width", int),
    ("ram", int),
    ("sc_h", int),
    ("sc_w", int),
    ("talk_time", int),
    ("three_g", int),
    ("touch_screen", int),
    ("wifi", int),
)


def _parse_feature_row(data_string):
    """Convert a comma-separated row into a ``{column: value}`` dict.

    Values are matched to ``_FEATURE_SCHEMA`` by position. Fields beyond the
    schema length are ignored.

    Raises:
        ValueError: if the row has fewer fields than the schema, or if a field
            cannot be converted to its expected numeric type.
    """
    raw_values = data_string.split(",")
    expected = len(_FEATURE_SCHEMA)
    if len(raw_values) < expected:
        raise ValueError(
            f"data_string must contain at least {expected} "
            f"comma-separated values, got {len(raw_values)}"
        )

    row = {}
    # zip() stops at the schema length, so any surplus trailing fields are dropped.
    for position, ((column, convert), raw) in enumerate(zip(_FEATURE_SCHEMA, raw_values)):
        try:
            row[column] = convert(raw)
        except ValueError as exc:
            raise ValueError(
                f"invalid value {raw!r} for field '{column}' (position {position}): "
                f"expected {convert.__name__}"
            ) from exc
    return row


def apply_model_to_data_string(model_zip_path, data_string,
                               extract_dir="unzipped_model",
                               model_dir_name="lr_model"):
    """
    Load a saved pipeline model from a zip archive and apply it to one data row.

    The row is parsed and validated first, so malformed input fails before the
    archive is extracted or a Spark session is started. The resulting
    "prediction" and "features" columns are printed; nothing is returned.

    Parameters:
    - model_zip_path (str): Path to the zip file containing the saved model.
    - data_string (str): Comma-separated feature values, in the order given by
      ``_FEATURE_SCHEMA``.
    - extract_dir (str): Directory the archive is extracted into.
    - model_dir_name (str): Name of the model directory inside the extracted archive.

    Raises:
    - ValueError: if ``data_string`` has fewer than 20 fields or a field is not
      a valid number of the expected type.
    """
    # Validate the input before doing any file-system or Spark work.
    feature_row = _parse_feature_row(data_string)

    with zipfile.ZipFile(model_zip_path, 'r') as zip_ref:
        zip_ref.extractall(extract_dir)

    # Reuse the active Spark session if one exists, otherwise create it.
    spark = SparkSession.builder.appName("ModelUsageApp").getOrCreate()

    model = PipelineModel.load(os.path.join(extract_dir, model_dir_name))

    # Single-row DataFrame; column names come from the dict keys.
    data_df = spark.createDataFrame([feature_row])

    # Apply the model and show the results.
    predictions = model.transform(data_df)
    predictions.select("prediction", "features").show()



In [5]:
# Archive holding the saved model, and one sample row given as a comma-separated string.
model_zip_path = 'lr_model.zip'
data_string = "842,0,2.2,0,1,0,7,0.6,188,2,2,20,756,2549,9,7,19,0,0,1"
apply_model_to_data_string(model_zip_path=model_zip_path, data_string=data_string)


+----------+--------------------+
|prediction|            features|
+----------+--------------------+
|       1.0|[842.0,0.0,2.2,0....|
+----------+--------------------+

