# Проанализируем данные

In [1]:
import pandas as pd
import polars as pl


def read(n: str, **kwargs):
    """Read a CSV located under ``../data`` and hand it back as a pandas object.

    Args:
        n: File path appended to ``../data/`` (for example ``"public/data.csv"``).
        **kwargs: Passed through unchanged to ``pl.read_csv``.

    Returns:
        Whatever ``.to_pandas()`` yields for the frame parsed by polars.
    """
    csv_path = f"../data/{n}"
    # Parsing is done by polars; the result is converted to pandas afterwards.
    parsed = pl.read_csv(csv_path, **kwargs)
    return parsed.to_pandas()


# Load both CSV files through read(); each path is resolved under ../data/.
public = read("public/data.csv")
private = read("private/data.csv")

In [5]:
def analayze(data: pd.DataFrame) -> pd.DataFrame:
    """Summarise missing and distinct values for every column of ``data``.

    Args:
        data: Frame to profile.

    Returns:
        A DataFrame indexed by the column names of ``data`` with four columns:
        the number of null cells, that number as a whole-percent share of the
        row count, the number of distinct non-null values (``nunique``), and
        that number as a whole-percent share of the row count.

    A frame with zero rows reports 0 % everywhere instead of failing on the
    NaN produced by dividing by a zero row count.
    """
    n_rows = len(data)

    # Compute each per-column count once and reuse it for the percentage.
    missing_values_count = data.isnull().sum()
    unique_values_count = data.nunique()

    def _percent_of_rows(counts: pd.Series) -> pd.Series:
        """Express ``counts`` as integer percentages of the row count."""
        if n_rows == 0:
            # counts / 0 would be NaN, which cannot be cast to int.
            return pd.Series(0, index=counts.index, dtype=int)
        return (counts / n_rows * 100).round().astype(int)

    return pd.DataFrame(
        {
            "Missing Values Count": missing_values_count,
            "Missing Values Percent (%)": _percent_of_rows(missing_values_count),
            "Unique Values Count": unique_values_count,
            "Unique Values Percent (%)": _percent_of_rows(unique_values_count),
        }
    )

In [4]:
# Per-column missing-value and unique-value summary of the public frame.
analayze(public)

Unnamed: 0,Missing Values Count,Missing Values Percent (%),Unique Values Count,Unique Values Percent (%)
blend_id,0,0,341,25
smiles,0,0,53,4
oil_property_param_value,12,1,322,23


In [6]:
# Summarise only the private rows whose oil_property_param_title equals the ID below.
analayze(private.loc[private["oil_property_param_title"] == "ad7e6027-00b8-4c27-918c-d1561f949ad8"])

Unnamed: 0,Missing Values Count,Missing Values Percent (%),Unique Values Count,Unique Values Percent (%)
oil_type,588,1,4,0
blend_id,0,0,343,1
oil_property_param_title,0,0,1,0
oil_property_param_value,411,1,324,1
component_name,0,0,109,0
component_class,16775,38,13,0
polymer,16775,38,2,0
component_property_param_title,0,0,43,0
component_property_param_value,28402,64,1015,2
smiles,16775,38,51,0


In [3]:
# Display the private rows whose oil_property_param_title equals the ID below.
private.loc[private["oil_property_param_title"] == "ad7e6027-00b8-4c27-918c-d1561f949ad8"]

Unnamed: 0,oil_type,blend_id,oil_property_param_title,oil_property_param_value,component_name,component_class,polymer,component_property_param_title,component_property_param_value,smiles
358,3fa07e0a-415c-496d-b88b-557855cb3e77,49743a76-a614-11ee-9529-005056921581,ad7e6027-00b8-4c27-918c-d1561f949ad8,103300.0,615537f6-1f8f-4240-a5e9-8f7be344ecd3,базовое масло 1 гр,no,02236ee6-5eec-4368-a2e4-6f2e73fb0f96,0.0000,CCCCC
359,3fa07e0a-415c-496d-b88b-557855cb3e77,49743a76-a614-11ee-9529-005056921581,ad7e6027-00b8-4c27-918c-d1561f949ad8,103300.0,615537f6-1f8f-4240-a5e9-8f7be344ecd3,базовое масло 1 гр,no,2511714c-ab50-4566-bc92-8e4095d87d01,0.0000,CCCCC
360,3fa07e0a-415c-496d-b88b-557855cb3e77,49743a76-a614-11ee-9529-005056921581,ad7e6027-00b8-4c27-918c-d1561f949ad8,103300.0,615537f6-1f8f-4240-a5e9-8f7be344ecd3,базовое масло 1 гр,no,38b06d9a-bcf2-40de-8e1b-9c3988c42406,0.0001,CCCCC
361,3fa07e0a-415c-496d-b88b-557855cb3e77,49743a76-a614-11ee-9529-005056921581,ad7e6027-00b8-4c27-918c-d1561f949ad8,103300.0,615537f6-1f8f-4240-a5e9-8f7be344ecd3,базовое масло 1 гр,no,f216a0c7-d453-4b15-9b4a-7647cbe2d874,0.0990,CCCCC
362,3fa07e0a-415c-496d-b88b-557855cb3e77,49743a76-a614-11ee-9529-005056921581,ad7e6027-00b8-4c27-918c-d1561f949ad8,103300.0,615537f6-1f8f-4240-a5e9-8f7be344ecd3,базовое масло 1 гр,no,9703e283-f529-4fdb-8d84-24413b2b4338,0.0001,CCCCC
...,...,...,...,...,...,...,...,...,...,...
359362,60c1209c-8e77-467e-a01f-b6321801a260,45f1e44a-9410-11ee-8abf-005056921581,ad7e6027-00b8-4c27-918c-d1561f949ad8,12510.0,1433126f-b36a-4e69-ad2f-ee12d8b46275,,,a59e9688-c5e5-4488-a911-2fc21e0355be,,
359363,60c1209c-8e77-467e-a01f-b6321801a260,45f1e44a-9410-11ee-8abf-005056921581,ad7e6027-00b8-4c27-918c-d1561f949ad8,12510.0,1433126f-b36a-4e69-ad2f-ee12d8b46275,,,f400be8b-3bed-4f17-8a12-9fabd9b73586,,
359364,60c1209c-8e77-467e-a01f-b6321801a260,45f1e44a-9410-11ee-8abf-005056921581,ad7e6027-00b8-4c27-918c-d1561f949ad8,12510.0,1433126f-b36a-4e69-ad2f-ee12d8b46275,,,1931ea10-9783-4065-a77c-04532d7d0559,,
359365,60c1209c-8e77-467e-a01f-b6321801a260,45f1e44a-9410-11ee-8abf-005056921581,ad7e6027-00b8-4c27-918c-d1561f949ad8,12510.0,1433126f-b36a-4e69-ad2f-ee12d8b46275,,,c11112c4-487f-4e2c-812d-84a29b240b69,,
