In [1]:
import os
# Work from the parent of the directory the kernel started in (presumably the
# project root that contains the `tuxai` package — TODO confirm).
# The starting directory is remembered the first time this cell runs so that
# re-executing the cell lands in the same place instead of climbing one more
# level on every run, which a bare relative chdir would do.
if "NOTEBOOK_DIR" not in globals():
    NOTEBOOK_DIR = os.getcwd()
os.chdir(os.path.join(NOTEBOOK_DIR, os.pardir))

from pprint import pprint
from tqdm.auto import tqdm
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

from tuxai.dataset import Dataset, Columns
from tuxai.misc import config_logger, get_config
# from tuxai.report import model_metrics, Report, FeatureImportanceReport

# Initialise logging through the project helper from tuxai.misc.
# NOTE(review): presumably this installs the "timestamp :: LEVEL :: logger :: message"
# format seen in later cell outputs — confirm in tuxai.misc.
config_logger()
# Display settings for this notebook: show up to 300 rows and up to 100
# characters per cell before pandas truncates the rendered output.
pd.set_option("display.max_rows", 300)
pd.set_option("display.max_colwidth", 100)


# Which compressed target is the best (smallest size)?

In [2]:
# For every version listed under report/versions in the config, average each
# target column, sort the averages in ascending order and keep only the first
# (lowest) entry. Each value is a one-element Series: target name -> mean.
d = {
    version: (
        Dataset(version)
        .get_dataframe(Columns.targets)
        .mean()
        .sort_values()
        .iloc[:1]
    )
    for version in get_config()["report"]["versions"]
}
pprint(d)

{413: XZ    6.171931
dtype: float64,
 415: XZ    8.538569
dtype: float64,
 420: XZ    8.567795
dtype: float64,
 500: XZ    8.897627
dtype: float64,
 504: XZ    11.160873
dtype: float64,
 507: XZ    11.165376
dtype: float64,
 508: XZ    9.813614
dtype: float64}


In [3]:
# Mean of every target column for version 413, listed from lowest to highest.
(
    Dataset(413)
    .get_dataframe(Columns.targets)
    .mean()
    .sort_values()
)

XZ                6.171931
XZ-bzImage        6.321029
LZMA              6.765994
LZMA-bzImage      6.906686
BZIP2             8.198103
XZ-vmlinux        8.309487
GZIP              8.456118
GZIP-bzImage      8.606845
LZMA-vmlinux      8.894736
LZO               9.403426
LZO-bzImage       9.541922
LZ4              10.084096
LZ4-bzImage      10.234466
GZIP-vmlinux     10.595388
LZO-vmlinux      11.529966
LZ4-vmlinux      12.222827
BZIP2-bzImage    12.274483
BZIP2-vmlinux    14.255325
vmlinux          47.334369
dtype: float64

# Overview report

In [4]:
# NOTE(review): this import sits in the middle of the notebook; it would be
# easier to find next to the other tuxai imports in the first cell.
from tuxai.report import OverviewReport
# Build the overview report, passing "fi_const_2023" as the feature-importance
# cache name (the log output of this cell reports loading from that cache).
ovr = OverviewReport(fir_cache="fi_const_2023")

2023-02-28 11:46:51,472 :: INFO :: tuxai.report :: loading feature importance data from cache: fi_const_2023


  0%|          | 0/12369 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

2023-02-28 11:48:34,137 :: INFO :: tuxai.report :: loading feature importance data from cache: fi_const_2023


  0%|          | 0/12369 [00:00<?, ?it/s]

  0%|          | 0/7 [00:00<?, ?it/s]

In [5]:
# Show the per-option table stored for the "vmlinux" target. In the recorded
# output each row is an option, with one column per kernel version, a matching
# "freq <version>" column, and the "outliers_score" / "outliers" fields.
# NOTE(review): the version columns are presumably feature-importance ranks and
# the "freq" columns the share of configs with the option enabled — confirm in
# tuxai.report. This reads a private attribute of OverviewReport.
ovr._ranks_and_yes_frequencies["vmlinux"]

Unnamed: 0,4.13,4.15,4.20,5.00,5.04,5.07,5.08,freq 4.13,freq 4.15,freq 4.20,freq 5.00,freq 5.04,freq 5.07,freq 5.08,outliers_score,outliers
UBSAN_ALIGNMENT,8,8,7,8,9,9,9,0.16685,0.250946,0.238409,0.256265,0.748868,0.121931,0.126579,0.476033,[0.7488683406198011]
PINCTRL,8432,8526,9176,9311,9700,10168,10340,0.314805,0.659761,0.86185,0.882618,0.880721,0.882732,0.880901,0.451393,[0.3148052116419265]
PINMUX,5686,704,6392,6458,6732,7102,7160,0.314751,0.643385,0.85457,0.878508,0.877742,0.878317,0.870091,0.444872,[0.3147511937944297]
CEC_NOTIFIER,4736,4895,5381,5469,506,5857,5978,0.073421,0.21304,0.543531,0.550571,0.562154,0.56858,0.691967,0.384188,[0.07342105831766815]
VIDEO_V4L2_SUBDEV_API,5979,6132,6043,6114,7052,7448,7480,0.138804,0.137787,0.140236,0.139234,0.152358,0.146585,0.539935,0.340658,[0.5399352278429047]
PM_OPP,6756,6884,7494,836,7196,8322,8414,0.322314,0.633458,0.637362,0.637079,0.7202,0.816658,0.824796,0.333667,[0.3223136924439835]
RPMSG,492,5823,6317,6436,6651,7037,7099,0.268112,0.615178,0.623015,0.669006,0.661702,0.668138,0.667655,0.328003,[0.2681121842656814]
CRYPTO_SEQIV,5087,5229,5751,5817,5902,6245,6394,0.960643,0.969558,0.971646,0.971532,0.955469,0.953867,0.611778,0.301721,[0.6117775851845094]
PRIME_NUMBERS,216,107,792,783,853,5101,5102,0.262073,0.264377,0.257695,0.257468,0.255426,0.260182,0.594946,0.287494,[0.594945947178762]
MEDIA_CONTROLLER,8458,8628,9335,468,9815,10335,10527,0.285139,0.290451,0.294138,0.287039,0.314737,0.302793,0.602928,0.263325,[0.6029284313278293]


In [6]:
# Same "vmlinux" table, narrowed to the single row labelled "PINMUX" so its
# values can be read vertically.
ovr._ranks_and_yes_frequencies["vmlinux"].loc["PINMUX"]

4.13                              5686
4.15                               704
4.20                              6392
5.00                              6458
5.04                              6732
5.07                              7102
5.08                              7160
freq 4.13                     0.314751
freq 4.15                     0.643385
freq 4.20                      0.85457
freq 5.00                     0.878508
freq 5.04                     0.877742
freq 5.07                     0.878317
freq 5.08                     0.870091
outliers_score                0.444872
outliers          [0.3147511937944297]
Name: PINMUX, dtype: object

In [8]:
# Display the report's missing-options mapping; the recorded output is a dict
# keyed by version number (413, ...) whose values are lists of option names.
# NOTE(review): what "missing" means here is not visible in this notebook —
# confirm in tuxai.report. The full dump is very long; consider showing only
# the number of options per version or a short slice of each list.
ovr._missing_options

{413: ['CLK_RCAR_GEN2_CPG',
  'LCS',
  'SND_SOC_TEGRA20_I2S',
  'ATMEL_TCLIB',
  'ARCH_BINFMT_ELF_STATE',
  'UIO_FSL_ELBC_GPCM_NETX5152',
  'PINCTRL_KIRKWOOD',
  'PCI_GOMMCONFIG',
  'RADIO_AZTECH',
  'PARPORT_MFC3',
  'CLK_R8A7791',
  'ARM_PMU_ACPI',
  'DRM_MSM',
  'HAVE_EXIT_THREAD',
  'FB_OLPC_DCON_1_5',
  'USB_FUNCTIONFS_GENERIC',
  'QE_USB',
  'AU1000_FIR',
  'SERIAL_8250_EM',
  'USB_UHCI_BIG_ENDIAN_MMIO',
  'XILINX_HWICAP',
  'RENESAS_H8S_INTC',
  'USB_BCM63XX_UDC',
  'KERNEL_LZ4',
  'SCLP_ASYNC',
  'DASD',
  'HAVE_KERNEL_GZIP',
  'HAVE_DMA_API_DEBUG',
  'MK6',
  'MTD_NAND_CM_X270',
  'ARM_PMU',
  'SCSI_IBMVFC_TRACE',
  'CRYPTO_DEV_UX500_HASH',
  'RTC_DRV_MXC',
  'RTC_DRV_AB8500',
  'SERIAL_MPSC',
  'CLK_R8A7796',
  'PINCTRL_IMX6SX',
  'VIRT_TO_BUS',
  'EDAC_CELL',
  'SERIAL_8250_OMAP',
  'VIDEO_IMX_MEDIA',
  'CPU_FREQ_PMAC64',
  'BLK_DEV_IDE_AU1XXX_PIO_DBDMA',
  'PCIE_RCAR',
  'SERIAL_BFIN_CONSOLE',
  'SENSORS_AMS_I2C',
  'DRM_MSM_DSI_28NM_8960_PHY',
  'BLK_DEV_IDEDMA_ICS',
  'PA

In [9]:
# For every version listed under report/versions in the config, report whether
# the options frame (loaded with group_collinear_options=False) has a
# "CC_OPTIMIZE_FOR_SIZE" column.
{
    version: (
        "CC_OPTIMIZE_FOR_SIZE"
        in Dataset(version)
        .get_dataframe(Columns.options, group_collinear_options=False)
        .columns
    )
    for version in get_config()["report"]["versions"]
}

    

{413: True, 415: True, 420: True, 500: True, 504: True, 507: True, 508: True}

In [7]:
# Show the "CC_OPTIMIZE_FOR_SIZE" column of `df`.
# NOTE(review): no earlier cell in the visible notebook defines `df`, and this
# cell's execution count (7) is out of order, so it depends on leftover kernel
# state and would raise NameError on Restart & Run All. The frame that produced
# the recorded 92562-row output is not identifiable from here — TODO: load it
# explicitly before this line.
df["CC_OPTIMIZE_FOR_SIZE"]

0        False
1        False
2        False
3        False
4        False
         ...  
92557     True
92558     True
92559     True
92560     True
92561     True
Name: CC_OPTIMIZE_FOR_SIZE, Length: 92562, dtype: bool

In [2]:
# Load the target columns of version 508 into `df`.
# NOTE(review): this rebinds the generic name `df`, which an earlier-positioned
# cell also reads; a more specific name would avoid confusing the two frames.
df = Dataset(508).get_dataframe(Columns.targets)

In [3]:
# Exploration check: a single row selected by position with .iloc comes back
# as a pandas Series (see the recorded output).
type(df.iloc[0])

pandas.core.series.Series