In [1]:
# Alternative times file, currently disabled:
#TIMESFILE='../data/ankara-basics/times_par2_to3600.csv.gz'
# CSV loaded into `t` below.
TIMESFILE = "../data/xtra-ankara/times_par10_to3600.csv.gz"
# CSV loaded into `f` below.
FEATSFILE = "../data/solving-and-features/features-sumpb.csv.gz"
# Columns used as the join key between the two tables.
IDCOLS = ["ProblemDir", "ParamFile"]
# How many summary entries are placed side by side on each printed table row.
DISPLAYCOLS = 2

import math
import numpy as np
import pandas as pd

# Read both input tables: `t` from TIMESFILE, `f` from FEATSFILE.
t, f = (pd.read_csv(_path) for _path in (TIMESFILE, FEATSFILE))

# Distinct (ProblemDir, ParamFile) pairs that appear in both tables.
instances = pd.merge(t[IDCOLS], f[IDCOLS]).drop_duplicates()

In [2]:
# Show the distinct id pairs of each input table, times first, then features.
for _frame in (t, f):
    print(_frame[IDCOLS].drop_duplicates())

        ProblemDir                       ParamFile
0             bacp   bacp-m1-05-c18-xcsp2018.param
81            bacp   bacp-m1-06-c18-xcsp2018.param
162           bacp  bacp-m1-07a-c18-xcsp2018.param
243           bacp  bacp-m1-07b-c18-xcsp2018.param
324           bibd          bibd-miguel-hard.param
...            ...                             ...
49965  tickTackToe              tickTackToe7.param
50046  waterBucket              waterBucket1.param
50127  waterBucket              waterBucket2.param
50208  waterBucket              waterBucket3.param
50289  waterBucket              waterBucket4.param

[625 rows x 2 columns]
                ProblemDir             ParamFile
0                semigroup         semi-14.param
1                semigroup         semi-09.param
2                semigroup         semi-11.param
3                semigroup         semi-06.param
4                semigroup         semi-10.param
..                     ...                   ...
816  quasiGroup4Idemp

## Hmmm, still some instances without sums or PBs?

In [3]:
# Attach the PB and sum counts from the feature table to every shared instance.
_count_cols = ['sarofe_pbs_count', 'sarofe_sums_count']
corpus = instances.merge(f[IDCOLS + _count_cols])
corpus.columns = ['model', 'param', 'n_pbs', 'n_sums']

# Total number of PBs plus sums per instance; computed once and reused for
# both the report of empty instances and the filter.
_n_constraints = corpus.n_pbs + corpus.n_sums
print("WITHOUT SUMS OR PBs!!!\n", corpus.loc[_n_constraints == 0, ['model', 'param']])
corpus = corpus.loc[_n_constraints > 0]

# Per model: number of instances and mean PB / sum counts, largest class first.
summary = (
    corpus
    .groupby('model')
    .agg({'param': 'count', 'n_pbs': 'mean', 'n_sums': 'mean'})
    .reset_index()
    .sort_values('param', ascending=False)
)
print(summary)
print("Total # of classes shown:", len(summary))
print("Total # of instances shown:", summary.param.sum())

WITHOUT SUMS OR PBs!!!
          model          param
580  semigroup  semi-05.param
581  semigroup  semi-06.param
582  semigroup  semi-07.param
583  semigroup  semi-08.param
584  semigroup  semi-09.param
585  semigroup  semi-10.param
586  semigroup  semi-11.param
587  semigroup  semi-12.param
588  semigroup  semi-13.param
589  semigroup  semi-14.param
590  semigroup  semi-15.param
                       model  param        n_pbs       n_sums
16             killerSudoku2     50  1811.200000   129.920000
6              carSequencing     49   435.714286     0.000000
18                   knights     44   170.454545   336.909091
19                  langford     39   146.205128     0.000000
27                       opd     36    21.888889    76.222222
17                  knapsack     28     1.000000     1.000000
45                    sonet2     24    10.000000     1.000000
15               immigration     23     0.000000     1.000000
2               bibd-implied     22   410.590909     0.000

In [4]:
# Lay the summary out so that DISPLAYCOLS summary entries share one printed row.
a_freqs = summary.to_numpy()
# Fields per summary entry, taken from the data instead of a hard-coded 4.
_n_fields = a_freqs.shape[1]

# Number of blank entries needed so the entry count is a multiple of DISPLAYCOLS
# (0 when it already is).
items_needed = -len(a_freqs) % DISPLAYCOLS

# Padding entries: a blank label followed by zeros. An object array keeps the
# values as written instead of coercing the whole row to strings, and a
# zero-row array is valid, so no special case is needed when nothing is missing.
extra_rows = np.empty((items_needed, _n_fields), dtype=object)
extra_rows[:, 0] = ' '
extra_rows[:, 1:] = 0
filled_freqs = np.append(a_freqs, extra_rows, axis=0)

# Each row of `wide` holds DISPLAYCOLS consecutive summary entries.
wide = filled_freqs.reshape(-1, _n_fields * DISPLAYCOLS)

def _show(x):
    if type(x)==float:
        return f"{x:.1f}"
    else:
        return str(x).replace("_",r"\_")

# Emit the table body: cells joined by " & ", rows separated by a double
# backslash followed by a newline.
_row_sep = "\\\\\n"
_latex_rows = [" & ".join(_show(_cell) for _cell in _row) for _row in wide]
print(_row_sep.join(_latex_rows))


killerSudoku2 & 50 & 1811.2 & 129.9 & carSequencing & 49 & 435.7 & 0.0\\
knights & 44 & 170.5 & 336.9 & langford & 39 & 146.2 & 0.0\\
opd & 36 & 21.9 & 76.2 & knapsack & 28 & 1.0 & 1.0\\
sonet2 & 24 & 10.0 & 1.0 & immigration & 23 & 0.0 & 1.0\\
bibd-implied & 22 & 410.6 & 0.0 & efpa & 21 & 162.8 & 0.0\\
handball7 & 20 & 705.0 & 1206.0 & mrcpsp-pb & 20 & 90.0 & 45.7\\
n\_queens & 20 & 1593.0 & 0.0 & bibd & 19 & 338.7 & 0.0\\
briansBrain & 16 & 0.0 & 1.0 & life & 16 & 0.0 & 438.9\\
molnars & 16 & 0.0 & 4.0 & n\_queens2 & 16 & 309.0 & 0.0\\
bpmp & 14 & 14.0 & 0.0 & blackHole & 11 & 202.2 & 0.0\\
pegSolitaireTable & 8 & 59.9 & 0.0 & pegSolitaireState & 8 & 59.9 & 0.0\\
pegSolitaireAction & 8 & 59.9 & 0.0 & magicSquare & 7 & 136.0 & 36.0\\
peaceableArmyOfQueens1 & 7 & 0.0 & 1008.0 & peacefulArmyQueens3 & 6 & 0.0 & 4.0\\
quasiGroup5Idempotent & 6 & 586.7 & 0.0 & golomb & 6 & 59.2 & 38.7\\
quasiGroup7 & 6 & 410.7 & 0.0 & quasiGroup6 & 6 & 410.7 & 0.0\\
quasiGroup4NonIdempotent & 4 & 1067.5 & 

## Curious how often there are more LIs or PBs

In [20]:
# Count the corpus instances that satisfy each comparison of PB and sum counts.
_pbs, _sums = corpus.n_pbs, corpus.n_sums
_comparisons = [
    ("Some PBs ", _pbs > 0),
    ("Some sums", _sums > 0),
    ("More PBs", _pbs > _sums),
    ("More LIs", _pbs < _sums),
    ("Same    ", _pbs == _sums),
]
for _label, _mask in _comparisons:
    print(_label, len(corpus.loc[_mask]))

Some PBs  519
Some sums 356
More PBs 383
More LIs 202
Same     30


## What about the sizes?

In [19]:
# Join the full feature table onto the retained corpus rows.
_feats = f.merge(corpus, left_on=IDCOLS, right_on=['model', 'param'])

# For each pair, describe the `*_n_med` column over the rows whose matching
# `*_count` column is positive.
_size_columns = [
    ('sarofe_pbs_count', 'sarofe_pbs_n_med'),
    ('sarofe_sums_count', 'sarofe_sums_n_med'),
]
for _count_col, _median_col in _size_columns:
    _present = _feats[_count_col] > 0
    print(_feats.loc[_present, [_median_col]].describe())



       sarofe_pbs_n_med
count        519.000000
mean          36.932563
std           45.244907
min            1.000000
25%            8.500000
50%           18.000000
75%           48.500000
max          280.000000
       sarofe_sums_n_med
count         356.000000
mean         5917.533708
std         44209.932288
min             4.000000
25%            21.000000
50%            36.000000
75%           117.750000
max        521285.000000
