# Comparison of IDEAS, CellProfiler (CP) and SCIP features

In this notebook, we will compare feature sets generated using IDEAS, CellProfiler and SCIP. 

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
from scip_workflows.common import *


In [None]:
import seaborn
from scipy.stats import pearsonr, spearmanr
from sklearn.preprocessing import scale

from scip_workflows import core


## Loading IDEAS and SCIP feature sets

In [None]:
# Resolve the input paths. When a `snakemake` object exists in the namespace
# (presumably injected by the Snakemake workflow runner), its declared inputs
# are used; otherwise the NameError raised by the first lookup triggers the
# hard-coded fallback paths below.
try:
    features = snakemake.input.features
    index = snakemake.input.index
    columns = snakemake.input.columns
    ideas = snakemake.input.ideas
    labels = snakemake.input.labels
except NameError:
    # NOTE(review): this absolute path is machine-specific; running the
    # notebook elsewhere without `snakemake` requires editing it (the
    # commented alternative below derives it from an environment variable).
    data_root = Path("/home/maximl/scratch/data/vsc/datasets/wbc/")
    # data_root = Path(os.environ["VSC_DATA_VO_USER"]) / "datasets/wbc"
    data_scip = data_root / "scip/131020222139/"
    features = data_scip / "WBC_features.parquet"
    labels = data_scip / "labels.parquet"
    index = data_scip / "indices/index.npy"
    columns = data_scip / "indices/columns.npy"
    ideas = data_root / "ideas" / "WBC_ideas_features.parquet"


In [None]:
# Load the SCIP feature table, then restrict it to the columns and rows listed
# in the saved selection arrays.
# NOTE(review): allow_pickle=True lets numpy unpickle arbitrary objects, which
# can execute code; only load selection files produced by a trusted pipeline.
df_scip = pq.read_table(features).to_pandas()

df_scip = df_scip[numpy.load(columns, allow_pickle=True)]
df_scip = df_scip.loc[numpy.load(index, allow_pickle=True)]

# Read the labels into their own name rather than rebinding `labels`: `labels`
# keeps pointing at the input path, so this cell can be re-run on its own.
df_labels = pq.read_table(labels).to_pandas()
df_scip = df_scip.merge(df_labels, left_index=True, right_index=True)

df_scip.shape


In [None]:
# Read the IDEAS feature table from the `ideas` input and convert it to a
# pandas DataFrame.
df_ideas = pq.read_table(ideas).to_pandas()


## Comparing features between IDEAS and SCIP

In [None]:
# Tag every column with the tool it came from ("scip_" / "ideas_") and keep
# only the rows whose index is present in both feature sets.
df_joined = df_scip.add_prefix("scip_").join(
    df_ideas.add_prefix("ideas_"),
    how="inner",
)
df_joined.shape


In [None]:
def rename(c):
    """Strip the ``"scip_"`` prefix from SCIP metadata column names.

    Parameters
    ----------
    c : str
        A column name of the joined frame.

    Returns
    -------
    str
        ``c`` without its leading ``"scip_"`` when it starts with
        ``"scip_meta_"`` (e.g. ``"scip_meta_label"`` -> ``"meta_label"``);
        otherwise ``c`` unchanged.
    """
    prefix = "scip_"
    # Test for the full "scip_meta_" prefix (including the trailing underscore)
    # and slice by the prefix length, so names that merely begin with
    # "scip_meta" (e.g. "scip_metadata") are left intact instead of mangled.
    if c.startswith(prefix + "meta_"):
        return c[len(prefix):]
    return c


# Apply `rename` to every column label of the joined frame.
df_joined = df_joined.rename(mapper=rename, axis="columns")


In [None]:
def compare_features(df, x1, x2, ax=None):
    """Scatter one feature column against another for visual comparison.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing both columns.
    x1 : str
        Name of the column drawn on the x-axis.
    x2 : str
        Name of the column drawn on the y-axis.
    ax : matplotlib.axes.Axes, optional
        Axes to draw on. When omitted, a new figure is created with
        ``plt.subplots(dpi=150)``.

    Returns
    -------
    matplotlib.axes.Axes
        The axes holding the scatter plot, labelled with ``x1`` and ``x2``.
    """
    if ax is None:
        _, ax = plt.subplots(dpi=150)
    ax.set_xlabel(x1)
    ax.set_ylabel(x2)

    # Work on a positional (0..n-1) index: selecting by the frame's own labels
    # would multiply rows whenever the index contains duplicate labels.
    xs = df[x1].reset_index(drop=True)
    ys = df[x2].reset_index(drop=True)

    # Draw the points in ascending order of the x feature.
    order = xs.sort_values().index
    ax.scatter(xs.loc[order], ys.loc[order], s=0.5, alpha=0.5, edgecolors="none")

    return ax


### Texture

In [None]:
# IDEAS "hcontrastmean" column (m06, SSC) against SCIP GLCM mean homogeneity on SSC.
# NOTE(review): by name this pairs a contrast feature with a homogeneity
# feature; confirm the pairing is intended.
compare_features(
    df=df_joined,
    x1="ideas_feat_hcontrastmeanm06ssc5",
    x2="scip_feat_li_glcm_mean_homogeneity_5_SSC",
)


### Shape

In [None]:
# IDEAS circularity (m01) against SCIP eccentricity on BF1.
# NOTE(review): by name these are different shape descriptors; confirm the
# pairing is intended.
compare_features(
    df=df_joined,
    x1="ideas_feat_circularitym01",
    x2="scip_feat_li_eccentricity_BF1",
)


In [None]:
# IDEAS major axis (m01) against SCIP major axis length on BF1.
compare_features(
    df=df_joined,
    x1="ideas_feat_majoraxism01",
    x2="scip_feat_li_major_axis_length_BF1",
)


In [None]:
# IDEAS area (m01) against SCIP area on BF1.
compare_features(
    df=df_joined,
    x1="ideas_feat_aream01",
    x2="scip_feat_li_area_BF1",
)


In [None]:
# IDEAS area (m02) against SCIP area on CD15.
compare_features(
    df=df_joined,
    x1="ideas_feat_aream02",
    x2="scip_feat_li_area_CD15",
)


In [None]:
# IDEAS area (m03) against SCIP area on Siglec8.
compare_features(
    df=df_joined,
    x1="ideas_feat_aream03",
    x2="scip_feat_li_area_Siglec8",
)


In [None]:
# IDEAS perimeter (m01) against SCIP perimeter on BF1.
compare_features(
    df=df_joined,
    x1="ideas_feat_perimeterm01",
    x2="scip_feat_li_perimeter_BF1",
)


In [None]:
# IDEAS major axis (m06) against SCIP major axis length on SSC.
compare_features(
    df=df_joined,
    x1="ideas_feat_majoraxism06",
    x2="scip_feat_li_major_axis_length_SSC",
)


In [None]:
# IDEAS perimeter (m01) against SCIP Crofton perimeter on BF1.
compare_features(
    df=df_joined,
    x1="ideas_feat_perimeterm01",
    x2="scip_feat_li_perimeter_crofton_BF1",
)


In [None]:
# IDEAS area (m07) against SCIP area on CD3.
compare_features(
    df=df_joined,
    x1="ideas_feat_aream07",
    x2="scip_feat_li_area_CD3",
)


### Intensity features

In [None]:
# IDEAS raw intensity (m06, SSC) against SCIP intensity sum on SSC.
compare_features(
    df=df_joined,
    x1="ideas_feat_rawintensitym06ssc",
    x2="scip_feat_li_sum_SSC",
)


In [None]:
# IDEAS intensity (m01, BF) against SCIP background-corrected sum on BF1.
compare_features(
    df=df_joined,
    x1="ideas_feat_intensitym01bf420nm480nm",
    x2="scip_feat_li_bgcorr_sum_BF1",
)


In [None]:
# IDEAS raw intensity (m01, BF) against SCIP intensity sum on BF1.
compare_features(
    df=df_joined,
    x1="ideas_feat_rawintensitym01bf420nm480nm",
    x2="scip_feat_li_sum_BF1",
)


In [None]:
# IDEAS CD15 intensity against SCIP combined background-corrected sum on CD15.
compare_features(
    df=df_joined,
    x1="ideas_feat_intensitymccd15fitc",
    x2="scip_feat_li_combined_bgcorr_sum_CD15",
)


In [None]:
# IDEAS CD3 intensity against SCIP combined background-corrected sum on CD3.
compare_features(
    df=df_joined,
    x1="ideas_feat_intensitymccd3bv421",
    x2="scip_feat_li_combined_bgcorr_sum_CD3",
)
