# Libraries

In [None]:
import numpy as np
import pandas as pd 
import networkx as nx
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.manifold import TSNE
from scipy.interpolate import splprep, splev
from sklearn.cluster import KMeans
from scipy.spatial import ConvexHull

%config InlineBackend.figure_formats=['retina']

In [None]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.feature_selection import VarianceThreshold

In [None]:
from SurgeLibrary.stability_selection import StabilitySelection, plot_stability_path
from SurgeLibrary.preprocessing import remove_low_info_samples, LowInfoFilter
from SurgeLibrary.visualization import boxplot_features

# Data

In [None]:
def _load_timepoint(day):
    """Read the feature table and the label table for one time-point tag.

    `day` is the tag used in the file names (e.g. 'D0').

    Returns a tuple `(unstim, suffixed, labels)`:
    - `unstim`: the `<day>_Unstim&Freq.csv` table with shortened column names,
    - `suffixed`: a copy of `unstim` whose index entries end in `_<day>`,
    - `labels`: the `labels_<day>.csv` table, index left as read from disk.
    """
    unstim = pd.read_csv('../Data_POCK/Immuno/' + day + '_Unstim&Freq.csv', index_col=0)
    # Keep only the second and the last '_'-separated token of each column name.
    unstim.columns = ['_'.join((parts[1], parts[-1])) for parts in unstim.columns.str.split('_')]

    # The suffixed copy keeps sample ids distinct once all time points are stacked.
    suffixed = unstim.copy()
    suffixed.index = [sample + '_' + day for sample in suffixed.index]

    # Label indices are NOT suffixed: they keep the same ids as `unstim`.
    labels = pd.read_csv('../Data_POCK/Immuno/labels_' + day + '.csv', index_col=0)
    return unstim, suffixed, labels


D0_unstim, D0, labels_D0 = _load_timepoint('D0')
D1_unstim, D1, labels_D1 = _load_timepoint('D1')
D7_unstim, D7, labels_D7 = _load_timepoint('D7')
D90_unstim, D90, labels_D90 = _load_timepoint('D90')

# All time points stacked row-wise (rows are the suffixed sample ids).
df_tot = pd.concat([D0, D1, D7, D90], axis='rows')


# Effect directions

In [None]:
def _zscore(frame):
    """Centre each column on its mean and divide by its standard deviation.

    1e-8 is added to the standard deviation so the denominator is never
    exactly zero for a constant column.
    """
    centered = frame - frame.mean()
    return centered / (frame.std() + 1e-8)


# Each time point is standardised on its own statistics.
D0_std, D1_std, D7_std, D90_std = (
    _zscore(frame) for frame in (D0_unstim, D1_unstim, D7_unstim, D90_unstim)
)

# ANOVA

In [None]:
# Attach the labels to each (un-suffixed) feature table and append an integer
# TimePoint code: D0 -> 0, D1 -> 1, D7 -> 2, D90 -> 3.
X_D0 = D0_unstim.join(labels_D0).assign(TimePoint=0)
X_D1 = D1_unstim.join(labels_D1).assign(TimePoint=1)
X_D7 = D7_unstim.join(labels_D7).assign(TimePoint=2)
X_D90 = D90_unstim.join(labels_D90).assign(TimePoint=3)

In [None]:
# Stack every time point, drop rows without a POCD_J7_pos label, renumber the
# rows from 0 and store the label as an integer. No imputation is applied here.
df_anova = (
    pd.concat([X_D0, X_D1, X_D7, X_D90], axis='rows')
    .dropna(subset=['POCD_J7_pos'])
    .reset_index(drop=True)
    .assign(POCD_J7_pos=lambda frame: frame.POCD_J7_pos.astype(int))
)

In [None]:
import statsmodels.api as sm
from statsmodels.formula.api import ols

# One OLS model per feature with POCD_J7_pos, TimePoint and their interaction as
# categorical factors; the p-value stored is the POCD_J7_pos main effect from
# the Type I (typ=1) ANOVA table.
# NOTE(review): `columns[:-2]` assumes the last two columns of df_anova are the
# label column and TimePoint (i.e. the label files contribute exactly one column).
feature_names = df_anova.columns[:-2]
anova_pvalues = []
for feature in feature_names:
    # Q('<name>') makes patsy look the column up by its exact name, so names
    # that are not valid Python identifiers (containing '+', '-', spaces, a
    # leading digit, ...) are not mis-parsed as formula operators.
    model = ols(f'Q({str(feature)!r}) ~ C(POCD_J7_pos)*C(TimePoint)', data=df_anova).fit()
    table = sm.stats.anova_lm(model, typ=1)
    anova_pvalues.append(table.loc['C(POCD_J7_pos)', 'PR(>F)'])

# Features whose p-value came out as NaN are treated as non-significant (p = 1).
pvalues_anova = pd.DataFrame(
    {'pvalue_anova': anova_pvalues}, index=feature_names, dtype=float
).fillna(1)

# t-SNE

In [None]:
# Pairwise correlations between the columns of df_tot.
df_corr = df_tot.corr()

# t-SNE is fitted on the transposed table, so every column of df_tot becomes
# one embedded point; NaNs are replaced by the column median beforehand.
feature_matrix = df_tot.fillna(df_tot.median()).T
tsne = TSNE(learning_rate='auto', random_state=4).fit(feature_matrix)
coords = pd.DataFrame(tsne.embedding_, index=df_tot.columns, columns=['coor1', 'coor2'])

# KMeans on the 2-D embedding; the centroid coordinates are kept separately.
clusterer = KMeans(init='k-means++', n_clusters=10, n_init=100, random_state=42)
cluster_labels = clusterer.fit_predict(coords[['coor1', 'coor2']])
cen_x, cen_y = clusterer.cluster_centers_[:, 0], clusterer.cluster_centers_[:, 1]

# Final table: coordinates, cluster id and the ANOVA p-value, aligned on the index.
df_tsne = pd.concat([coords.assign(cluster=cluster_labels), pvalues_anova], axis='columns')

# Correlation Plot

In [None]:
#df_tsne.to_csv('./df_tsne.csv')
from adjustText import adjust_text
from matplotlib import cm
from matplotlib import colors
from matplotlib.colors import ListedColormap, LinearSegmentedColormap

# Custom colormap given as (position, RGB) stops: blue at 0, a tan plateau
# between .35 and .65, red at 1. The list has its own name so that it is not
# shadowed by the `matplotlib.colors` module imported above.
color_stops = [
    (0, (0, 0, 1)),
    (.35, (227/256, 163/256, 103/256)),
    (.65, (227/256, 163/256, 103/256)),
    (1, (1, 0, 0)),
]
n_bins = [3, 6, 10, 100]  # not read in this cell; kept in case another cell uses it
cmap_name = 'custom_cmp'
cmap = LinearSegmentedColormap.from_list(cmap_name, color_stops, N=256, gamma=1)

CORR_THRESH = .7
TIMESTAMP = 'D1'
# Column of df_tsne that drives the marker sizes.
sizes_source = 'pvalue_anova'

# Marker area per feature: a fixed 5 when p > 0.05, otherwise
# 2 * (6 * -log10(p))**2 so that smaller p-values give larger markers.
# A missing (NaN) p-value fails the `<= 0.05` test and therefore also gets the
# baseline size instead of a NaN area.
p_values = df_tsne[sizes_source].astype(float).to_numpy()
SIZES = np.where(p_values <= 0.05, 2 * (-np.log10(p_values) * 6) ** 2, 5).tolist()

# One matplotlib cycle colour ('C0', 'C1', ...) per cluster id.
COLORS = [f'C{label}' for label in df_tsne.cluster]

fig, ax = plt.subplots(figsize=(10, 10))

# --DRAWING CLUSTERS--
for i in np.unique(cluster_labels):
    points = df_tsne.loc[df_tsne.cluster == i, ['coor1', 'coor2']].to_numpy()
    if len(points) < 3:
        # A 2-D convex hull needs at least three points; nothing to outline.
        continue
    hull = ConvexHull(points)
    # Hull vertices with the first one repeated so the outline is closed.
    closed = np.append(hull.vertices, hull.vertices[0])
    x_hull = points[closed, 0]
    y_hull = points[closed, 1]

    # Periodic interpolating spline through the hull vertices, sampled at 100
    # parameter values, gives a rounded outline.
    tck, _ = splprep([x_hull, y_hull], s=0, per=True)
    xx, yy = splev(np.linspace(0, 1, 100), tck, der=0)

    ax.fill(xx, yy, alpha=.1, color=f'C{i}', label=f'Cluster {i}')

# --DRAWING POINTS--
# `c` holds colour names, so no cmap/norm is passed: matplotlib ignores both
# (with a warning) when `c` is not numeric data. To colour by a numeric column
# instead, pass it as `c` together with cmap=cmap and norm=colors.CenteredNorm().
scatter = ax.scatter(
    df_tsne.coor1,
    df_tsne.coor2,
    s=SIZES,
    marker='o',
    c=COLORS,
    edgecolors=None,
    alpha=.7,
)

ax.axis("off")
# Legend for the labelled cluster outlines.
ax.legend()
# fig.colorbar(scatter,
#              anchor=(0, 0),
#              ax=ax, 
#              shrink=.2, 
#              label='Effect direction', 
#              aspect=8, 
#              use_gridspec=True,
#              location='right'
#             )

def funct(s):
    """Convert scatter marker areas back into the p-values they stand for.

    Used as the `func` argument of `legend_elements` so the size legend can be
    labelled in p-value units.

    s: sequence of marker areas.
    Returns a 1-D float array of the same length. An area of exactly 5 (the
    fixed size given to non-significant points) maps to 1; any other area `a`
    maps to 10 ** (-sqrt(a / 2) / 6), the inverse of
    a = 2 * (6 * -log10(p)) ** 2.
    """
    def _area_to_pvalue(area):
        if area == 5:
            return 1.0
        exponent = -((area / 2) ** (1 / 2)) / 6
        return np.exp(np.log(10) * exponent)

    return np.array([_area_to_pvalue(area) for area in s], dtype=float)

# Size legend: show markers for these four p-values; `funct` converts marker
# areas back to p-values so matplotlib can place the requested entries.
kw = dict(prop="sizes", num=[1, 0.05, 0.01, 0.001], color='gray',
          func=funct)

# An Axes holds a single "current" legend and ax.legend() replaces it. Register
# the legend that already exists (the cluster legend) as a plain artist first,
# so that it is still drawn after the size legend is created.
cluster_legend = ax.get_legend()
if cluster_legend is not None:
    ax.add_artist(cluster_legend)
ax.legend(*scatter.legend_elements(**kw), title="ANOVA pvalue", labelspacing=2,
          loc='lower right', frameon=False)

# --DRAWING EDGES--
# Connect every ordered pair of features whose correlation lies in
# [CORR_THRESH, 1). The mask is symmetric, so each pair is stroked twice (once
# per order). NaN correlations fail both comparisons and are skipped.
feature_names = df_tot.columns
corr_values = df_corr.loc[feature_names, feature_names].to_numpy()
strong = (corr_values >= CORR_THRESH) & (corr_values < 1)
for row, col in zip(*np.nonzero(strong)):
    sub_df = df_tsne.loc[[feature_names[row], feature_names[col]]]
    ax.plot(sub_df.coor1, sub_df.coor2, linewidth=.2, color='gray', alpha=.05)

# --LABELS--
# Name only the features with p <= 0.01, then let adjust_text move the labels.
significant = df_tsne[df_tsne[sizes_source].astype(float) <= 0.01]
annot = [
    ax.annotate(name, (x, y), fontsize=12)
    for name, x, y in zip(significant.index, significant.coor1, significant.coor2)
]

adjust_text(annot)

#plt.savefig('./Correlation networks/Correlation network - ANOVA/correlation networks.pdf', dpi=200)