In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.feature_selection import mutual_info_regression
import numpy as np
import dcor
from minepy import MINE

In [None]:
# Load the raw table and drop every row whose 'F' column equals 3.
df1 = pd.read_csv('dataset.csv')
mask = df1['F'] == 3
df2 = df1[~mask].copy()

# Columns that take part in the pairwise-association analysis; this order is
# also the row/column order of every heatmap built from df_corr.
corr_columns = [
    'Bandgap, GGA (eV)',
    'Dielectric constant, total',
    'Refractive index',
    'Atomization energy (eV/atom)',
    'Volume of the unit cell (A^3)',
    'Density (g/cm^3)',
    'Tolerance factor',
    'Octahedral factor',
    'rA(Ang)',
    'rB(Ang)',
    'rX(Ang)',
    'A SITE DFE',
    'B SITE DFE',
    'X SITE DFE',
]

# Short (mathtext) axis labels. Every key must match an entry of corr_columns
# exactly: the previous 'rB (Ang)' key contained a stray space, so that column
# silently kept its raw name.
column_labels = {
    'Bandgap, GGA (eV)': 'Bandgap',
    'A SITE DFE': 'E$_A$',
    'B SITE DFE': 'E$_B$',
    'X SITE DFE': 'E$_X$',
    'Atomization energy (eV/atom)': 'E$_{atomization}$',
    'Density (g/cm^3)': 'Density',
    'rA(Ang)': 'r$_A$',
    'rB(Ang)': 'r$_B$',
    'rX(Ang)': 'r$_X$',
    'Volume of the unit cell (A^3)': 'Unit cell volume',
    'Dielectric constant, total': 'Dielectric constant',
}

# rename() returns a new frame, so nothing is mutated in place on a slice of
# df2 (no SettingWithCopyWarning). errors='raise' turns a mistyped key into a
# KeyError instead of a silent no-op.
df_corr = df2[corr_columns].rename(columns=column_labels, errors='raise')

In [None]:
# Pairwise Spearman rank correlation of the selected columns.
# (The variable keeps its historical name even though the method is Spearman.)
pear_corr = df_corr.corr(method='spearman')

# Annotated heatmap on a large canvas so the cell labels stay readable.
plt.figure(figsize=(14, 10))
heatmap = sns.heatmap(
    pear_corr,
    annot=True,
    fmt=".2f",
    cmap='Blues',
    linecolor='Black',
    linewidths=0.5,
    cbar_kws={'shrink': 0.8},  # slightly shorter colorbar
    annot_kws={"fontweight": "bold", "size": 14},  # bold in-cell numbers
)

# The colorbar hangs off the first collection drawn by seaborn.
cbar = heatmap.collections[0].colorbar
cbar.ax.tick_params(labelsize=14)
for tick_label in cbar.ax.get_yticklabels():
    tick_label.set_fontweight('bold')

# Bold title and axis tick labels.
plt.title('Spearman Rank Correlation Matrix', fontsize=20, fontweight='bold')
plt.xticks(rotation=45, ha='right', fontsize=18, fontweight='bold')
plt.yticks(rotation=0, fontsize=18, fontweight='bold')

# Fit the labels inside the canvas, then export and display.
plt.tight_layout()
plt.savefig(
    'spearman_correlation_matrix.pdf',
    format='pdf',
    bbox_inches='tight',
    transparent=True,
    dpi=2000,
)
plt.show()

In [None]:
def calculate_mutual_info(df, random_state=0):
    """Estimate the mutual information between every ordered pair of columns.

    Each off-diagonal entry ``[a, b]`` is the value returned by
    ``mutual_info_regression`` with column ``a`` as the single feature and
    column ``b`` as the target. ``[a, b]`` and ``[b, a]`` are estimated by two
    separate calls, so they are not forced to be equal.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose columns are compared pairwise.
    random_state : int, RandomState instance or None, default 0
        Forwarded to ``mutual_info_regression``. The estimator is randomised,
        so a fixed seed is needed for the matrix (and the heatmap built from
        it) to be reproducible across runs. Pass ``None`` to restore the
        previous unseeded behaviour.

    Returns
    -------
    pandas.DataFrame
        Square float frame indexed and labelled by ``df.columns``; the
        diagonal is NaN because self-information is not of interest here.
    """
    columns = list(df.columns)
    # Pre-fill with NaN so the diagonal needs no special assignment.
    mutual_info = pd.DataFrame(np.nan, index=columns, columns=columns, dtype=float)
    for feature1 in columns:
        for feature2 in columns:
            if feature1 == feature2:
                continue  # self-pair: leave the NaN placeholder
            mi = mutual_info_regression(
                df[[feature1]], df[feature2], random_state=random_state
            )[0]
            mutual_info.at[feature1, feature2] = mi
    return mutual_info

# Pairwise mutual-information matrix of the selected columns.
mutual_info_df = calculate_mutual_info(df_corr)

# Annotated heatmap with bold numbers, title and tick labels.
plt.figure(figsize=(14, 10))
heatmap = sns.heatmap(
    mutual_info_df,
    cmap='Oranges',
    cbar=True,
    annot=True,
    fmt='.2f',
    linecolor='black',
    linewidths=0.5,
    cbar_kws={'shrink': 0.8},  # slightly shorter colorbar
    annot_kws={"fontweight": "bold", "size": 14},  # bold in-cell numbers
)

# The colorbar hangs off the first collection drawn by seaborn.
cbar = heatmap.collections[0].colorbar
cbar.ax.tick_params(labelsize=14)
for tick_label in cbar.ax.get_yticklabels():
    tick_label.set_fontweight('bold')

plt.title('Mutual Information Heatmap', fontsize=20, fontweight='bold')
plt.xticks(rotation=45, ha='right', fontsize=18, fontweight='bold')
plt.yticks(rotation=0, fontsize=18, fontweight='bold')

# Fit the labels inside the canvas, then export and display.
plt.tight_layout()
plt.savefig(
    'Mutual_info_heatmap.pdf',
    format='pdf',
    bbox_inches='tight',
    transparent=True,
    dpi=2000,
)
plt.show()

In [None]:
def compute_dcor(df):
    """Build the pairwise distance-correlation matrix of a frame's columns.

    Distance correlation is symmetric in its two arguments, so each unordered
    pair of columns (diagonal included) is evaluated once and the value is
    mirrored across the diagonal. This needs n*(n+1)/2 calls to
    ``dcor.distance_correlation`` instead of n*n.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose columns are compared pairwise.

    Returns
    -------
    pandas.DataFrame
        Square float frame indexed and labelled by ``df.columns``.
    """
    n = df.shape[1]
    # Pull each column out once rather than on every inner-loop iteration.
    series = [df.iloc[:, k] for k in range(n)]
    dcor_matrix = np.zeros((n, n))
    for i in range(n):
        for j in range(i, n):
            value = dcor.distance_correlation(series[i], series[j])
            dcor_matrix[i, j] = value
            dcor_matrix[j, i] = value  # mirror: the statistic is symmetric
    return pd.DataFrame(dcor_matrix, index=df.columns, columns=df.columns)

# Pairwise distance-correlation matrix of the selected columns.
dcor_matrix = compute_dcor(df_corr)

# Annotated heatmap with bold numbers, title and tick labels.
plt.figure(figsize=(14, 10))
heatmap = sns.heatmap(
    dcor_matrix,
    cmap='Greens',
    cbar=True,
    annot=True,
    fmt='.2f',
    linecolor='black',
    linewidths=0.5,
    cbar_kws={'shrink': 0.8},  # slightly shorter colorbar
    annot_kws={"fontweight": "bold", "size": 14},  # bold in-cell numbers
)

# The colorbar hangs off the first collection drawn by seaborn.
cbar = heatmap.collections[0].colorbar
cbar.ax.tick_params(labelsize=14)
for tick_label in cbar.ax.get_yticklabels():
    tick_label.set_fontweight('bold')

plt.title('Distance Correlation Heatmap', fontsize=20, fontweight='bold')
plt.xticks(rotation=45, ha='right', fontsize=18, fontweight='bold')
plt.yticks(rotation=0, fontsize=18, fontweight='bold')

# Fit the labels inside the canvas, then export and display.
plt.tight_layout()
plt.savefig(
    'dcor_heatmap.pdf',
    format='pdf',
    bbox_inches='tight',
    transparent=True,
    dpi=2000,
)
plt.show()

In [None]:
def compute_mic(df):
    """Build the pairwise Maximal Information Coefficient matrix of a frame.

    A single ``MINE`` estimator (default settings) is reused for every pair.
    MIC is symmetric in its two arguments, so each unordered pair of columns
    (diagonal included) is scored once and the value is mirrored across the
    diagonal. This needs n*(n+1)/2 ``compute_score`` calls instead of n*n.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose columns are compared pairwise.

    Returns
    -------
    pandas.DataFrame
        Square float frame indexed and labelled by ``df.columns``.
    """
    n = df.shape[1]
    # Pull each column out once rather than on every inner-loop iteration.
    series = [df.iloc[:, k] for k in range(n)]
    mic_matrix = np.zeros((n, n))
    mine = MINE()
    for i in range(n):
        for j in range(i, n):
            mine.compute_score(series[i], series[j])
            value = mine.mic()
            mic_matrix[i, j] = value
            mic_matrix[j, i] = value  # mirror: the statistic is symmetric
    return pd.DataFrame(mic_matrix, index=df.columns, columns=df.columns)

# Pairwise MIC matrix of the selected columns.
mic_matrix = compute_mic(df_corr)

# Annotated heatmap with bold numbers, title and tick labels.
plt.figure(figsize=(14, 10))
heatmap = sns.heatmap(
    mic_matrix,
    cmap='Reds',
    cbar=True,
    annot=True,
    fmt='.2f',
    linecolor='black',
    linewidths=0.5,
    cbar_kws={'shrink': 0.8},  # slightly shorter colorbar
    annot_kws={"fontweight": "bold", "size": 14},  # bold in-cell numbers
)

# The colorbar hangs off the first collection drawn by seaborn.
cbar = heatmap.collections[0].colorbar
cbar.ax.tick_params(labelsize=14)
for tick_label in cbar.ax.get_yticklabels():
    tick_label.set_fontweight('bold')

plt.title('Maximal Information Coefficient (MIC) Heatmap', fontsize=20, fontweight='bold')
plt.xticks(rotation=45, ha='right', fontsize=18, fontweight='bold')
plt.yticks(rotation=0, fontsize=18, fontweight='bold')

# Fit the labels inside the canvas, then export and display.
plt.tight_layout()
plt.savefig(
    'MIC_heatmap.pdf',
    format='pdf',
    bbox_inches='tight',
    transparent=True,
    dpi=2000,
)
plt.show()