In [1]:
import pandas as pd
import seaborn as sns
from matplotlib import rcParams
from sklearn.preprocessing import MinMaxScaler
import matplotlib
import numpy as np
import scipy
import matplotlib as plt
import matplotlib.patches as patches

# Import needed files

In [2]:
# Feature quantification table. Every column whose header mentions 'Peak area'
# is renamed to the third underscore-separated token of that header.
quant = pd.read_csv("Data/cap_manu/input/40_varieties_final_quant.csv")
peak_area_renames = {
    header: header.split('_')[2]
    for header in quant.columns
    if 'Peak area' in header
}
quant = quant.rename(columns=peak_area_renames)

# Keep the columns whose name starts with 'S', plus the feature identifier.
s_prefixed_columns = [header for header in quant.columns if header.startswith('S')]
quant_area = quant[s_prefixed_columns + ['row ID']]

# CANOPUS compound summary, restricted to the feature ID and the NPC class.
sirius = pd.read_csv(
    "Data/cap_manu/input/canopus_compound_summary_40_varieties.tsv",
    sep='\t',
    usecols=['featureId', 'NPC#class'],
)

# Sample sheet (provides the 'Sample', 'Common Name' and 'Species' columns
# used by the later cells).
sample_names = pd.read_excel('Data/cap_manu/input/sample_names.xlsx')

Merge SIRIUS predictions with feature quantification table

In [None]:
# Give the key column the same name as in the SIRIUS table. The name is
# rebound instead of renaming in place: `quant_area` is a column subset of
# `quant`, and mutating such a subset in place can trigger pandas'
# SettingWithCopyWarning. Rebinding also keeps the cell safe to re-run.
quant_area = quant_area.rename(columns={'row ID': 'featureId'})

# Left join: every quantified feature is kept; features without a SIRIUS
# entry get NaN in 'NPC#class'.
table = pd.merge(quant_area, sirius, on='featureId', how='left')

Select the compound class to cluster

In [6]:
# Ask for the NPC class to analyse, e.g. "Capsaicins and Capsaicinoids".
# Surrounding whitespace is stripped so a stray space does not prevent a match.
selected_row_ids = input('Enter NPC#Class').strip()

# Keep only the features whose predicted class contains the entered text.
# - fillna(''): features without a prediction become empty strings instead of
#   the literal text 'nan', so they cannot match a pattern by accident.
# - regex=False: the text is matched literally, so class names containing
#   regex metacharacters (for example parentheses) still match.
npc_class = table['NPC#class'].fillna('').astype(str)
reduced_table = table[npc_class.str.contains(selected_row_ids, regex=False)]

In [None]:
# Sample columns are the ones whose name starts with 'S'.
sample_columns = [name for name in reduced_table.columns if name.startswith('S')]

# Order them by the integer that follows the leading character.
sample_columns.sort(key=lambda name: int(name[1:]))
final_table = reduced_table[sample_columns]

# Replace the sample codes with the common names from the sample sheet.
code_to_common_name = sample_names.set_index('Sample')['Common Name']
final_table.columns = final_table.columns.map(code_to_common_name)
final_table

Log2 transformation, followed by normalization based on the most intense feature per sample

In [25]:
# log2 transformation of the data.
# Zeros have no logarithm, so they are replaced by 1 first: log2(1) is exactly
# 0, which leaves those cells at 0 after the transform. This avoids the
# earlier approach of substituting 1e-6 and then matching the rounded value
# -19.93157, which relied on an exact floating-point comparison.
no_zero_f_t = final_table.mask(final_table == 0, 1)
log_final_table = np.log2(no_zero_f_t).round(5)

# For a log10 scale, use np.log10 in place of np.log2 above.

Creating a test heatmap

In [None]:
rcParams['figure.figsize'] = 12, 8

# Normalize the data per column: max(axis=0) yields one maximum per column and
# div(..., axis=1) aligns those maxima with the column labels, so the largest
# value in every column becomes 1.
column_max = log_final_table.max(axis=0)
# A column whose maximum is 0 would become NaN (0/0) and break the later
# clustering step; dividing such a column by 1 leaves it unchanged instead.
column_max = column_max.where(column_max != 0, 1)
normalized_df = log_final_table.div(column_max, axis=1)

# Generate a heatmap (the trailing ';' hides the Axes repr in the cell output)
sns.heatmap(normalized_df, linewidth=0.5, cmap='viridis');


Transpose the dataframe and create a color palette for the different species

In [28]:
# Flip the table so the former columns become the rows.
normalized_df_t = normalized_df.T

# Look up the species for each common name; the first matching row of the
# sample sheet is used.
species = []
for common_name in normalized_df.columns:
    matching_rows = sample_names.loc[sample_names['Common Name'] == common_name]
    species.append(matching_rows.Species.values[0])

# Append the species as an extra 'Species' column.
normalized_df_t = normalized_df_t.assign(Species=species)

In [29]:
# Row colours for the clustermap, following
# https://stackoverflow.com/questions/34334796/setting-col-colors-in-seaborn-clustermap-from-pandas
networks = normalized_df_t.Species
species_levels = networks.unique()

# One palette entry per distinct species, drawn from the five base colours.
network_pal = sns.color_palette(['red', 'blue', 'green', 'orange', 'purple'], len(species_levels))

# Species -> colour lookup, then the colour belonging to every row.
network_lut = {level: colour for level, colour in zip(species_levels, network_pal)}
network_colors = networks.map(network_lut)

Plot final heatmap and clustering

In [None]:
# Apply the seaborn theme BEFORE drawing. Theme settings only affect figures
# created afterwards, so calling sns.set() after clustermap() left the first
# run unscaled and made the figure depend on how often the cell had been run.
sns.set(font_scale=1.2)
# sns.set() resets the font settings, so the font family is chosen after it.
# Note: `plt` is the alias this notebook gives to the top-level matplotlib
# package, which exposes rcParams directly.
plt.rcParams['font.family'] = 'Arial'

# Cluster the rows only (col_cluster=False). The last column ('Species') is
# left out of the matrix and shown as the row colour strip instead.
g = sns.clustermap(normalized_df_t.iloc[:, :-1],
                   col_cluster=False,
                   cmap="cividis",
                   vmin=0,
                   figsize=(10, 15),
                   xticklabels=False,
                   row_colors=network_colors)

ax = g.ax_heatmap

# Optional: shade selected heatmap columns (0-based positions).
# highlighted_columns = [50, 56]
# for col in highlighted_columns:
#     rect = patches.Rectangle((col, 0), 1, normalized_df_t.shape[0], linewidth=1, edgecolor='none', facecolor='red', alpha=0.3)
#     ax.add_patch(rect)

ax.set_xlabel('SIRIUS predicted Capsaicinoids', family='Arial')
g.savefig('Data/cap_manu/heat_map_Column_log2norm.svg')

In [None]:
# Index labels of the normalized table as a plain list, then the position of
# label 121 within it (list.index raises ValueError when the label is absent).
normalized_vec = list(normalized_df.index)
normalized_vec.index(121)

In [None]:

# Lift the row-display limit so the whole table is rendered.
pd.options.display.max_rows = None
normalized_df