In [None]:
import os
import statistics

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
from mpl_toolkits.mplot3d import Axes3D  # Import 3D plotting functionality
from sklearn.decomposition import FastICA
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

In [None]:
# Create one set of plots per period (before or after word encoding/recall), condition (RECALL/ENCODE) and subject (we are interested in subject-level analyses), at the MNI-area level.
# Each CSV corresponds to one word; its structure is MNI area x time bin, holding event data (0 or 1, indicating the absence or presence of a high-frequency oscillation (HFO)).
# For each period-condition-subject combination we compute, per word, a vector of HFO rates (one entry per MNI area), reduce these vectors to 2D and 3D with PCA, ICA and t-SNE,
# and draw a scatter plot for each dimensionality-reduction technique, with each point colour-coded by the semantic category of the originating file's word.

# word_categories is a dictionary containing semantic categories as keys and list of words for the corresponding category as values
# polish_to_eng is a dictionary containing polish words as keys and their english translation as values

PATH='~/subjects_pickle'

# Column position separating the 'before' window from the 'after' window.
SPLIT_BIN = 150
# A category needs at least this many points: the 3D reductions ask for
# three components, which cannot be fitted on fewer than three samples.
MIN_POINTS = 3


def clean_raster(df, period):
    """Return the raster restricted to one period with placeholder data removed.

    Parameters
    ----------
    df : pandas.DataFrame
        Raster as read from one CSV (rows x time bins). Not modified.
    period : str
        'before' keeps the first ``SPLIT_BIN`` columns, 'after' keeps the
        remaining ones; any other value keeps every column.

    Returns
    -------
    pandas.DataFrame
        Cleaned copy with remaining NaN cells replaced by 0.
    """
    # NOTE(review): all-NaN columns are dropped *before* the positional split,
    # so such columns shift where the before/after boundary falls -- confirm
    # this is intended.
    df = df.dropna(how='all').dropna(axis=1, how='all')
    if period == 'before':
        df = df.iloc[:, :SPLIT_BIN]
    elif period == 'after':
        df = df.iloc[:, SPLIT_BIN:]
    # Placeholder labels may appear either as column headers or as row labels.
    df = df.drop(columns=['N/A', 'nan'], errors='ignore')
    for label in (np.nan, 'nan', 'N/A'):
        if label in df.index:
            df = df.drop(index=label)
    return df.fillna(0)


def average_hfo_rates(df):
    """Return the per-row mean event rate of a cleaned raster.

    The row sum is divided by the number of time-bin columns. The previous
    code added a 'Sum' column first and then divided by ``df.shape[1]``,
    which counted that extra column and under-estimated every rate.

    Parameters
    ----------
    df : pandas.DataFrame
        Cleaned raster (rows x time bins).

    Returns
    -------
    pandas.Series
        One rate per row, indexed like ``df``. When the window holds no
        time bins the rate is reported as 0.0 (as the earlier code did)
        rather than NaN, so downstream reducers never receive NaN.
    """
    n_bins = df.shape[1]
    if n_bins == 0:
        return pd.Series(0.0, index=df.index)
    return df.sum(axis=1) / n_bins


def collect_category_vectors(raster_dir, period, word_categories, polish_to_eng):
    """Build one HFO-rate vector per CSV file and group them by category.

    Parameters
    ----------
    raster_dir : str
        Directory holding one CSV per word.
    period : str
        'before' or 'after'; forwarded to ``clean_raster``.
    word_categories : dict
        Semantic category -> list of English words.
    polish_to_eng : dict
        Polish word -> English translation.

    Returns
    -------
    tuple
        ``(category_points, lengths)``: a dict mapping every category to the
        list of rate vectors of its words, and the length of every vector
        produced (one entry per file, in processing order).

    Raises
    ------
    IndexError
        If a file name has fewer than five '_'-separated fields.
    KeyError
        If the Polish word is missing from ``polish_to_eng``.
    """
    category_points = {category: [] for category in word_categories}
    lengths = []
    # Sorted so the processing (and therefore plotting) order is reproducible.
    for file_name in sorted(os.listdir(raster_dir)):
        raster = pd.read_csv(os.path.join(raster_dir, file_name), index_col=0)
        rates = list(average_hfo_rates(clean_raster(raster, period)))
        # The fifth '_'-separated field of the file name is the Polish word.
        eng_word = polish_to_eng[file_name.split('_')[4]]
        for category, words in word_categories.items():
            if eng_word in words:
                category_points[category].append(rates)
        lengths.append(len(rates))
    return category_points, lengths


def filter_categories(category_points, lengths):
    """Keep only the categories that can be embedded.

    A category is kept when it has at least ``MIN_POINTS`` vectors and every
    one of them has the most common vector length found in ``lengths``
    (ragged vectors cannot be stacked into a matrix).

    Parameters
    ----------
    category_points : dict
        Category -> list of vectors.
    lengths : list of int
        Length of every vector that was produced.

    Returns
    -------
    dict
        New dict with the surviving categories; empty when ``lengths`` is
        empty.
    """
    if not lengths:
        return {}
    # Computed once; it used to be recomputed for every single point.
    expected_length = statistics.mode(lengths)
    return {
        category: points
        for category, points in category_points.items()
        if len(points) >= MIN_POINTS
        and all(len(point) == expected_length for point in points)
    }


def embed_by_category(category_points, make_reducer, n_components):
    """Fit a fresh reducer on each category and return its embedding.

    NOTE(review): every category is fitted independently, so the axes of
    different categories are not shared even though they are drawn on the
    same plot. Fitting one reducer on all points together may be what is
    actually wanted -- confirm before interpreting the figures.

    Parameters
    ----------
    category_points : dict
        Category -> list of equally long vectors.
    make_reducer : callable
        ``make_reducer(n_components)`` returns an object exposing
        ``fit_transform``.
    n_components : int
        Target dimensionality.

    Returns
    -------
    dict
        Category -> array of shape (n_points, n_components).
    """
    return {
        category: make_reducer(n_components).fit_transform(np.array(points))
        for category, points in category_points.items()
    }


def show_static_scatter(embedded, n_dims, axis_name, title):
    """Draw a 2D or 3D matplotlib scatter with one colour per category."""
    fig = plt.figure(figsize=(10, 8))
    subplot_kwargs = {'projection': '3d'} if n_dims == 3 else {}
    ax = fig.add_subplot(111, **subplot_kwargs)
    for category, coords in embedded.items():
        ax.scatter(*(coords[:, i] for i in range(n_dims)), label=category)
    label_setters = [ax.set_xlabel, ax.set_ylabel]
    if n_dims == 3:
        label_setters.append(ax.set_zlabel)
    for number, set_label in enumerate(label_setters, start=1):
        set_label(axis_name + ' ' + str(number))
    ax.set_title(title)
    ax.legend()
    plt.show()


def show_interactive_scatter(embedded, prefix, axis_name, title):
    """Draw an interactive Plotly 3D scatter with one colour per category."""
    columns = [prefix + str(number) for number in (1, 2, 3)]
    rows = [
        {'category': category, **dict(zip(columns, point))}
        for category, coords in embedded.items()
        for point in coords
    ]
    fig = px.scatter_3d(pd.DataFrame(rows), x=columns[0], y=columns[1],
                        z=columns[2], color='category')
    fig.update_layout(scene=dict(xaxis_title=axis_name + ' 1',
                                 yaxis_title=axis_name + ' 2',
                                 zaxis_title=axis_name + ' 3'),
                      title=title)
    fig.show()


def plot_subject(category_points, label):
    """Show the 2D, 3D and interactive 3D plots for one subject/condition/period.

    Parameters
    ----------
    category_points : dict
        Category -> list of vectors, already filtered by
        ``filter_categories`` (so every category has >= ``MIN_POINTS``).
    label : str
        Suffix appended to every plot title.
    """
    # (title name, axis label stem, Plotly column prefix, reducer factory)
    methods = [
        ('PCA', 'Principal Component', 'pc',
         lambda k: PCA(n_components=k)),
        ('ICA', 'Independent Component', 'ic',
         lambda k: FastICA(n_components=k)),
        # Low perplexity because a category may hold as few as three points.
        ('t-SNE', 't-SNE Dimension', 'tsne',
         lambda k: TSNE(n_components=k, perplexity=2)),
    ]

    for name, axis_name, _, make_reducer in methods:
        embedded = embed_by_category(category_points, make_reducer, 2)
        show_static_scatter(embedded, 2, axis_name,
                            '2D ' + name + ' Visualization' + ' - ' + label)

    # The 3D embedding is computed once per method and reused for the
    # interactive plot, so both views show the same points (ICA and t-SNE
    # are stochastic and used to be re-fitted, giving two different pictures).
    embedded_3d = {}
    for name, axis_name, _, make_reducer in methods:
        embedded_3d[name] = embed_by_category(category_points, make_reducer, 3)
        show_static_scatter(embedded_3d[name], 3, axis_name,
                            '3D ' + name + ' Visualization' + ' - ' + label)

    for name, axis_name, prefix, _ in methods:
        show_interactive_scatter(
            embedded_3d[name], prefix, axis_name,
            'Interactive 3D ' + name + ' Visualization' + ' - ' + label)


# Runs as before inside a notebook/script (where __name__ is "__main__"),
# while keeping the helpers importable without triggering the analysis.
if __name__ == "__main__":
    # os.listdir does not expand '~', so resolve the home directory first.
    subjects_root = os.path.expanduser(PATH)
    # loop over time periods
    for period in ['before', 'after']:
        # loop over conditions
        for condition in ['RECALL', 'ENCODE']:
            # loop over subjects
            for subject in sorted(os.listdir(subjects_root)):
                raster_dir = os.path.join(subjects_root, subject,
                                          'mni-aal3', 'rasters', condition)
                label = subject.split('_')[0] + ' - ' + condition + '_' + period
                if not os.path.isdir(raster_dir):
                    print('Skipping ' + label + ': no raster directory at ' + raster_dir)
                    continue
                points, lengths = collect_category_vectors(
                    raster_dir, period, word_categories, polish_to_eng)
                points = filter_categories(points, lengths)
                if not points:
                    # Nothing to embed; plotting an empty frame would raise.
                    print('Skipping ' + label + ': no category has at least '
                          + str(MIN_POINTS) + ' usable points')
                    continue
                plot_subject(points, label)
