In [None]:
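# Run the data-prep notebook inside this kernel (IPython %run) so its names are available here; it presumably provides pd, np, final() and split_cellType(), which are used below but never imported in this notebook.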
%run Prep_data_All_Patients.ipynb

In [None]:
# Input directories handed to final() / final_figures() in the cells below.
# NOTE(review): these are absolute paths on one user's machine, so the notebook
# cannot run elsewhere without editing them — consider deriving them from a
# single configurable base directory.
# What each directory contains is only suggested by its name; confirm against
# the prep notebook that defines final().
supDir = '/Users/elikond/Downloads/surprisal_analysis/'
clusterDir = '/Users/elikond/Downloads/clusters/'
mesenDir = '/Users/elikond/Desktop/Brown_Work/MesenProneural/'

In [None]:
import plotly.express as px
from sklearn.neighbors import NearestCentroid
import matplotlib.pyplot as plt

In [None]:
def final_figures(clusterNum, supDir, clusterDir, mesenDir, patientID):
    """Load one patient's data and draw every summary figure for it.

    Parameters
    ----------
    clusterNum
        Cluster forwarded to ``pie_by_cluster`` for the per-cluster pie chart.
    supDir, clusterDir, mesenDir, patientID
        Forwarded unchanged to ``final``.

    Returns
    -------
    None
        The figures are displayed by the plotting helpers themselves.
    """
    # final() hands back five values; only the last one feeds the plots.
    _merged, _features, _labels, _barcode_len, subpops_frame = final(
        supDir, clusterDir, mesenDir, patientID
    )

    pie_by_cluster(clusterNum, subpops_frame)
    pie_all_clusters(subpops_frame)
    bar_chart(subpops_frame)
    mesen_proneural(subpops_frame)

In [None]:
# Draw every figure for patient 'gb9', using cluster 1 for the per-cluster pie.
# NOTE(review): when the notebook is run top to bottom, this cell executes
# before pie_by_cluster, pie_all_clusters, bar_chart and mesen_proneural are
# defined in the cells below, so on a fresh kernel it raises NameError unless
# the %run'd prep notebook already defines them — confirm, and consider moving
# this call after the definitions.
final_figures(1, supDir, clusterDir, mesenDir, 'gb9')

In [None]:
def subpop_in_cluster(sigSubpopsDF):
    """Count how many rows of each subpopulation fall in each cluster.

    Parameters
    ----------
    sigSubpopsDF : pandas.DataFrame
        Must contain the columns ``'seurat_clusters'`` and ``'Subpopulations'``.

    Returns
    -------
    dict
        Nested mapping ``{cluster: {subpopulation: row_count, ...}, ...}``,
        with keys in order of first appearance in the frame.
    """
    counts = dict()
    # Walk both columns in lockstep so rows are paired by position. Looking a
    # value up with a running counter (series[i]) is label-based and breaks —
    # KeyError or wrong row — whenever the index is not exactly 0..n-1, e.g.
    # after the frame has been filtered.
    row_pairs = zip(sigSubpopsDF['seurat_clusters'], sigSubpopsDF['Subpopulations'])
    for my_cluster, subpop in row_pairs:
        per_cluster = counts.setdefault(my_cluster, dict())
        per_cluster[subpop] = per_cluster.get(subpop, 0) + 1
    return counts

def pie_by_cluster(clusterNum, sigSubpopsDF):
    """Show a pie chart of the subpopulation make-up of a single cluster.

    Parameters
    ----------
    clusterNum
        Cluster to plot; must be a key of ``subpop_in_cluster(sigSubpopsDF)``,
        otherwise a ``KeyError`` is raised.
    sigSubpopsDF : pandas.DataFrame
        Frame passed on to ``subpop_in_cluster``.

    Returns
    -------
    tuple
        ``(fig, df)`` — the plotly figure and the two-column frame
        (``'Subpopulations'``, ``'Num'``) it was drawn from.
    """
    cluster_counts = subpop_in_cluster(sigSubpopsDF)[clusterNum]
    df = pd.DataFrame({
        'Subpopulations': list(cluster_counts.keys()),
        'Num': list(cluster_counts.values()),
    })

    chart_title = 'Cluster ' + str(clusterNum)
    fig = px.pie(df, values='Num', names='Subpopulations', title=chart_title)
    #fig.update_traces(textposition='inside')
    fig.show()
    return fig, df

In [None]:
# Load the data for patient 'gb13'; this rebinds the notebook-level variables
# (including sigSubpopsDF) that the following cells read.
# final() is not defined in this notebook — presumably it comes from the
# %run'd prep notebook.
merged_df, X_data, y_data, barcode_len, sigSubpopsDF = final(supDir, clusterDir, mesenDir, 'gb13')
# Pie chart for cluster 1. As the last expression of the cell, the returned
# (fig, df) tuple is also echoed as cell output in addition to fig.show().
pie_by_cluster(1, sigSubpopsDF)

In [None]:
def pie_all_clusters(sigSubpopsDF):
    """Show a pie chart of subpopulation sizes across all rows of the frame.

    Parameters
    ----------
    sigSubpopsDF : pandas.DataFrame
        Must contain a ``'Subpopulations'`` column.

    Returns
    -------
    tuple
        ``(fig, subpop_count)`` — the plotly figure and a frame indexed by
        subpopulation label with the columns ``'Num'`` (row count) and
        ``'Subpopulations'`` (the label again, as a regular column).
    """
    counts = sigSubpopsDF['Subpopulations'].value_counts()
    # Name the count column explicitly instead of renaming whatever
    # value_counts() produced: pandas < 2.0 names the result 'Subpopulations',
    # pandas >= 2.0 names it 'count', so a rename keyed on the old name silently
    # does nothing there and 'Num' never exists.
    # Clearing the axis name avoids 'Subpopulations' being both the index name
    # and a column label on pandas >= 2.0.
    subpop_count = counts.rename('Num').rename_axis(None).to_frame()
    subpop_count['Subpopulations'] = subpop_count.index
    fig = px.pie(subpop_count, values = 'Num', names = 'Subpopulations')
    fig.show()
    return fig, subpop_count

In [None]:
# Overall subpopulation pie for whatever sigSubpopsDF currently holds
# (patient 'gb13' from the cell above when the notebook is run top to bottom).
pie_all_clusters(sigSubpopsDF)

In [None]:
def bar_chart(sigSubpopsDF):
    """Show a stacked bar chart comparing subpopulation shares, Control vs Treatment.

    The frame is split with ``split_cellType``; each half is counted with
    ``pie_all_clusters`` (which also displays a pie chart for that half as a
    side effect), and the two count tables are stacked and plotted.

    Parameters
    ----------
    sigSubpopsDF : pandas.DataFrame
        Frame passed to ``split_cellType``.

    Returns
    -------
    None
    """
    control_rows, treatment_rows = split_cellType(sigSubpopsDF)

    def _share_table(rows, label):
        # Only the count table is needed; the figure returned alongside is dropped.
        _, table = pie_all_clusters(rows)
        table['Cell_Type'] = [label] * len(table)
        # Despite its name, 'Percent' is a 0-1 fraction of the group's rows.
        table['Percent'] = table['Num'].div(sum(table['Num']))
        return table

    control_table = _share_table(control_rows, 'Control')
    treatment_table = _share_table(treatment_rows, 'Treatment')
    stacked = pd.concat([control_table, treatment_table])

    fig = px.bar(stacked, x="Cell_Type", y="Percent", color="Subpopulations")
    fig.show()

In [None]:
# Control-vs-Treatment bar chart for whatever sigSubpopsDF currently holds
# (still patient 'gb13' when the notebook is run top to bottom).
bar_chart(sigSubpopsDF)

In [None]:
def centeroidnp(arr):
    """Return the centroid of a set of 2-D points.

    Parameters
    ----------
    arr : numpy.ndarray
        2-D array whose column 0 holds x values and column 1 holds y values;
        one row per point.

    Returns
    -------
    tuple
        ``(mean_x, mean_y)`` — each column's sum divided by the number of rows.
    """
    n_points = arr.shape[0]
    x_total, y_total = (np.sum(arr[:, axis]) for axis in (0, 1))
    return x_total / n_points, y_total / n_points

def mesen_proneural(sigSubpopsDF, axis_limits=(0.05, 0.25)):
    """Scatter Mesenchymal vs Proneural per subpopulation and report extreme centroids.

    One subplot is drawn for every subpopulation label present in the frame:
    its points in blue and their centroid in black. Afterwards the labels of
    the subpopulations with the smallest/largest centroid on each axis are
    printed.

    Parameters
    ----------
    sigSubpopsDF : pandas.DataFrame
        Must contain the columns ``'Subpopulations'``, ``'Mesenchymal'`` and
        ``'Proneural'``.
    axis_limits : sequence of two floats, optional
        ``(low, high)`` applied to both axes of every subplot.

    Returns
    -------
    None
        Prints ``mesenchymal_min mesenchymal_max proneural_min proneural_max``
        as subpopulation labels.

    Raises
    ------
    ValueError
        If the frame contains no subpopulation labels.
    """
    # Iterate over the labels that actually occur. range(max(labels)) would skip
    # the highest label and visit ids that have no rows, and an empty group
    # makes the centroid computation fail.
    subpops = sorted(sigSubpopsDF['Subpopulations'].dropna().unique())
    if not subpops:
        raise ValueError("sigSubpopsDF contains no subpopulations to plot")

    # Keep the 5x5 layout for up to 25 panels; add rows only when more are needed.
    n_cols = 5
    n_rows = max(5, -(-len(subpops) // n_cols))

    my_coords_x = list()
    my_coords_y = list()
    plt.figure(figsize=(20,20))
    for position, subpop in enumerate(subpops, start=1):
        temp_df = sigSubpopsDF[sigSubpopsDF["Subpopulations"] == subpop]
        points = temp_df[['Mesenchymal', 'Proneural']].to_numpy()

        centroid_coord = centeroidnp(points)
        my_coords_x.append(centroid_coord[0])
        my_coords_y.append(centroid_coord[1])

        # Panels are placed by position in the sorted label list, so labels do
        # not have to be consecutive integers starting at zero.
        plt.subplot(n_rows, n_cols, position)
        plt.xlim(axis_limits)
        plt.ylim(axis_limits)
        plt.scatter(points[:, 0], points[:, 1], c = 'blue')
        plt.scatter(centroid_coord[0], centroid_coord[1], c = 'black')
        plt.title('Subpopulation ' + str(subpop))
    plt.show()

    my_coords_xArr = np.array(my_coords_x)
    my_coords_yArr = np.array(my_coords_y)

    # argmin/argmax give positions in `subpops`; translate them back to labels.
    mesenchymal_min = subpops[int(np.argmin(my_coords_xArr))]
    mesenchymal_max = subpops[int(np.argmax(my_coords_xArr))]
    proneural_min = subpops[int(np.argmin(my_coords_yArr))]
    proneural_max = subpops[int(np.argmax(my_coords_yArr))]
    print(mesenchymal_min, mesenchymal_max, proneural_min, proneural_max)

In [None]:
# Reload the data for patient 'gb9'; this overwrites the notebook-level
# variables (including sigSubpopsDF) previously bound to patient 'gb13'.
merged_df, X_data, y_data, barcode_len, sigSubpopsDF = final(supDir, clusterDir, mesenDir, 'gb9')

In [None]:
# Per-subpopulation Mesenchymal/Proneural scatter plots for whatever
# sigSubpopsDF currently holds (patient 'gb9' from the cell above when the
# notebook is run top to bottom).
mesen_proneural(sigSubpopsDF)

In [None]:
def box_plot(df):
    """Placeholder for a box-plot figure; not implemented yet.

    The argument ``df`` is accepted but ignored, and the function returns None.
    """
    return None