In [None]:
import pandas as pd
import plotly.express as px
from pathlib import Path
import math

# Input locations, relative to the directory the notebook is run from.
vampseq_folder = "../Data/filtered_ppj_data/VAMPseq"
sge_folder = "../Data/filtered_ppj_data/SGE"

# Genes included in each figure.
vampseq_genes = ["F9", "TSC2", "G6PD"]
sge_genes = ["BRCA2", "PALB2", "CTCF", "XRCC2", "RAD51D", "BARD1", "SFPQ"]

# CSV files to load per VAMP-seq gene. F9 has five files; the other genes one each.
vampseq_files = {
    "F9": [
        f"{vampseq_folder}/F9_{suffix}.csv"
        for suffix in ("ab001", "ab102", "ab124", "ab3570", "strep")
    ],
    "G6PD": [f"{vampseq_folder}/G6PD_scores_consequence.csv"],
    "TSC2": [f"{vampseq_folder}/TSC2_scores_consequences.csv"],
}

In [None]:
def read_data(vamp, sge):
    """Load the SGE and VAMP-seq tables that feed the sunburst figures.

    Parameters
    ----------
    vamp : str or path-like
        VAMP-seq data folder. Kept for interface compatibility only: the
        VAMP-seq inputs come from the module-level ``vampseq_files`` mapping,
        so this value is not read.
    sge : str or path-like
        Folder searched (non-recursively) for SGE files matching ``*tsv*``.

    Returns
    -------
    tuple of (dict, dict)
        ``(vampseq_data, sge_data)``.

        * ``sge_data`` maps each gene in ``sge_genes`` that has a matching
          file to its table, with rows whose ``variant_qc_flag`` equals
          ``'WARN'`` removed. Genes without a matching file are absent.
        * ``vampseq_data`` maps ``'F9'`` to a list of tables (one per listed
          file, possibly empty) and every other gene in ``vampseq_genes`` to
          a single table.

    Raises
    ------
    KeyError
        If a gene in ``vampseq_genes`` has no entry in ``vampseq_files``, or
        an SGE table has no ``variant_qc_flag`` column.
    """
    sge_dir = Path(sge)

    # glob() yields entries in an unspecified order. Sorting makes the
    # "last matching file wins" rule below deterministic across machines.
    sge_files = sorted(sge_dir.glob("*tsv*"))

    sge_data = {}
    for gene in sge_genes:
        for path in sge_files:
            # Match on the file name only: testing the whole path would let a
            # gene symbol in a parent directory name match every file.
            if gene in path.name:
                table = pd.read_csv(path, sep='\t')
                sge_data[gene] = table.loc[table['variant_qc_flag'] != 'WARN']

    vampseq_data = {}
    f9_data = []
    for gene in vampseq_genes:
        for file in vampseq_files[gene]:
            table = pd.read_csv(file)
            if gene == 'F9':
                # F9 has several files; keep each one as its own table.
                f9_data.append(table)
            else:
                # Other genes keep one table; a later file replaces an earlier one.
                vampseq_data[gene] = table

    # Always present, even when no F9 file was read.
    vampseq_data['F9'] = f9_data

    return vampseq_data, sge_data

In [None]:
def sge_sunburst(sge_data):
    """Show a study -> gene -> type sunburst of SGE variant and measurement counts.

    ``sge_data`` maps a gene name to its table; only ``len()`` of each table
    is used. Every gene contributes a ``'variants'`` leaf (the row count) and
    a ``'measurements'`` leaf (three times the row count, increased further
    for the genes listed below). The figure is displayed with ``fig.show()``
    and nothing is returned.
    """
    # Genes whose measurement count is increased by an extra fraction.
    # NOTE(review): the origin of this factor is not visible here - confirm.
    scaled_genes = ['BARD1', 'RAD51D', 'XRCC2', 'PALB2']
    extra_fraction = 0.6138341732

    def leaf(gene, kind, count):
        # One sunburst leaf: study / gene / type, sized by `count`.
        return {'study': 'SGE', 'gene': gene, 'type': kind, 'value': count}

    records = []
    for gene, table in sge_data.items():
        n_variants = len(table)
        # NOTE(review): presumably three measurements per variant - confirm.
        n_measurements = n_variants * 3

        if gene in scaled_genes:
            n_measurements = math.floor(n_measurements * extra_fraction + n_measurements)

        records.append(leaf(gene, 'variants', n_variants))
        records.append(leaf(gene, 'measurements', n_measurements))

    fig = px.sunburst(
        pd.DataFrame(records),
        path=['study', 'gene', 'type'],  # hierarchy, from the centre outwards
        values='value',
    )

    # Optional: hide all text labels.
    #fig.update_traces(textinfo='none')

    # Large square canvas.
    fig.update_layout(width=1000, height=1000)

    fig.show()
    #fig.write_image('/Users/ivan/Desktop/pillar_project_figs/20251112_SGE_Sunburst.svg')

In [None]:
def vampseq_sunburst(vampseq_data):
    """Show a study -> gene -> type sunburst of VAMP-seq variant and measurement counts.

    Parameters
    ----------
    vampseq_data : dict
        Maps each gene in the module-level ``vampseq_genes`` list to either a
        single table or a list/tuple of tables (as ``read_data`` produces for
        F9). Only ``len()`` of each table is used.

    Notes
    -----
    * Single table: variants and measurements both equal its row count.
    * List of tables: variants is the row count of the first table and
      measurements is the total row count across all tables.
      NOTE(review): this assumes every table in the list covers the same
      variants - confirm.
    * Empty list: both counts are 0 instead of raising ``IndexError``.

    The figure is displayed with ``fig.show()``; nothing is returned or
    printed.

    Raises
    ------
    KeyError
        If a gene in ``vampseq_genes`` is missing from ``vampseq_data``.
    """
    def leaf(gene, kind, count):
        # One sunburst leaf: study / gene / type, sized by `count`.
        return {'study': 'VAMP-seq', 'gene': gene, 'type': kind, 'value': count}

    rows = []
    for gene in vampseq_genes:
        data = vampseq_data[gene]

        if isinstance(data, (list, tuple)):
            # Several tables for one gene: dispatch on the container type
            # rather than on the gene name.
            variants = len(data[0]) if data else 0
            measurements = sum(len(table) for table in data)
        else:
            variants = len(data)
            measurements = len(data)

        rows.append(leaf(gene, 'variants', variants))
        rows.append(leaf(gene, 'measurements', measurements))

    sunburst_df = pd.DataFrame(rows)

    fig = px.sunburst(sunburst_df,
                      path=['study', 'gene', 'type'],  # hierarchy, from the centre outwards
                      values='value')

    # Large square canvas.
    fig.update_layout(width=1000, height=1000)

    fig.show()
    #fig.write_image('/Users/ivan/Desktop/pillar_project_figs/20251112_VAMPseq_Sunburst.svg')

In [None]:
def main():
    """Load both datasets, then draw the SGE figure followed by the VAMP-seq figure."""
    loaded = read_data(vampseq_folder, sge_folder)
    vampseq_tables, sge_tables = loaded

    sge_sunburst(sge_tables)
    vampseq_sunburst(vampseq_tables)

In [None]:
# Entry point: load the data and show both sunburst figures.
main()