In [None]:
import pandas as pd
import json
import modify_data

In [None]:
# Load the two input tables from the project-local modify_data module.
# Both are iterated row-by-row with .iterrows() further down the notebook.
summary_df = modify_data.generate_summary()
genes_df = modify_data.generate_genes_info()

# Resolve the data source once and reuse the value instead of calling
# dataSource() again for the organ name.
# NOTE(review): assumes dataSource() returns the same value on every call - confirm.
fileName = modify_data.dataSource()

# organ = text after the last '_' in the second-to-last '/'-separated
# component of the data source (e.g. ".../<prefix>_<organ>/<file>" -> "<organ>").
organ = str(fileName).split('/')[-2].split('_')[-1]


In [None]:
# Skeleton of the JSON-LD document that is written to disk at the end of the
# notebook. "@context" holds the shared context URL plus local term
# definitions; "@graph" holds a single CellSummary node whose "summary" list
# is filled in by a later cell.
data = {
    "@context": [
        "https://cns-iu.github.io/hra-cell-type-populations-supporting-information/data-processor/ccf-context.jsonld",
        {
            "UBERON": {
                "@id": "http://purl.obolibrary.org/obo/UBERON_",
                # Must be the Python literal True (json.dump emits it as
                # JSON `true`); the bare name `true` raises NameError.
                "@prefix": True
            },
            "illustration_files": {
                "@id": "ccf:has_illustration_file",
                "@type": "@id"
            },
            "mapping": {
                "@id": "ccf:has_illustration_node",
                "@type": "@id"
            },
            "organ_id": {
                "@id": "ccf:organ_id",
                "@type": "@id"
            },
            "data_sources": {
                "@id": "ccf:has_data_source",
                "@type": "@id"
            }
        }
    ],
    "@graph": [
        {
            "@type": "CellSummary",
            # dataSource is a function; it has to be called to get the value.
            "cell_source": modify_data.dataSource(),
            "annotation_method": "Aggregation",
            "biomarker_type": "gene",
            # Populated with one CellSummaryRow per row of summary_df.
            "summary": []
        }
    ]
}


In [None]:
# Group the per-gene expression records by the cell label they belong to:
# cell_label -> list of GeneExpression dicts, in genes_df row order.
cell_label_to_genes = {}

for _, genes_row in genes_df.iterrows():
    gene_data = {
        "@type": "GeneExpression",
        "ensembl_id": genes_row['ensembl_id'],
        "gene_id": genes_row['gene_id'],
        "gene_label": genes_row['gene_label'],
        "mean_expression": genes_row['mean_expression'],
        # The source column is named 'pfold_val'; the output key is 'p_fold'.
        "p_fold": genes_row['pfold_val']
    }
    # setdefault creates the list the first time a label is seen.
    cell_label_to_genes.setdefault(genes_row['cell_label'], []).append(gene_data)

# Build one CellSummaryRow per row of summary_df, attaching the genes that
# share its cell label (an empty list when the label has no gene rows).
summary_rows = []
for _, summary_row in summary_df.iterrows():
    cell_label = summary_row['cell_label']

    cell_summary = {
        "@type": "CellSummaryRow",
        "cell_id": summary_row['cell_id'],
        "cell_label": cell_label,
        "genes": cell_label_to_genes.get(cell_label, []),
        "count": summary_row['count'],
        "percentage": summary_row['percentage']
    }
    summary_rows.append(cell_summary)

# Assign the freshly built list instead of appending to the existing one so
# that re-running this cell does not duplicate rows in the output document.
data["@graph"][0]["summary"] = summary_rows



In [None]:
# Serialise the assembled document to "<organ>.json" in the current working
# directory, pretty-printed with a 4-space indent, then report the file name.
output_name = f'{organ}.json'
with open(output_name, mode='w') as out_handle:
    json.dump(data, out_handle, indent=4)

print(f"JSON file {organ}.json has been created.")