# This notebook must be run with the Vizgen_2 conda environment within the Vizgen.sif singularity container

In [2]:
import os
import scanpy as sc
import geojson
import geopandas as gpd
from shapely.geometry import Polygon, shape
from shapely.affinity import affine_transform
from shapely.wkb import dumps
import pandas as pd

# Convert each sample's Baysor cell-boundary polygons (GeoJSON) into a
# GeoDataFrame, apply the sample's micron -> mosaic-pixel affine transform,
# and write the result as Parquet next to the source polygon file.
baysor_path = '/hpc/projects/group.quake/doug/Shapes_Spatial/'
meta_list = [
    '24-mo-female-1',
    '24-mo-female-3',
    '24-mo-female-5',
    '24-mo-male-1',
    '24-mo-male-2',
    '24-mo-male-4-rev2',
    '3-mo-female-1-rev2',
    '3-mo-female-2',
    '3-mo-female-3',
    '3-mo-male-1',
    '3-mo-male-2',
    '3-mo-male-3-rev2'
]
region_types = ['region_0', 'region_1']

adata = sc.read_h5ad('../02_annotation/baysor_microglia.h5ad')

for meta in meta_list:
    print(meta)

    # Use the first region whose polygon file exists for this sample.
    for region in region_types:
        polygon_file = f'{baysor_path}{meta}/baysor/{region}_6-5_micron_polygons.json'
        if os.path.exists(polygon_file):
            break
    else:
        print(f"No file found for {meta} in regions {region_types}")
        continue

    # The cell-name prefix is taken from the first annotated cell of this
    # batch. Skip the sample (rather than crash mid-run with an IndexError)
    # when the AnnData holds no cells for it.
    batch_names = adata.obs.loc[adata.obs['batchID'] == meta, 'Name']
    if batch_names.empty:
        print(f"No cells found for {meta} in the annotated data")
        continue
    identifier = batch_names.iloc[0].split('-')[0]

    with open(polygon_file, encoding='utf-8') as f:
        geometries = geojson.load(f)

    # Load the affine transformation matrix (space-separated, no header).
    transform_file = f'{baysor_path}{meta}/images/micron_to_mosaic_pixel_transform.csv'
    transform_df = pd.read_table(transform_file, sep=' ', header=None)
    transformation_matrix = transform_df.values

    shapes = []
    cells = []
    for geo in geometries['geometries']:
        rings = geo['coordinates']
        # Only the first ring is used; skip geometries with no ring or with
        # fewer than three points in it.
        if not rings or len(rings[0]) <= 2:
            continue
        shapes.append(Polygon(rings[0]))
        cells.append(identifier + '-' + str(geo['cell']))

    cells_gdf = gpd.GeoDataFrame({'Geometry': shapes, 'Name': cells}, geometry='Geometry')

    # Apply the full 2D affine matrix in shapely's [a, b, d, e, xoff, yoff]
    # order (x' = a*x + b*y + xoff, y' = d*x + e*y + yoff). When the
    # off-diagonal terms are zero this equals scaling about the origin
    # followed by a translation.
    cells_gdf['Geometry'] = cells_gdf['Geometry'].affine_transform([
        transformation_matrix[0, 0],
        transformation_matrix[0, 1],
        transformation_matrix[1, 0],
        transformation_matrix[1, 1],
        transformation_matrix[0, 2],
        transformation_matrix[1, 2],
    ])

    # Same directory and stem as the polygon file that was actually read.
    cells_gdf.to_parquet(os.path.splitext(polygon_file)[0] + '.parquet')

print("Processing complete.")

24-mo-female-1
24-mo-female-3
24-mo-female-5
24-mo-male-1
24-mo-male-2
24-mo-male-4-rev2
3-mo-female-1-rev2
3-mo-female-2
3-mo-female-3
3-mo-male-1
3-mo-male-2
3-mo-male-3-rev2
Processing complete.
