## Undo log normalisation 

In [None]:
# With total counts column

def undo_log_normalisation(adata,
                           total_counts_col: str = 'total_counts',
                           return_layer: str = 'counts',
                           scaling_factor: float = 10000,
                           ):
    r"""Undo standard scanpy / Seurat log-normalisation using stored total counts.

    Inverts ``X = log1p(counts / total_counts * scaling_factor)`` as
    ``counts = expm1(X) * total_counts / scaling_factor`` and rounds the
    result to the nearest integer.

    Parameters
    ----------
    adata
        anndata object with the log-normalised data in ``adata.X``
        (scipy sparse matrix or dense array).
    total_counts_col
        ``adata.obs`` column with the total count per cell that was computed
        before normalisation.
    return_layer
        ``adata.layers[return_layer]`` layer name where the un-lognormalised
        counts are saved.
    scaling_factor
        Scaling factor that was used when normalising data (generally 10,000).

    Returns
    -------
    The same ``adata`` object, modified in place: the recovered counts are
    stored as a CSR matrix in ``adata.layers[return_layer]``.
    """
    from scipy.sparse import csr_matrix, issparse

    # Column vector so that it broadcasts across the genes of each cell.
    total_counts = adata.obs[total_counts_col].to_numpy().reshape((adata.n_obs, 1))

    if issparse(adata.X):
        # expm1(0) == 0, so sparsity is preserved by the inverse transform.
        counts = adata.X.expm1().multiply(total_counts) / scaling_factor
    else:
        # Dense input has no .expm1()/.multiply() methods; use numpy broadcasting.
        counts = np.expm1(np.asarray(adata.X)) * total_counts / scaling_factor
    counts = csr_matrix(counts)

    # Printed before rounding so the caller can verify the parameters were right:
    # large deviations from integers mean a wrong scaling factor or totals column.
    print('Check that untranformed values are very close to integers')
    print(counts.data)

    counts.data = np.round(counts.data)
    adata.layers[return_layer] = counts
    return adata

In [None]:
# Without total counts column (NOTE: this cell redefines `undo_log_normalisation` from the cell above)

def undo_log_normalisation(adata,
                           return_layer: str = 'counts',
                           scaling_factor: float = 10000):
    r"""Undo standard Scanpy / Seurat log-normalisation without precomputed total counts.

    The per-cell library size cannot be recovered by summing ``expm1(adata.X)``:
    after normalisation every cell sums to (approximately) ``scaling_factor``
    regardless of its original depth. Instead, the library size is estimated
    from the smallest non-zero normalised value of each cell, ASSUMING that
    this value corresponds to a raw count of exactly 1
    (``1 / total_counts * scaling_factor``). If the smallest raw count of a
    cell was ``k > 1``, its recovered counts are too small by a factor ``k``;
    the values printed before rounding will then typically not be close to
    integers.

    Parameters
    ----------
    adata : anndata.AnnData
        AnnData object with log-normalised data in ``adata.X``
        (scipy sparse matrix or dense array). ``adata.X`` is not modified.
    return_layer : str
        Name of ``adata.layers[return_layer]`` where un-lognormalised counts
        will be saved.
    scaling_factor : float
        Scaling factor used when normalising data (default is 10,000). It only
        affects the intermediate library-size estimate; it cancels out of the
        recovered counts.

    Returns
    -------
    The same ``adata`` object, modified in place: the recovered counts are
    stored as a CSR matrix in ``adata.layers[return_layer]``.
    """
    from scipy.sparse import csr_matrix

    # Invert log1p; .expm1() returns a new matrix, so adata.X stays untouched.
    normalised = csr_matrix(adata.X).expm1()
    normalised.eliminate_zeros()  # explicit zeros must not be picked as the minimum

    nnz_per_cell = np.diff(normalised.indptr)
    has_counts = nnz_per_cell > 0

    # Smallest non-zero normalised value per cell == normalised value of one count.
    # Cells without any counts keep a neutral value of 1 (they have no data to scale).
    unit_value = np.ones(normalised.shape[0], dtype=normalised.dtype)
    if has_counts.any():
        # Restricting to non-empty rows makes consecutive start offsets delimit
        # exactly one row each, which is what reduceat needs.
        row_starts = normalised.indptr[:-1][has_counts]
        unit_value[has_counts] = np.minimum.reduceat(normalised.data, row_starts)

    # Estimated library size per cell, then the usual inverse of the normalisation.
    total_counts = scaling_factor / unit_value
    normalised.data = (
        normalised.data * np.repeat(total_counts, nnz_per_cell) / scaling_factor
    )

    print('Check that untransformed values are very close to integers')
    print(normalised.data)

    # Round values to approximate original integer counts
    normalised.data = np.round(normalised.data)
    adata.layers[return_layer] = normalised

    return adata
