#### Analyse the images for DNA dropping
```
# Request an interactive session on O2 (170 GB of memory, 2 hours)
srun --pty -p interactive --mem 170G -t 0-02:00 /bin/bash

# Create a dedicated Python 3 conda environment
# NOTE: dna_corr.py imports numpy, pandas, matplotlib and skimage; they must
# be installed in this environment before the script can run.
conda create -n dna_corr python=3

# Copy the python script inside the environment for future use
# (assumes dna_corr.py is in the current directory - adjust the source path if not)
cp dna_corr.py ~/.conda/envs/dna_corr/bin/

# Activate the conda environment
conda activate dna_corr

# Run the script
cd /n/scratch2/ajit/ptcl_tma/PTCL8/registration
# Format for running the script:
#   python dna_corr.py <image_filepath> <output_filepath>
# which calls dna_corr(image_filepath = sys.argv[1], output_filepath = sys.argv[2])
python /home/ajn16/.conda/envs/dna_corr/bin/dna_corr.py PTCL7_484.ome.tif /n/scratch2/ajit/ptcl_tma/dna_corr

```
**Python script (`dna_corr.py`) used for generating the DNA correlation**

```
# Import necessary packages
import os
import sys
import numpy as np
from skimage.external import tifffile
import matplotlib.pyplot as plt
import pandas as pd


def dna_corr(image_filepath, output_filepath, channels_per_cycle=4):
    """Measure how well each cycle's DNA image correlates with the first cycle.

    The image stack is read from ``image_filepath``.  The first plane of every
    cycle is treated as that cycle's DNA channel, and its Pearson correlation
    with the DNA channel of the first cycle is computed.  The result is saved
    as a line plot and as a CSV table.

    Parameters:
        image_filepath: Path of the image stack to read with tifffile.
            Assumes the first axis of the stack indexes channels, ordered
            cycle by cycle - TODO confirm for other image layouts.
        output_filepath: Existing directory that receives
            "correlation coefficient.png" and "correlation coefficient.csv".
        channels_per_cycle: Number of channels acquired in each cycle
            (default 4, the value that was previously hard-coded).
    Returns:
        None. The results are written to ``output_filepath``.
    Raises:
        ValueError: If ``channels_per_cycle`` is smaller than 1.
    Example:
        dna_corr("PTCL7_484.ome.tif", "/n/scratch2/ajit/ptcl_tma/dna_corr")
    """
    if channels_per_cycle < 1:
        raise ValueError("channels_per_cycle must be a positive integer")

    # read image
    image = tifffile.imread(image_filepath)
    # get number of cycles; trailing planes that do not fill a cycle are ignored
    num_cycle = image.shape[0] // channels_per_cycle
    # reference DNA intensity, hard-coded to be the DNA channel of the first
    # cycle; flattened once here instead of once per loop iteration
    reference_dna = image[0].ravel()
    # empty array to store correlation coefficient result
    corrcoef = np.zeros(num_cycle)
    # loop over each cycle
    for cycle_index in range(num_cycle):
        # get the current DNA intensity (first channel of this cycle)
        current_dna = image[cycle_index * channels_per_cycle].ravel()
        # calculate correlation coefficient. value range: [-1,+1]
        # (numpy yields NaN when one of the two planes is constant)
        corrcoef[cycle_index] = np.corrcoef(current_dna, reference_dna)[0, 1]

    # 1-based cycle numbers, shared by the figure and the table so they agree
    cycles = np.arange(1, num_cycle + 1)

    ## Generate Figure
    # Use a dedicated figure and close it afterwards: drawing on pyplot's
    # implicit global figure would overlay the curves of earlier calls and
    # keep every figure alive in memory.
    fig, ax = plt.subplots()
    try:
        ax.plot(cycles, corrcoef)
        # only the non-negative range is displayed; negative values fall
        # outside the plotted area
        ax.set_ylim(0, 1)
        ax.set_title("Correlation coefficient over cycles")
        ax.set_xlabel('Cycles')
        ax.set_ylabel('Correlation Coefficient')
        fig.savefig(os.path.join(output_filepath, "correlation coefficient.png"))
    finally:
        plt.close(fig)

    ## Generate the table and save it as CSV
    corrcoef_df = pd.DataFrame({
        'Cycle': cycles,
        "correlation coefficient": corrcoef,
    })
    corrcoef_df.to_csv(os.path.join(output_filepath, "correlation coefficient.csv"), index=False)

# Run the function only when this file is executed as a script, e.g.
#   python dna_corr.py <image_filepath> <output_filepath>
# so that importing the module does not trigger the analysis.
if __name__ == "__main__":
    if len(sys.argv) < 3:
        # Fail with a readable usage message instead of an IndexError
        sys.exit("Usage: python dna_corr.py <image_filepath> <output_filepath>")
    dna_corr(image_filepath=sys.argv[1], output_filepath=sys.argv[2])

```

#### Clean up the CSV file to remove DNA channels and trim channel names

```
# Preprocess the data
def _clean_and_split(data, file_name=None, drop_markers=None):
    '''
    Clean the raw table and split it into (expression data, meta data).

    Shared implementation behind preprocess, preprocess_d and preprocess_m.
    The input DataFrame is left untouched.
    '''
    # Unique name for every row: "<row index>_<CellId>".
    # .values avoids index alignment; the labels are attached to the new frame
    # returned by drop() so the caller's DataFrame is not modified (modifying
    # it would produce doubled IDs when the same frame is processed twice).
    unique_ids = data.index.astype(str) + '_' + data['CellId'].astype(str).values
    data = data.drop(columns=['CellId'])
    data.index = unique_ids
    # If there is INF replace with zero
    data = data.replace([np.inf, -np.inf], 0)
    # Remove DNA channels (any column whose name contains "dna", any case)
    data = data.loc[:, ~data.columns.str.contains('dna', case=False)]
    # Split the data into expression data and meta data:
    # everything from the column named "Area" onwards is meta data,
    # everything before it is expression data.
    split_idx = data.columns.get_loc("Area")
    meta = data.iloc[:, split_idx:]
    expression = data.iloc[:, :split_idx]
    # Rename the columns of the expression data
    if file_name is not None:
        expression = expression.rename(columns=lambda name: name.replace(file_name, ''))
    # Drop unnecessary markers (matched against the renamed columns)
    if drop_markers is not None:
        expression = expression.drop(columns=drop_markers)
    return expression, meta


# Preprocess the data
def preprocess (data, file_name=None,drop_markers=None):
    '''
    Parameters:
        data: Data matrix generated by histocat. Must contain a 'CellId'
              column and an 'Area' column; the columns before 'Area' are
              treated as expression values.
        file_name: String. If a value is provided, the program will remove the
                   string from the marker names
        drop_markers: Array. List of markers to drop from the analysis.
                      e.g. ["CD3D", "CD20"]
    Returns:
        The cleaned expression data. Use preprocess_m to get the meta data.
    Raises:
        KeyError: If 'CellId', 'Area' or one of drop_markers is not a column.
    Example:
        preprocess (data, file_name="Cell_Ton_192", drop_markers=["PERK", "NOS2"])
    '''
    expression, _ = _clean_and_split(data, file_name, drop_markers)
    # Return data
    return expression


def preprocess_d (data, file_name=None,drop_markers=None):
    '''
    Same as preprocess: return the cleaned expression data.
    '''
    return preprocess(data, file_name, drop_markers)


def preprocess_m (data, file_name=None,drop_markers=None):
    '''
    Same arguments as preprocess, but return the meta data instead, i.e. the
    'Area' column and every column after it. file_name and drop_markers only
    affect the expression columns; they are still applied, so an unknown
    marker raises here as well.
    '''
    _, meta = _clean_and_split(data, file_name, drop_markers)
    return meta

# Load the raw single-cell table and run the pre-processing on it.
data = pd.read_csv('//research.files.med.harvard.edu/ImStor/sorger/data/RareCyte/Connor/Z155_PTCL/whole_section_csv_files/raw/Ton_192.csv')
# Prefix that gets stripped from the marker (column) names
file_name = "Cell_Ton_192"
# Markers excluded from the analysis
markers_to_drop = ["PERK", "NOS2"]
# preprocess_d / preprocess_m are expected to be the variants of preprocess
# that return the expression data and the meta data respectively.
data_clean = preprocess_d(data, file_name, drop_markers=markers_to_drop)
meta_clean = preprocess_m(data, file_name, drop_markers=markers_to_drop)  # change return to meta

# Write the data out: <output_filepath><file_name>_data.csv and ..._meta.csv
output_filepath = "//research.files.med.harvard.edu/ImStor/sorger/data/RareCyte/Connor/Z155_PTCL/whole_section_csv_files/"
# From here on file_name is the sample name used in the output file names
file_name = "TON"
output_prefix = output_filepath + file_name
# index=False: the row labels are not written to the files
data_clean.to_csv(output_prefix + "_data.csv", index=False)
meta_clean.to_csv(output_prefix + "_meta.csv", index=False)
```

#### Cell type calling (using R)