This notebook demonstrates the CENCIC algorithm for circuit search using ASD mutations.

Required data files:
1. ASD mutation bias file: "Spark_Meta_EWS.Z2.bias.FDR.csv"
2. Information Score for the connectome: "Spark_Meta_EWS.Z2.info.csv"

Scripts used in this notebook:
1. script.Pareto.generate_bias_lim.py
2. script.Pareto.generate_bias_lim.py
3. script.Pareto.generate_bias_lim.py
4. script.Pareto.generate_bias_lim.py
5. script.Pareto.generate_bias_lim.py
6. script.Pareto.generate_bias_lim.py

In [None]:
%load_ext autoreload
%autoreload 2
import sys
import os

RootDIR = "/home/jw3514/Work/ASD_Circuits_CellType/" # put this in the right place
os.chdir(RootDIR + "/notebooks_mouse_str") # put this in the right place
print(f"Current working directory: {os.getcwd()}")
sys.path.insert(1, RootDIR + 'src')
# Need to add src directory to Python path first

#sys.path.append("../src")
from ASD_Circuits import *

# 1. Calculate mutation structure biases with ASD mutations

In [None]:
# Load the ASD mutation bias table; the first CSV column becomes the index.
# NOTE(review): later cells treat the first N rows as the "top" structures
# (BiasDF.index.values[:size] / BiasDF.head(size)), which presumably relies on
# this file being sorted by decreasing bias -- confirm.
Spark_ASD_STR_Bias = pd.read_csv("../dat/Unionize_bias/Spark_Meta_EWS.Z2.bias.FDR.csv", index_col=0)
Spark_ASD_STR_Bias.head(2)

# 2. Calculate CCS scores

# 3. Circuit Search with SA

### 3.1 Calculate Biaslim for SA search

This will generate bias limits for different circuit sizes. In our paper we use size 46 as the main search size since it has the highest CCS score.

The bias step size is 0.005 when bias > 0.3, 0.01 when 0.2 < bias <= 0.3, and 0.05 when bias <= 0.2, to reduce the computational burden (we care less about the bias limits at low bias).

In [None]:
# Calculate bias limits for different circuit sizes
OutDIR = "../dat/CircuitSearch/Biaslims/"
BiasDF = Spark_ASD_STR_Bias

# Create the output directory up front so the first open() cannot fail on a
# fresh checkout.
os.makedirs(OutDIR, exist_ok=True)

sizes = np.arange(10, 100, 1)
for t in sizes:
    # Compute the limits before touching the file so a failure in BiasLim does
    # not leave an empty output file behind.
    lims = BiasLim(BiasDF, t)
    # One header-less, two-column CSV per circuit size. The context manager
    # closes the file even if a write fails; newline="" is what the csv module
    # expects for files it writes to.
    with open(OutDIR + "biaslim.size.{}.txt".format(t), "w", newline="") as fout:
        writer = csv.writer(fout)
        # Local names chosen so the notebook-level `size` is not overwritten.
        for lim_size, lim_bias in lims:
            writer.writerow([lim_size, lim_bias])

In [None]:
# Keep only the bias limits >= 0.3 for the size-46 search to reduce the number
# of jobs, renumber the rows from 0 and save the subset next to the full file.
# NOTE(review): the "top17" in the output name is fixed, while the number of
# rows kept depends on the 0.3 threshold -- confirm they agree for your data.
Selected_BiasLim = (
    pd.read_csv(OutDIR + "biaslim.size.46.txt", names=["size", "bias"])
    .loc[lambda df: df["bias"] >= 0.3]
    .reset_index(drop=True)
)
Selected_BiasLim.to_csv(OutDIR + "biaslim.size.46.top17.txt", index=False)


In [None]:
# Show the selected bias limits as the cell output.
Selected_BiasLim

### 3.2 run bash script to search circuits

# Circuit Search Using Simulated Annealing
Now we can run the bash script `scripts/submit_job_run_pareto.SI.sh` to search for circuits using the selected bias limits.

This is a computationally intensive process that may take 1-2 days to complete, depending on:
- Number of parallel threads used
- Size of the search space
- Number of bias limits being explored

Here is the list of files/variables used in the bash script:
- `BiasDF=../dat/Unionize_bias/Spark_Meta_EWS.Z2.bias.FDR.csv`: ASD mutation bias file
- `AdjMat=../dat/allen-mouse-conn/ScoreingMat_jw_v3/WeightMat.Ipsi.csv`: Connection weights for the connectome
- `InfoMat=../dat/allen-mouse-conn/ScoreingMat_jw_v3/InfoMat.Ipsi.csv`: Information Score for the connectome
- `BiasLim=../dat/CircuitSearch/Biaslims/biaslim.size.46.txt`: Bias limits for different circuit sizes
- `DIR=../dat/CircuitSearch/results/ASD_Pareto_46`:  Output directory
- `NJob`: Number of total searches to complete the Pareto front (number of bias limits), calculated using `wc -l $BiasLim | cut -f 1 -d ' '`
- `Nparallel=20`: Number of parallel threads used

Fill the variables and run the bash script.


### 3.3 Collect results and visualize

In [None]:
def normtoUnit(x, xmin, xmax):
    """Linearly rescale ``x`` so that ``xmin`` maps to 0 and ``xmax`` maps to 1.

    Values outside ``[xmin, xmax]`` are not clipped. ``xmin == xmax`` is not
    special-cased.
    """
    offset = x - xmin
    span = xmax - xmin
    return offset / span

def searchFil(text, DIR):
    """Return the names of the entries in ``DIR`` whose name contains ``text``.

    Matching is a plain substring test. Names are returned without the
    directory prefix, in the order ``os.listdir`` yields them.
    """
    return [entry for entry in os.listdir(DIR) if text in entry]

def LoadSA3(fname, DIR, InfoMat, minbias, topL=100):
    """Pick the best-scoring circuit from one simulated-annealing result file.

    Each non-blank line is split on whitespace: field 1 is parsed as the
    circuit's bias and field 2 as a comma-separated list of structures
    (field 0 is ignored). Circuits whose bias is below ``minbias`` are
    skipped; the rest are scored with ``ScoreCircuit_SI_Joint`` and the one
    with the highest score is kept.

    Parameters
    ----------
    fname : str
        File name, appended directly to ``DIR``.
    DIR : str
        Directory prefix; joined by plain string concatenation, so it must
        end with a path separator.
    InfoMat
        Passed through unchanged to ``ScoreCircuit_SI_Joint``.
    minbias : float
        Minimum bias a circuit needs in order to be considered.
    topL : int
        Highest line index (0-based) that is read, i.e. at most ``topL + 1``
        lines are examined.

    Returns
    -------
    tuple
        ``(max_score, max_bias, max_STRs)``; ``(0, 0, [])`` when no circuit
        qualifies or none scores above 0.
    """
    max_score, max_bias, max_STRs = 0, 0, []
    # Context manager so the handle is released even if parsing/scoring raises.
    with open(DIR + fname, 'rt') as fin:
        for i, line in enumerate(fin):
            if i > topL:
                break
            fields = line.split()
            if not fields:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            bias = float(fields[1])
            if bias < minbias:
                continue
            STRs = fields[2].split(",")
            score = ScoreCircuit_SI_Joint(STRs, InfoMat)
            if score > max_score:
                max_score, max_bias, max_STRs = score, bias, STRs
    return max_score, max_bias, max_STRs

def GetData2(params, size, DIR, adj_mat, InfoMat):
    """Collect the best circuit found for every bias limit in ``params``.

    For each row of ``params`` the first file in ``DIR`` whose name contains
    ``keepN_<size>-minbias_<bias>.txt`` is loaded with ``LoadSA3`` and the
    returned circuit is scored with ``ScoreCircuit_SI_Joint``. Rows whose
    score equals 0 are dropped.

    ``adj_mat`` is accepted but not used here.

    Returns four parallel lists: scores, the requested bias limits, the bias
    values reported by ``LoadSA3``, and the structure lists.
    """
    kept = []
    for _, row in params.iterrows():
        cut = row["bias"]
        pattern = "keepN_{}-minbias_{}.txt".format(size, cut)
        # Raises IndexError when no result file matches the pattern.
        fil = searchFil(pattern, DIR)[0]
        _, real_minbias, STRs = LoadSA3(fil, DIR, InfoMat, cut)
        score = ScoreCircuit_SI_Joint(STRs, InfoMat)
        if score != 0:
            kept.append((score, cut, real_minbias, STRs))
    SCORES = [entry[0] for entry in kept]
    CutBias = [entry[1] for entry in kept]
    RealBias = [entry[2] for entry in kept]
    STRS = [entry[3] for entry in kept]
    return SCORES, CutBias, RealBias, STRS

def XXXX_cont(BiasDF, BiasDF2, biaslim_df, size, DIR, adj_mat, InfoMat):
    """Load a search profile and recompute each circuit's mean bias from ``BiasDF``.

    The circuits come from ``GetData2``. The bias values it reports are
    discarded and replaced by the mean of ``BiasDF``'s "EFFECT" column over
    each circuit's structures. A final entry is appended for the first
    ``size`` rows of ``BiasDF``, using its mean "EFFECT" as both the cut-off
    and the real bias.

    ``BiasDF2`` is accepted but not used here.

    Returns ``(SCORES, CutBias, New_RealBias, STRS)`` as parallel lists.
    """
    SCORES, CutBias, _, STRS = GetData2(biaslim_df, size, DIR, adj_mat, InfoMat)
    New_RealBias = [BiasDF.loc[members, "EFFECT"].mean() for members in STRS]

    # Reference point: the first `size` structures of BiasDF.
    top_regions = BiasDF.index.values[:size]
    top_bias = BiasDF.head(size)["EFFECT"].mean()
    top_score = ScoreCircuit_SI_Joint(top_regions, InfoMat)

    SCORES.append(top_score)
    CutBias.append(top_bias)
    New_RealBias.append(top_bias)
    STRS.append(top_regions)
    return SCORES, CutBias, New_RealBias, STRS

def search_target_swap(size, BiasDF, NSwap, biaslim_df, adj_mat, 
                       ProbMat1, ProbMat1_short, ProbMat1_long, 
                       ProbMat2, ProbMat2_short, ProbMat2_long, DIR):
    """Find the first profile circuit that differs from the top-``size`` set by about ``NSwap`` structures.

    Walks the rows of ``biaslim_df``, loads the matching result file from
    ``DIR`` and returns ``(bias, score, score1, score2)`` for the first
    circuit whose number of structures outside the first ``size`` rows of
    ``BiasDF`` is within 1 of ``NSwap``. ``score`` uses ``ProbMat1``/``ProbMat2``,
    ``score1`` the ``*_short`` matrices and ``score2`` the ``*_long`` matrices.
    Returns ``(None, None, None, None)`` when no circuit matches.

    NOTE(review): the ``LoadSA3`` call below passes
    ``(fil, DIR, adj_mat, ProbMat1, ProbMat2)``, whereas the ``LoadSA3`` defined
    in this file takes ``(fname, DIR, InfoMat, minbias, topL)``. As written,
    ``ProbMat1`` lands in ``minbias`` and ``ProbMat2`` in ``topL``; this function
    looks like it was written against an older ``LoadSA3`` -- confirm before use.
    ``ScoreCircuit_v7`` is not defined in the visible part of this file.
    """
    # TopN targets 
    topNSTRs = BiasDF.index.values[:size]
    # NOTE(review): this bias/score pair is overwritten inside the loop before
    # it is ever read.
    bias = BiasDF.head(size)["EFFECT"].mean()
    score = ScoreCircuit_v7(topNSTRs, adj_mat, ProbMat1, ProbMat2)
    # search along the profile
    for i, row in biaslim_df.iterrows():
        # First file in DIR whose name contains this size / bias-limit pattern.
        fil = searchFil("keepN_{}-minbias_{}.txt".format(size, row["bias"]), DIR)[0]
        cohe, real_minbias, STRs = LoadSA3(fil, DIR, adj_mat, ProbMat1, ProbMat2)
        score = ScoreCircuit_v7(STRs, adj_mat, ProbMat1, ProbMat2)

        # Mean "EFFECT" of the circuit's structures.
        bias = BiasDF.loc[STRs, "EFFECT"].mean()
        # Number of circuit structures that are not among the top-`size` ones.
        NDiff = len(set(STRs).difference(topNSTRs))
        if abs(NDiff-NSwap) < 2:

            score1 = ScoreCircuit_v7(STRs, adj_mat, ProbMat1_short, ProbMat2_short)
            score2 = ScoreCircuit_v7(STRs, adj_mat, ProbMat1_long, ProbMat2_long)
            # NOTE(review): 0.714 is an unexplained threshold that only controls
            # the debug print, not the return value.
            if score > 0.714:
                #print(RegionDistributionsList(STRs))
                print(score, score1, score2)
            return bias, score, score1, score2
    return None, None, None, None

def search_target_swap2(size, BiasDF, biaslim, biaslim_df, adj_mat, 
                       ProbMat1, ProbMat1_short, ProbMat1_long, 
                       ProbMat2, ProbMat2_short, ProbMat2_long, DIR):
    """Find the profile circuit whose reported bias, rounded to 3 decimals, equals ``biaslim``.

    Walks the rows of ``biaslim_df``, loads the matching result file from
    ``DIR`` and returns ``(bias, score, score1, score2)`` for the first
    circuit where ``round(real_minbias, 3) == biaslim``. ``score`` uses
    ``ProbMat1``/``ProbMat2``, ``score1`` the ``*_short`` matrices and ``score2``
    the ``*_long`` matrices. Returns ``(None, None, None, None)`` when no
    circuit matches.

    NOTE(review): the ``LoadSA3`` call below passes
    ``(fil, DIR, adj_mat, ProbMat1, ProbMat2)``, whereas the ``LoadSA3`` defined
    in this file takes ``(fname, DIR, InfoMat, minbias, topL)``. As written,
    ``ProbMat1`` lands in ``minbias`` and ``ProbMat2`` in ``topL``; this function
    looks like it was written against an older ``LoadSA3`` -- confirm before use.
    ``ScoreCircuit_v7`` is not defined in the visible part of this file.
    """
    # TopN targets 
    topNSTRs = BiasDF.index.values[:size]
    # NOTE(review): this bias/score pair is overwritten inside the loop before
    # it is ever read.
    bias = BiasDF.head(size)["EFFECT"].mean()
    score = ScoreCircuit_v7(topNSTRs, adj_mat, ProbMat1, ProbMat2)
    # search along the profile
    for i, row in biaslim_df.iterrows():
        # First file in DIR whose name contains this size / bias-limit pattern.
        fil = searchFil("keepN_{}-minbias_{}.txt".format(size, row["bias"]), DIR)[0]
        cohe, real_minbias, STRs = LoadSA3(fil, DIR, adj_mat, ProbMat1, ProbMat2)
        score = ScoreCircuit_v7(STRs, adj_mat, ProbMat1, ProbMat2)

        # Mean "EFFECT" of the circuit's structures.
        bias = BiasDF.loc[STRs, "EFFECT"].mean()
        #print(round(real_minbias,3), biaslim)
        # Exact float comparison after rounding to 3 decimals.
        if round(real_minbias,3) == biaslim:
            score1 = ScoreCircuit_v7(STRs, adj_mat, ProbMat1_short, ProbMat2_short)
            score2 = ScoreCircuit_v7(STRs, adj_mat, ProbMat1_long, ProbMat2_long)
            # NOTE(review): 0.673 is an unexplained threshold that only controls
            # the debug print, not the return value.
            if score2 > 0.673:
                #print()
                print(RegionDistributionsList(STRs))
            return bias, score, score1, score2
    return None, None, None, None

def LoadProfiles(BiasDF, BiasDF2, biaslim_df, size, DIR, adj_mat, InfoMat):
    """Load a search profile and append the top-``size`` reference circuit.

    The profile (scores, bias cut-offs, reported biases, structure lists) comes
    from ``GetData2``. One extra entry is appended: the first ``size`` index
    labels of ``BiasDF``, scored with ``ScoreCircuit_SI_Joint``, with the mean
    "EFFECT" of the first ``size`` rows of ``BiasDF2`` used as both the cut-off
    and the real bias.

    NOTE(review): the structures are taken from ``BiasDF`` but the bias from
    ``BiasDF2``; the call in this notebook passes the same frame for both, so
    whether the split is intentional cannot be told from here.

    Returns ``(Scores, CutBias, RealBias, STRS)`` as parallel lists.
    """
    profile = GetData2(biaslim_df, size, DIR, adj_mat, InfoMat)
    Scores, CutBias, RealBias, STRS = profile

    top_regions = BiasDF.index.values[:size]
    top_bias = BiasDF2.head(size)["EFFECT"].mean()
    top_score = ScoreCircuit_SI_Joint(top_regions, InfoMat)

    # Extend every column of the profile with the reference circuit's value.
    for column, value in zip(profile, (top_score, top_bias, top_bias, top_regions)):
        column.append(value)
    return Scores, CutBias, RealBias, STRS

In [None]:
# Read the connectome scoring matrices (first CSV column used as the index).
# Order of the paths matches the order of the names on the left-hand side.
InfoMat, adj_mat, InfoMat_short, InfoMat_long = (
    pd.read_csv(path, index_col=0)
    for path in (
        "../dat/allen-mouse-conn/ConnectomeScoringMat/InfoMat.Ipsi.csv",
        "../dat/allen-mouse-conn/ConnectomeScoringMat/WeightMat.Ipsi.csv",
        "../dat/allen-mouse-conn/ConnectomeScoringMat/InfoMat.Ipsi.short.csv",
        "../dat/allen-mouse-conn/ConnectomeScoringMat/InfoMat.Ipsi.long.csv",
    )
)

In [None]:
# Load the ASD search profile for circuit size 46.
size = 46
# Directory holding the SA result files ("...keepN_<size>-minbias_<bias>.txt").
ASD_DIR = "../dat/CircuitSearch/SA/ASD_Pareto_SI_Size46/"
ASD_BiasDF = Spark_ASD_STR_Bias
# Bias limits that were actually searched (written by the selection cell above).
biaslim_df = pd.read_csv(OutDIR + "biaslim.size.46.top17.txt")

COHESPeak, CutBiasPeak, RealBiasPeak, STRSPeak = LoadProfiles(
    ASD_BiasDF, ASD_BiasDF, biaslim_df, size, ASD_DIR, adj_mat, InfoMat
)
# One row per profile point: circuit score, requested bias limit, reported bias.
ASD_DFPeak = pd.DataFrame(
    {"Cohe": COHESPeak, "minBias": CutBiasPeak, "Bias": RealBiasPeak}
)

In [None]:
# Plot the ASD profile: circuit score (x) against mean structure bias (y).
plt.figure(dpi=120, figsize=(5,5))
plt.plot(ASD_DFPeak["Cohe"].values, ASD_DFPeak["Bias"].values, marker=".", color="#542788",  lw=2, markersize=8,
             ls = "-", label="ASD")
# Mark the selected circuit (third entry from the end of the profile).
# NOTE(review): the sibling-comparison figure later in this notebook marks
# index -4 instead of -3 -- confirm which one is the intended selection.
plt.scatter(ASD_DFPeak["Cohe"].values[-3], ASD_DFPeak["Bias"].values[-3], marker="x", s=50, color="red",
           zorder=100, label="Selected Circuits")

plt.xlabel("Circuit Score")
plt.ylabel("Mean Structure bias")
plt.grid()
plt.ylim((0.05, 0.4))
plt.legend()

In [None]:
# print the selected circuits
# Index -3 is the same profile entry that is marked in the plot above.
print(RegionDistributionsList(STRSPeak[-3]))

## Plotting ASD and Sibling Circuit Data

The following analysis compares ASD circuits with sibling control data.

**Note:** The sibling data shown here is for visualization purposes only. The full analysis used data generated by running simulated annealing (SA) search with the same procedure on 10,000 subsampled sibling sets, which is too large to include here.

To generate your own sibling data:
1. Use the bash script `scripts/submit_job_run_pareto.SI.sh` 
2. Run it with different sibling sets as input

For details on the original procedure used to generate these profiles, see `Optimized_Circuits_Information_Score.ipynb`.


In [None]:
# Load variables from numpy file
# Load the precomputed sibling profiles from the .npz archive and pull out the
# three arrays used by the comparison plot.
sibling_data = np.load('../dat/CircuitSearch/SA/ASD_Pareto_SI_Size46/circuit_analysis_data.sibling.SA.npz')
meanbias, meanSI, topbias_sub = (
    sibling_data[key] for key in ('meanbias', 'meanSI', 'topbias_sub')
)

In [None]:
# Compare the ASD profile with the sibling profiles on a single axis.
fig, ax = plt.subplots(dpi=480, figsize=(4.2,4))

# ASD profile: circuit score (x) against mean bias (y).
ax.plot(ASD_DFPeak["Cohe"].values, ASD_DFPeak["Bias"].values, marker=".", color="#542788",  lw=2, markersize=8,
             ls = "-", label="ASD")
# Mark and annotate the selected circuit.
# NOTE(review): this figure uses index -4, while the earlier plot and the
# printed circuit use index -3 -- confirm which one is the intended selection.
ax.scatter(ASD_DFPeak["Cohe"].values[-4], ASD_DFPeak["Bias"].values[-4], marker="x", s=70, color="red", lw=2,
           zorder=100)
ax.text(ASD_DFPeak["Cohe"].values[-4], 0.01 + ASD_DFPeak["Bias"].values[-4], s="Selected\n Circuit")

# Individual sibling profiles as faint grey lines.
# presumably topbias_sub is (n_sibling_sets, 2, n_points) with [:, 0, :] the
# score and [:, 1, :] the bias -- TODO confirm against the code that wrote the .npz
ax.plot(topbias_sub[:,0,:].T, topbias_sub[:,1,:].T, color="grey", markersize=1, lw=0.5,
             ls = "-", alpha=0.05)
#ax.plot(topbias_sub[0,0,:].T, topbias_sub[0,1,:].T, color="grey", markersize=1, lw=1,
#             ls = "-", alpha=1, label="Sibling Circuit")

# Average sibling profile (orange).
ax.plot(meanSI, meanbias, marker=".", color="Orange", lw=2, markersize=8,
             ls = "-", alpha=1, label="Average Sibling Circuit")
# Same curve again in opaque grey at zorder=0; this appears to exist only to
# give the faint, unlabeled sibling lines a visible "Sibling Circuit" legend entry.
ax.plot(meanSI, meanbias, color="grey", lw=2, markersize=8,
             ls = "-", alpha=1, label="Sibling Circuit", zorder=0)

#box = ax.get_position()
#ax.set_position([box.x0, box.y0, box.width * 0.8, box.height])
#ax.legend(loc="center left", bbox_to_anchor=(1, 0.9))
ax.legend(loc="lower left", frameon=False)

plt.xlabel("Circuit Connectivity Score", fontsize=14)
plt.ylabel("Average Mutation Bias", fontsize=14)
plt.grid(True, alpha=0.2)
plt.ylim(0.05, 0.42)

plt.show()

### 3.4 Annotate resulting circuits