Skip to content

Commit

Permalink
code cleanup and updates to grind, distort, and region finder
Browse files Browse the repository at this point in the history
  • Loading branch information
sa501428 committed Nov 14, 2019
1 parent e9c9e3c commit f1f3ec5
Show file tree
Hide file tree
Showing 5 changed files with 138 additions and 117 deletions.
2 changes: 1 addition & 1 deletion src/juicebox/HiCGlobals.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
public class HiCGlobals {

// Juicebox version (for display and header purposes only)
public static final String versionNum = "1.14.04";
public static final String versionNum = "1.14.05";
// Juicebox title
public static final String juiceboxTitle = "[Juicebox " + versionNum + "] Hi-C Map ";

Expand Down
235 changes: 130 additions & 105 deletions src/juicebox/tools/dev/Shuffle.java
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,10 @@ public class Shuffle extends JuicerCLT {
private Dataset ds;
private File outputDirectory;
private static int expectedCliqueSize = 3, expectedCliqueSizeWithBuffer = 12;
private int resolution = 50000;
private int neuralNetSize = 64;
private int highResolution = 10000;
private int resolution = 25000;
private int neuralNetSize = 500;
private int highResolution = 1000;
private int numPixelOverlapWhileSliding = 5;

public Shuffle() {
super("shuffle [-r res1,res2] [-k normalization] [-w clique size] [-m matrixSizesForNeuralNet]" +
Expand Down Expand Up @@ -164,64 +165,16 @@ private void assessOutlierContactAndPrintToFile(boolean isBadUpstream, int cliqu
@Override
public void run() {

Map<Integer, LocalGenomeRegion> indexToRegion = new HashMap<>();

ExecutorService executor = Executors.newFixedThreadPool(numCPUThreads);

HiCZoom zoom = ds.getZoomForBPResolution(resolution);
for (Chromosome chr : ds.getChromosomeHandler().getChromosomeArrayWithoutAllByAll()) {
Matrix matrix = ds.getMatrix(chr, chr);
if (matrix == null) continue;
MatrixZoomData zd = matrix.getZoomData(zoom);


int maxIndex = chr.getLength() / resolution + 1;
for (int k = 0; k < maxIndex; k++) {
indexToRegion.put(k, new LocalGenomeRegion(k, 2 * expectedCliqueSizeWithBuffer));
}

List<Block> blocks = zd.getNormalizedBlocksOverlapping(0, 0, maxIndex, maxIndex,
norm, false, false);
for (Block b : blocks) {
if (b != null) {
Runnable worker = new Runnable() {
@Override
public void run() {
for (ContactRecord cr : b.getContactRecords()) {
final int x = cr.getBinX();
final int y = cr.getBinY();
final float counts = cr.getCounts();

synchronized (indexToRegion) {
if (x != y) {
indexToRegion.get(x).addNeighbor(y, counts);
indexToRegion.get(y).addNeighbor(x, counts);
}
}
}
}
};
executor.execute(worker);
} else {
System.err.println("Block is null?");
}
}
}

executor.shutdown();

// Wait until all threads finish
while (!executor.isTerminated()) {
}
Map<Integer, LocalGenomeRegion> indexToRegion = getMapOfIndicesToRegion();

for (LocalGenomeRegion region : indexToRegion.values()) {
region.filterDownValues(expectedCliqueSizeWithBuffer);
}

File outputFile = new File(outputDirectory, "breakpoints.bed");

File outputBEDPEFile = new File(outputDirectory, "breakpoints.txt");
try {
final FileWriter positionsBED = new FileWriter(outputFile);
final FileWriter positionsBED = new FileWriter(new File(outputDirectory, "breakpoints.bed"));
final FileWriter positionsBEDPE = new FileWriter(outputBEDPEFile);
positionsBEDPE.write("#chr1\tx1\tx2\tch2\ty1\ty2\tcolor\n");

Expand Down Expand Up @@ -272,10 +225,14 @@ public void run() {
if (thisRegionisNotFineUpstream && thisRegionisNotFineDownstream) {
positionsBEDPE.write("assembly\t" + currX1 + "\t" + currX2 +
"\tassembly\t" + currX1 + "\t" + currX2 + "\t0,0,0\n");
} else if (thisRegionisNotFineUpstream) {
}

if (thisRegionisNotFineUpstream) {
assessOutlierContactAndPrintToFile(true, expectedCliqueSizeWithBuffer,
positionsBEDPE, selectedRegion, currX1, currX2);
} else if (thisRegionisNotFineDownstream) {
}

if (thisRegionisNotFineDownstream) {
assessOutlierContactAndPrintToFile(false, expectedCliqueSizeWithBuffer,
positionsBEDPE, selectedRegion, currX1, currX2);
}
Expand Down Expand Up @@ -313,54 +270,25 @@ public void process(String chr, List<Feature2D> feature2DList) {

File outputFolderForRes = new File(outputDirectory, "neural_net_run_" + highResolution);
UNIXTools.makeDir(outputFolderForRes);
String posPath = outputDirectory.getAbsolutePath() + "/chr_" + chrom.getName() + "_" + highResolution;
UNIXTools.makeDir(posPath);

try {
Writer fileNameWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(posPath + ".txt"), StandardCharsets.UTF_8));
String fileNameForWriter = outputDirectory.getAbsolutePath() + "/chr_" + chrom.getName() + "_" + highResolution + ".txt";
Writer fileNameWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fileNameForWriter), StandardCharsets.UTF_8));

int[] breakPointsForBoundaries = getBreakPointsBasedOnCPUThreads(feature2DList.size());

ExecutorService executor = Executors.newFixedThreadPool(numCPUThreads);

for (Feature2D feature2D : feature2DList) {
int i0 = Math.max(0, (feature2D.getStart1() - resolution) / highResolution);
int j0 = Math.max(0, (feature2D.getStart2() - resolution) / highResolution);
int iMax = Math.min((feature2D.getEnd1() + resolution) / highResolution, chrom.getLength() / highResolution);
int jMax = Math.min((feature2D.getEnd2() + resolution) / highResolution, chrom.getLength() / highResolution);
for (int k = 0; k < numCPUThreads; k++) {
int indxStart = breakPointsForBoundaries[k];
int indxEnd = breakPointsForBoundaries[k + 1];

if (feature2D.getMidPt2() < feature2D.getMidPt1()) {
System.err.println("jM < iM; skipping - iM " + feature2D.getMidPt1() + " jM " + feature2D.getMidPt2() + " i0 " + i0 + " j0 " + j0);
continue;
}
Runnable worker = new Runnable() {
@Override
public void run() {
for (int i = i0; i < iMax; i += neuralNetSize / 4) {
for (int j = j0; j < jMax; j += neuralNetSize / 4) {

if (j < i) {
System.err.println("j < i; skipping - i " + i + " j " + j + " i0 " + i0 + " j0 " + j0);
continue;
}

int newJ = j;
boolean isContinuousRegion = false;
if (j <= i + neuralNetSize / 2) {
newJ = i + neuralNetSize / 2;
isContinuousRegion = true;
}

if (isContinuousRegion) {
getContiguousDataAndSaveToFile(zd, i, "assembly",
neuralNetSize / 2, norm, outputFolderForRes.getAbsolutePath(), fileNameWriter);
} else {
DistortionFinder.getTrainingDataAndSaveToFile(zd, zd, zd, i, i,
"assembly", "assembly", isContinuousRegion, neuralNetSize / 2, norm,
false, 0, "", null, outputFolderForRes.getAbsolutePath(),
null, null, null, fileNameWriter, null, null,
null, null, null, null, false);
}
}
for (int i = indxStart; i < indxEnd; i++) {
Feature2D feature2D = feature2DList.get(i);
processFeature2DAndGetRegionsOfInterestFromIt(feature2D, chrom, zd, fileNameWriter, outputFolderForRes.getAbsolutePath());
}
}
};
Expand All @@ -383,6 +311,47 @@ public void run() {

}

public void processFeature2DAndGetRegionsOfInterestFromIt(Feature2D feature2D, Chromosome chrom,
MatrixZoomData zd, Writer fileNameWriter, String outputFolderForRedPath) {
int i0 = Math.max(0, (feature2D.getStart1() - resolution) / highResolution);
int j0 = Math.max(0, (feature2D.getStart2() - resolution) / highResolution);
int iMax = Math.min((feature2D.getEnd1() + resolution) / highResolution, chrom.getLength() / highResolution);
int jMax = Math.min((feature2D.getEnd2() + resolution) / highResolution, chrom.getLength() / highResolution);
int neuralNetworkSlidingIncrement = neuralNetSize / 2 - numPixelOverlapWhileSliding;

if (feature2D.getMidPt2() < feature2D.getMidPt1()) {
//System.err.println("jM < iM; skipping - iM " + feature2D.getMidPt1() + " jM " + feature2D.getMidPt2() + " i0 " + i0 + " j0 " + j0);
return;
}
for (int i = i0; i < iMax; i += neuralNetworkSlidingIncrement) {
for (int j = j0; j < jMax; j += neuralNetworkSlidingIncrement) {

if (j < i) {
//System.err.println("j < i; skipping - i " + i + " j " + j + " i0 " + i0 + " j0 " + j0);
continue;
}

int newJ = j;
boolean isContinuousRegion = false;
if (j <= i + neuralNetSize / 2) {
newJ = i + neuralNetSize / 2;
isContinuousRegion = true;
}

if (isContinuousRegion) {
getContiguousDataAndSaveToFile(zd, i, "assembly",
neuralNetSize / 2, norm, outputFolderForRedPath, fileNameWriter);
} else {
DistortionFinder.getTrainingDataAndSaveToFile(zd, zd, zd, i, newJ,
"assembly", "assembly", isContinuousRegion, neuralNetSize / 2, norm,
false, 0, "", null, outputFolderForRedPath,
null, null, null, fileNameWriter, null, null,
null, null, null, null, false);
}
}
}
}

private void createDiagonalInputForNeuralNet(Dataset ds, File outputDirectory) {

System.out.println("Neural Net Size " + neuralNetSize);
Expand All @@ -401,26 +370,20 @@ private void createDiagonalInputForNeuralNet(Dataset ds, File outputDirectory) {

File outputFolderForRes = new File(outputDirectory, "neural_net_diag_run_" + highResolution);
UNIXTools.makeDir(outputFolderForRes);
String posPath = outputDirectory.getAbsolutePath() + "/diag_chr_" + chrom.getName() + "_" + highResolution;
UNIXTools.makeDir(posPath);

try {
Writer fileNameWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(posPath + ".txt"), StandardCharsets.UTF_8));

String diagNameWriterFile = outputDirectory.getAbsolutePath() + "/diag_chr_" + chrom.getName() + "_" + highResolution + ".txt";
Writer fileNameWriter = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(diagNameWriterFile), StandardCharsets.UTF_8));

ExecutorService executor = Executors.newFixedThreadPool(numCPUThreads);

int iMax = chrom.getLength() / highResolution;

int[] breakPoints = new int[numCPUThreads + 1];
for (int k = 1; k < numCPUThreads; k++) {
breakPoints[k] = (k * iMax / numCPUThreads) + 1;
}
breakPoints[numCPUThreads] = iMax;
int[] breakPointsForBoundaries = getBreakPointsBasedOnCPUThreads(iMax);

for (int k = 0; k < numCPUThreads; k++) {
int iStart = breakPoints[k];
int iEnd = breakPoints[k + 1];
int iStart = breakPointsForBoundaries[k];
int iEnd = breakPointsForBoundaries[k + 1];

Runnable worker = new Runnable() {
@Override
Expand All @@ -447,4 +410,66 @@ public void run() {
}

}

public int[] getBreakPointsBasedOnCPUThreads(int maxNumIndices) {
int[] breakPoints = new int[numCPUThreads + 1];
for (int k = 1; k < numCPUThreads; k++) {
breakPoints[k] = (k * maxNumIndices / numCPUThreads) + 1;
}
breakPoints[numCPUThreads] = maxNumIndices;
return breakPoints;
}

private Map<Integer, LocalGenomeRegion> getMapOfIndicesToRegion() {
Map<Integer, LocalGenomeRegion> indexToRegion = new HashMap<>();
System.out.println("Num of CPU threads: " + numCPUThreads);
ExecutorService executor = Executors.newFixedThreadPool(numCPUThreads);

HiCZoom zoom = ds.getZoomForBPResolution(resolution);
for (Chromosome chr : ds.getChromosomeHandler().getChromosomeArrayWithoutAllByAll()) {
Matrix matrix = ds.getMatrix(chr, chr);
if (matrix == null) continue;
MatrixZoomData zd = matrix.getZoomData(zoom);


int maxIndex = chr.getLength() / resolution + 1;
for (int k = 0; k < maxIndex; k++) {
indexToRegion.put(k, new LocalGenomeRegion(k, 2 * expectedCliqueSizeWithBuffer));
}

List<Block> blocks = zd.getNormalizedBlocksOverlapping(0, 0, maxIndex, maxIndex,
norm, false, false);
for (Block b : blocks) {
if (b != null) {
Runnable worker = new Runnable() {
@Override
public void run() {
for (ContactRecord cr : b.getContactRecords()) {
final int x = cr.getBinX();
final int y = cr.getBinY();
final float counts = cr.getCounts();

synchronized (indexToRegion) {
if (x != y) {
indexToRegion.get(x).addNeighbor(y, counts);
indexToRegion.get(y).addNeighbor(x, counts);
}
}
}
}
};
executor.execute(worker);
} else {
System.err.println("Block is null?");
}
}
}

executor.shutdown();

// Wait until all threads finish
while (!executor.isTerminated()) {
}
return indexToRegion;
}
}
8 changes: 3 additions & 5 deletions src/juicebox/tools/utils/juicer/grind/DistortionFinder.java
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ public static void getTrainingDataAndSaveToFile(MatrixZoomData zd1, MatrixZoomDa
box1RectUL, box1RectLR, box2RectUL, box2RectLR, imgHalfSliceWidth, norm);

float[][] labelsMatrix = GrindUtils.generateDefaultDistortionLabelsFile(compositeMatrix.length, 4, isContinuousRegion);
GrindUtils.cleanUpLabelsMatrixBasedOnData(labelsMatrix, compositeMatrix);
//GrindUtils.cleanUpLabelsMatrixBasedOnData(labelsMatrix, compositeMatrix);

String filePrefix = prefixString + "orig_" + chrom1Name + "_" + box1XIndex + "_" + chrom2Name + "_" + box2XIndex + "_matrix";
GrindUtils.saveGrindMatrixDataToFile(filePrefix, negPath, compositeMatrix, negDataWriter, false);
Expand All @@ -103,15 +103,13 @@ public static void getTrainingDataAndSaveToFile(MatrixZoomData zd1, MatrixZoomDa
}
}

int checkForMultipleOfN = 3;

if (includeLabels) {
for (int k = 0; k < numManipulations; k++) {
Pair<float[][], float[][]> alteredMatrices = GrindUtils.randomlyManipulateMatrix(compositeMatrix, labelsMatrix);
Pair<float[][], float[][]> alteredMatrices = GrindUtils.randomlyManipulateMatrix(compositeMatrix, labelsMatrix, generator);
compositeMatrix = alteredMatrices.getFirst();
labelsMatrix = alteredMatrices.getSecond();

if (k % checkForMultipleOfN == 0) {
if (k == 0 || k == (numManipulations - 1) || generator.nextBoolean()) {
filePrefix = prefixString + "dstrt_" + chrom1Name + "_" + box1XIndex + "_" + chrom2Name + "_" + box2XIndex + "_" + k + "_matrix";
GrindUtils.saveGrindMatrixDataToFile(filePrefix, posPath, compositeMatrix, posDataWriter, false);
GrindUtils.saveGrindMatrixDataToFile(filePrefix + "_labels", posPath, labelsMatrix, posLabelWriter, false);
Expand Down
8 changes: 3 additions & 5 deletions src/juicebox/tools/utils/juicer/grind/GrindUtils.java
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,6 @@

public class GrindUtils {

private static final Random generator = new Random(0);

public static int[][] appropriatelyTransformVerticalStripes(int[][] data) {
int[][] transformedData = new int[data[0].length][data.length];
for (int i = 0; i < data.length; i++) {
Expand Down Expand Up @@ -248,9 +246,9 @@ public static void saveGrindMatrixDataToImage(String fileName, String path, doub
ImageIO.write(image, "PNG", myNewPNGFile);
}

public static Pair<float[][], float[][]> randomlyManipulateMatrix(float[][] data, float[][] labels) {
public static Pair<float[][], float[][]> randomlyManipulateMatrix(float[][] data, float[][] labels, Random generator) {
float[][] newData, newLabels;
Pair<Integer, Integer> boundaries = randomlyPickTwoIndices(data.length);
Pair<Integer, Integer> boundaries = randomlyPickTwoIndices(data.length, generator);
int lengthTranslocation = boundaries.getSecond() - boundaries.getFirst() + 1;
int newPosition = generator.nextInt(data.length - lengthTranslocation);

Expand Down Expand Up @@ -346,7 +344,7 @@ private static Pair<float[][], float[][]> splitApartRowsOfMatrix(float[][] sourc
return new Pair<>(copyRegionNOTBeingTranslated, copyRegionBeingTranslated);
}

private static Pair<Integer, Integer> randomlyPickTwoIndices(int length) {
private static Pair<Integer, Integer> randomlyPickTwoIndices(int length, Random generator) {
Integer a = generator.nextInt(length);
Integer b = generator.nextInt(length);
while (Math.abs(a - b) < 2) {
Expand Down
2 changes: 1 addition & 1 deletion src/juicebox/tools/utils/juicer/grind/RegionFinder.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@

abstract public class RegionFinder {

final Random generator = new Random();
protected static final Random generator = new Random(0);
protected Integer x;
protected Integer y;
protected Integer z;
Expand Down

0 comments on commit f1f3ec5

Please sign in to comment.