Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(7/7) RFC79: Implement incremental upload of CNA segmented data #39

Merged
merged 3 commits into from
Jun 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions scripts/importer/cbioportal_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -383,6 +383,7 @@ class MetaFileTypes(object):
MetaFileTypes.TIMELINE,
MetaFileTypes.GENE_PANEL_MATRIX,
MetaFileTypes.STRUCTURAL_VARIANT,
MetaFileTypes.SEG,
]

IMPORTER_CLASSNAME_BY_META_TYPE = {
Expand Down
27 changes: 25 additions & 2 deletions src/main/java/org/mskcc/cbio/portal/dao/DaoClinicalData.java
Original file line number Diff line number Diff line change
Expand Up @@ -370,7 +370,6 @@ public static List<ClinicalData> getSampleData(int cancerStudyId, Collection<Str
public static void removeSampleAttributesData(int sampleInternalId) throws DaoException {
Connection con = null;
PreparedStatement pstmt = null;
ResultSet rs = null;
try {
con = JdbcUtil.getDbConnection(DaoClinicalData.class);
pstmt = con.prepareStatement(SAMPLE_ATTRIBUTES_DELETE);
Expand All @@ -381,7 +380,31 @@ public static void removeSampleAttributesData(int sampleInternalId) throws DaoEx
throw new DaoException(e);
}
finally {
JdbcUtil.closeAll(DaoClinicalData.class, con, pstmt, rs);
JdbcUtil.closeAll(DaoClinicalData.class, con, pstmt, null);
}
}

/**
 * Removes the values of one clinical attribute for a set of samples.
 * <p>
 * Issues a single {@code DELETE} against {@code SAMPLE_ATTRIBUTES_TABLE}, matching rows whose
 * {@code ATTR_ID} equals {@code attrId} and whose {@code INTERNAL_ID} is one of the given ids.
 *
 * @param sampleInternalIds internal ids of the samples to clean up; must not be {@code null}.
 *                          An empty set is a no-op.
 * @param attrId            attribute id matched against the {@code ATTR_ID} column
 * @throws DaoException if the JDBC call fails
 */
public static void removeSampleAttributesData(Set<Integer> sampleInternalIds, String attrId) throws DaoException {
    if (sampleInternalIds.isEmpty()) {
        // An empty id set would render "IN ()", which is a SQL syntax error; nothing to delete anyway.
        return;
    }
    Connection con = null;
    PreparedStatement pstmt = null;
    try {
        con = JdbcUtil.getDbConnection(DaoClinicalData.class);
        // One "?" placeholder per sample id, so every value is bound rather than concatenated.
        pstmt = con.prepareStatement("DELETE FROM " + SAMPLE_ATTRIBUTES_TABLE
                + " WHERE `ATTR_ID` = ? AND `INTERNAL_ID` IN ("
                + String.join(",", Collections.nCopies(sampleInternalIds.size(), "?"))
                + ")");
        int parameterIndex = 1;
        pstmt.setString(parameterIndex++, attrId);
        for (Integer sampleInternalId : sampleInternalIds) {
            pstmt.setInt(parameterIndex++, sampleInternalId);
        }
        pstmt.executeUpdate();
    }
    catch (SQLException e) {
        throw new DaoException(e);
    }
    finally {
        JdbcUtil.closeAll(DaoClinicalData.class, con, pstmt, null);
    }
}

Expand Down
41 changes: 37 additions & 4 deletions src/main/java/org/mskcc/cbio/portal/dao/DaoCopyNumberSegment.java
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ public static int addCopyNumberSegment(CopyNumberSegment seg) throws DaoExceptio
}
}

public static void createFractionGenomeAlteredClinicalData(int cancerStudyId) throws DaoException {
public static void createFractionGenomeAlteredClinicalData(int cancerStudyId, Set<Integer> sampleIds, boolean updateMode) throws DaoException {
Connection con = null;
PreparedStatement pstmt = null;
ResultSet rs = null;
Expand All @@ -80,8 +80,15 @@ public static void createFractionGenomeAlteredClinicalData(int cancerStudyId) th
"AS c2 WHERE c2.`CANCER_STUDY_ID` = c1.`CANCER_STUDY_ID` AND c2.`SAMPLE_ID` = c1.`SAMPLE_ID` AND " +
"ABS(c2.`SEGMENT_MEAN`) >= 0.2) / SUM(`END`-`START`)) AS `VALUE` FROM `copy_number_seg` AS c1 , `cancer_study` " +
"WHERE c1.`CANCER_STUDY_ID` = cancer_study.`CANCER_STUDY_ID` AND cancer_study.`CANCER_STUDY_ID`=? " +
"GROUP BY cancer_study.`CANCER_STUDY_ID` , `SAMPLE_ID` HAVING SUM(`END`-`START`) > 0;");
pstmt.setInt(1, cancerStudyId);
(sampleIds == null ? "" : ("AND `SAMPLE_ID` IN ("+ String.join(",", Collections.nCopies(sampleIds.size(), "?")) + ") "))
+"GROUP BY cancer_study.`CANCER_STUDY_ID` , `SAMPLE_ID` HAVING SUM(`END`-`START`) > 0;");
int parameterIndex = 1;
pstmt.setInt(parameterIndex++, cancerStudyId);
if (sampleIds != null) {
for (Integer sampleId : sampleIds) {
pstmt.setInt(parameterIndex++, sampleId);
}
}
Map<Integer, String> fractionGenomeAltereds = new HashMap<Integer, String>();
rs = pstmt.executeQuery();
while (rs.next()) {
Expand All @@ -94,7 +101,10 @@ public static void createFractionGenomeAlteredClinicalData(int cancerStudyId) th
false, "20", cancerStudyId);
DaoClinicalAttributeMeta.addDatum(attr);
}


if (updateMode) {
DaoClinicalData.removeSampleAttributesData(fractionGenomeAltereds.keySet(), FRACTION_GENOME_ALTERED_ATTR_ID);
}
for (Map.Entry<Integer, String> fractionGenomeAltered : fractionGenomeAltereds.entrySet()) {
DaoClinicalData.addSampleDatum(fractionGenomeAltered.getKey(), FRACTION_GENOME_ALTERED_ATTR_ID, fractionGenomeAltered.getValue());
}
Expand Down Expand Up @@ -283,4 +293,27 @@ public static boolean segmentDataExistForSample(int cancerStudyId, int sampleId)
JdbcUtil.closeAll(DaoCopyNumberSegment.class, con, pstmt, rs);
}
}

/**
 * Deletes the copy number segments of the given samples within one cancer study.
 * <p>
 * Issues a single {@code DELETE} against {@code copy_number_seg}, matching rows whose
 * {@code CANCER_STUDY_ID} equals {@code cancerStudyId} and whose {@code SAMPLE_ID} is one of
 * the given ids.
 *
 * @param cancerStudyId internal id of the cancer study the segments belong to
 * @param sampleIds     internal ids of the samples whose segments are removed; must not be
 *                      {@code null}. An empty set is a no-op.
 * @throws DaoException if the JDBC call fails
 */
public static void deleteSegmentDataForSamples(int cancerStudyId, Set<Integer> sampleIds) throws DaoException {
    if (sampleIds.isEmpty()) {
        // An empty id set would render "IN ()", which is a SQL syntax error; nothing to delete anyway.
        return;
    }
    Connection con = null;
    PreparedStatement pstmt = null;
    try {
        con = JdbcUtil.getDbConnection(DaoCopyNumberSegment.class);
        // One "?" placeholder per sample id, so every value is bound rather than concatenated.
        pstmt = con.prepareStatement("DELETE FROM `copy_number_seg`" +
                " WHERE `CANCER_STUDY_ID`= ?" +
                " AND `SAMPLE_ID` IN (" + String.join(",", Collections.nCopies(sampleIds.size(), "?"))
                + ")");
        int parameterIndex = 1;
        pstmt.setInt(parameterIndex++, cancerStudyId);
        for (Integer sampleId : sampleIds) {
            pstmt.setInt(parameterIndex++, sampleId);
        }
        pstmt.executeUpdate();
    } catch (SQLException e) {
        throw new DaoException(e);
    } finally {
        // A DELETE produces no ResultSet, so there is none to close.
        JdbcUtil.closeAll(DaoCopyNumberSegment.class, con, pstmt, null);
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ public static int addCopyNumberSegmentFile(CopyNumberSegmentFile copySegFile) th
} catch (SQLException e) {
throw new DaoException(e);
} finally {
JdbcUtil.closeAll(DaoCopyNumberSegment.class, con, pstmt, rs);
JdbcUtil.closeAll(DaoCopyNumberSegmentFile.class, con, pstmt, rs);
}
}

Expand All @@ -86,6 +86,9 @@ public static CopyNumberSegmentFile getCopyNumberSegmentFile(int cancerStudyId)
cnsf.referenceGenomeId = CopyNumberSegmentFile.ReferenceGenomeId.valueOf(rs.getString("REFERENCE_GENOME_ID"));
cnsf.description = rs.getString("DESCRIPTION");
cnsf.filename = rs.getString("FILENAME");
if (rs.next()) {
throw new SQLException("More than one row was returned.");
}
return cnsf;
}
return null;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,9 @@
import java.io.FileReader;
import java.io.IOException;
import java.math.BigDecimal;
import java.util.HashSet;
import java.util.Properties;
import java.util.Set;

/**
* Import Segment data into database.
Expand All @@ -64,14 +66,17 @@
public class ImportCopyNumberSegmentData extends ConsoleRunnable {

private int entriesSkipped;

private boolean updateMode;
private Set<Integer> processedSampleIds;

private void importData(File file, int cancerStudyId) throws IOException, DaoException {
MySQLbulkLoader.bulkLoadOn();
FileReader reader = new FileReader(file);
BufferedReader buf = new BufferedReader(reader);
try {
String line = buf.readLine(); // skip header line
long segId = DaoCopyNumberSegment.getLargestId();
processedSampleIds = new HashSet<>();
while ((line=buf.readLine()) != null) {
ProgressMonitor.incrementCurValue();
ConsoleUtil.showProgress();
Expand All @@ -81,8 +86,7 @@ private void importData(File file, int cancerStudyId) throws IOException, DaoExc
System.err.println("wrong format: "+line);
}

CancerStudy cancerStudy = DaoCancerStudy.getCancerStudyByInternalId(cancerStudyId);
String chrom = strs[1].trim();
String chrom = strs[1].trim();
//validate in same way as GistitReader:
ValidationUtils.validateChromosome(chrom);

Expand Down Expand Up @@ -112,6 +116,10 @@ private void importData(File file, int cancerStudyId) throws IOException, DaoExc
CopyNumberSegment cns = new CopyNumberSegment(cancerStudyId, s.getInternalId(), chrom, start, end, numProbes, segMean);
cns.setSegId(++segId);
DaoCopyNumberSegment.addCopyNumberSegment(cns);
processedSampleIds.add(s.getInternalId());
}
if (updateMode) {
DaoCopyNumberSegment.deleteSegmentDataForSamples(cancerStudyId, processedSampleIds);
}
MySQLbulkLoader.flushAll();
}
Expand All @@ -127,6 +135,7 @@ public void run() {
OptionSet options = ConsoleUtil.parseStandardDataAndMetaOptions(args, description, true);
String dataFile = (String) options.valueOf("data");
File descriptorFile = new File((String) options.valueOf("meta"));
updateMode = options.has("overwrite-existing");

Properties properties = new Properties();
properties.load(new FileInputStream(descriptorFile));
Expand All @@ -135,13 +144,13 @@ public void run() {

CancerStudy cancerStudy = getCancerStudy(properties);

if (segmentDataExistsForCancerStudy(cancerStudy)) {
if (!updateMode && segmentDataExistsForCancerStudy(cancerStudy)) {
throw new IllegalArgumentException("Seg data for cancer study " + cancerStudy.getCancerStudyStableId() + " has already been imported: " + dataFile);
}

importCopyNumberSegmentFileMetadata(cancerStudy, properties);
importCopyNumberSegmentFileData(cancerStudy, dataFile);
DaoCopyNumberSegment.createFractionGenomeAlteredClinicalData(cancerStudy.getInternalId());
DaoCopyNumberSegment.createFractionGenomeAlteredClinicalData(cancerStudy.getInternalId(), processedSampleIds, updateMode);
if( MySQLbulkLoader.isBulkLoad()) {
pieterlukasse marked this conversation as resolved.
Show resolved Hide resolved
MySQLbulkLoader.flushAll();
}
Expand All @@ -164,7 +173,7 @@ private static boolean segmentDataExistsForCancerStudy(CancerStudy cancerStudy)
return (DaoCopyNumberSegment.segmentDataExistForCancerStudy(cancerStudy.getInternalId()));
}

private static void importCopyNumberSegmentFileMetadata(CancerStudy cancerStudy, Properties properties) throws DaoException {
private void importCopyNumberSegmentFileMetadata(CancerStudy cancerStudy, Properties properties) throws DaoException {
CopyNumberSegmentFile copyNumSegFile = new CopyNumberSegmentFile();
copyNumSegFile.cancerStudyId = cancerStudy.getInternalId();
String referenceGenomeId = properties.getProperty("reference_genome_id").trim();
Expand All @@ -179,7 +188,18 @@ private static void importCopyNumberSegmentFileMetadata(CancerStudy cancerStudy,
copyNumSegFile.referenceGenomeId = getRefGenId(referenceGenomeId);
copyNumSegFile.description = properties.getProperty("description").trim();
copyNumSegFile.filename = properties.getProperty("data_filename").trim();
DaoCopyNumberSegmentFile.addCopyNumberSegmentFile(copyNumSegFile);
CopyNumberSegmentFile storedCopyNumSegFile = DaoCopyNumberSegmentFile.getCopyNumberSegmentFile(cancerStudy.getInternalId());
if (updateMode && storedCopyNumSegFile != null) {
if (storedCopyNumSegFile.referenceGenomeId != copyNumSegFile.referenceGenomeId) {
throw new IllegalStateException("You are trying to upload "
+ copyNumSegFile.referenceGenomeId
+ " reference genome data into "
+ storedCopyNumSegFile.referenceGenomeId
+ " reference genome data.");
}
} else {
DaoCopyNumberSegmentFile.addCopyNumberSegmentFile(copyNumSegFile);
}
}

private void importCopyNumberSegmentFileData(CancerStudy cancerStudy, String dataFilename) throws IOException, DaoException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -135,8 +135,13 @@ public void run() {
try {
String description = "Import 'timeline' data";

OptionSet options = ConsoleUtil.parseStandardDataAndMetaOptions(args, description, false);
String dataFile = (String) options.valueOf("data");
OptionSet options = ConsoleUtil.parseStandardDataAndMetaOptions(args, description, true);
if (options.has("loadMode") && !"bulkLoad".equals(options.valueOf("loadMode"))) {
throw new UnsupportedOperationException("This loader supports bulkLoad load mode only, but "
+ options.valueOf("loadMode")
+ " has been supplied.");
}
String dataFile = (String) options.valueOf("data");
pieterlukasse marked this conversation as resolved.
Show resolved Hide resolved
File descriptorFile = new File((String) options.valueOf("meta"));
boolean overwriteExisting = options.has("overwrite-existing");

Expand Down
4 changes: 0 additions & 4 deletions src/main/java/org/mskcc/cbio/portal/util/ConsoleUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -178,10 +178,6 @@ public static OptionSet parseStandardDataAndMetaOptions(String[] args, String de
"Error: unknown loadMode action: " + actionArg);
}
}
else {
throw new UsageException(progName, description, parser,
"Error: 'loadMode' argument required.");
}
}
return options;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
/*
* This file is part of cBioPortal.
*
* cBioPortal is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/

package org.mskcc.cbio.portal.integrationTest.incremental;

import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.mskcc.cbio.portal.dao.DaoCancerStudy;
import org.mskcc.cbio.portal.dao.DaoClinicalData;
import org.mskcc.cbio.portal.dao.DaoCopyNumberSegment;
import org.mskcc.cbio.portal.dao.DaoCopyNumberSegmentFile;
import org.mskcc.cbio.portal.dao.DaoException;
import org.mskcc.cbio.portal.dao.DaoSample;
import org.mskcc.cbio.portal.dao.MySQLbulkLoader;
import org.mskcc.cbio.portal.model.CancerStudy;
import org.mskcc.cbio.portal.model.ClinicalData;
import org.mskcc.cbio.portal.model.CopyNumberSegment;
import org.mskcc.cbio.portal.model.CopyNumberSegmentFile;
import org.mskcc.cbio.portal.model.Sample;
import org.mskcc.cbio.portal.scripts.ImportCopyNumberSegmentData;
import org.springframework.test.annotation.Rollback;
import org.springframework.test.context.ContextConfiguration;
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
import org.springframework.transaction.annotation.Transactional;

import java.io.File;
import java.util.List;
import java.util.Set;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;

/**
 * Integration test for the incremental import of CNA segmented (SEG) data.
 *
 * @author Ruslan Forostianov
 * @author Pieter Lukasse
 */
@RunWith(SpringJUnit4ClassRunner.class)
@ContextConfiguration(locations = { "classpath:/applicationContext-dao.xml" })
@Rollback
@Transactional
public class TestIncrementalCopyNumberSegmentDataImport {

    public static final String STUDY_ID = "study_tcga_pub";

    private CancerStudy cancerStudy;

    /** Looks up the study under test by its stable id before each test. */
    @Before
    public void setUp() throws DaoException {
        cancerStudy = DaoCancerStudy.getCancerStudyByStableId(STUDY_ID);
    }

    /**
     * Seeds one sample with SEG data and a placeholder FRACTION_GENOME_ALTERED value, runs the
     * importer with {@code --overwrite-existing}, then checks the stored SEG file record, the
     * segment count and the clinical attribute value.
     */
    @Test
    public void testIncrementalUpload() throws DaoException {
        final int studyId = cancerStudy.getInternalId();
        final String stableSampleId = "TCGA-A1-A0SE-01";
        final Sample sample = DaoSample.getSampleByCancerStudyAndSampleId(studyId, stableSampleId);

        seedExistingSegData(studyId, sample.getInternalId());
        runIncrementalImport();

        // The SEG file record stored before the import is still the one on record.
        final CopyNumberSegmentFile storedSegFile = DaoCopyNumberSegmentFile.getCopyNumberSegmentFile(studyId);
        assertNotNull(storedSegFile);
        assertEquals("test_file.seg", storedSegFile.filename);

        final List<CopyNumberSegment> segments =
                DaoCopyNumberSegment.getSegmentForASample(sample.getInternalId(), studyId);
        assertEquals(9, segments.size());

        final List<ClinicalData> sampleClinicalData = DaoClinicalData.getSampleData(studyId, Set.of(stableSampleId));
        final ClinicalData fga = sampleClinicalData.stream()
                .filter(datum -> "FRACTION_GENOME_ALTERED".equals(datum.getAttrId()))
                .findFirst()
                .get();
        assertEquals("0.0000", fga.getAttrVal());
    }

    /** Stores a SEG file record, a placeholder clinical value and a single segment for the sample. */
    private void seedExistingSegData(int studyId, int sampleInternalId) throws DaoException {
        final CopyNumberSegmentFile segFile = new CopyNumberSegmentFile();
        segFile.cancerStudyId = studyId;
        segFile.referenceGenomeId = CopyNumberSegmentFile.ReferenceGenomeId.hg19;
        segFile.segFileId = 1;
        segFile.filename = "test_file.seg";
        segFile.description = "test seg file description";
        DaoCopyNumberSegmentFile.addCopyNumberSegmentFile(segFile);

        DaoClinicalData.addSampleDatum(sampleInternalId, "FRACTION_GENOME_ALTERED", "TEST");

        MySQLbulkLoader.bulkLoadOn();
        final CopyNumberSegment existingSegment =
                new CopyNumberSegment(studyId, sampleInternalId, "1", 3218610, 95674710, 100, 0.01);
        existingSegment.setSegId(1L);
        DaoCopyNumberSegment.addCopyNumberSegment(existingSegment);
        MySQLbulkLoader.flushAll();
    }

    /** Runs the SEG importer against the incremental test resources with overwriting enabled. */
    private void runIncrementalImport() {
        final File resourceDir = new File("src/test/resources/incremental/copy_number_alteration/");
        final File meta = new File(resourceDir, "meta_cna_seg.txt");
        final File data = new File(resourceDir, "data_cna.seg");

        final String[] importerArgs = {
            "--loadMode", "bulkLoad",
            "--meta", meta.getAbsolutePath(),
            "--data", data.getAbsolutePath(),
            "--overwrite-existing",
        };
        new ImportCopyNumberSegmentData(importerArgs).run();
    }

}
10 changes: 10 additions & 0 deletions src/test/resources/incremental/copy_number_alteration/data_cna.seg
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
ID chrom loc.start loc.end num.mark seg.mean
TCGA-A1-A0SE-01 1 3218610 95674710 53225 0.0055
TCGA-A1-A0SE-01 1 95676511 95676518 2 -1.6636
TCGA-A1-A0SE-01 1 95680124 167057183 24886 0.0053
TCGA-A1-A0SE-01 1 167057495 167059336 3 -1.0999
TCGA-A1-A0SE-01 1 167059760 181602002 9213 -8e-04
TCGA-A1-A0SE-01 1 181603120 181609567 6 -1.2009
TCGA-A1-A0SE-01 1 181610685 201473647 12002 0.0055
TCGA-A1-A0SE-01 1 201474400 201474544 2 -1.4235
TCGA-A1-A0SE-01 1 201475220 247813706 29781 -4e-04
Loading
Loading